glm4/finetune_demo/configs/sft.yaml

data_config:
  train_file: train.jsonl
  val_file: dev.jsonl
  test_file: dev.jsonl
  num_proc: 1
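  # NOTE (assumed format): each line of the *.jsonl files is expected to hold
  # one JSON chat record, e.g.
  #   {"messages": [{"role": "user", "content": "..."},
  #                 {"role": "assistant", "content": "..."}]}
  # this example is illustrative; see the repo's data-preparation docs for the
  # exact schema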

combine: True
freezeV: True

max_input_length: 512
max_output_length: 512

training_args:
  # see `transformers.Seq2SeqTrainingArguments`
  output_dir: ./output
  max_steps: 3000
  # adjust the learning rate to fit your dataset
  learning_rate: 5e-5
  # settings for data loading
  per_device_train_batch_size: 1
  dataloader_num_workers: 16
  remove_unused_columns: false
  # settings for saving checkpoints
  save_strategy: steps
  save_steps: 500
  # settings for logging
  log_level: info
  logging_strategy: steps
  logging_steps: 10
  # settings for evaluation
  per_device_eval_batch_size: 16
  eval_strategy: steps
  eval_steps: 500
  # settings for optimizer
  # adam_epsilon: 1e-6
  # uncomment the following line to detect nan or inf values
  # debug: underflow_overflow
  predict_with_generate: true
  generation_config:
    max_new_tokens: 512
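    # max_new_tokens bounds how long generated sequences can be during
    # evaluation, which runs generation because predict_with_generate is
    # enabled above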
  # replace with the absolute path to your DeepSpeed config
  deepspeed: configs/ds_zero_3.json
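
The comment inside training_args points at `transformers.Seq2SeqTrainingArguments`. As a minimal sketch of how this section might be consumed, assuming PyYAML and a recent transformers release whose argument names match these keys one-to-one (eval_strategy requires transformers >= 4.41); load_training_args is a hypothetical helper, not part of the repo:

import yaml  # PyYAML
from transformers import GenerationConfig, Seq2SeqTrainingArguments

def load_training_args(path="configs/sft.yaml"):
    """Build Seq2SeqTrainingArguments from the training_args section of sft.yaml."""
    with open(path) as f:
        cfg = yaml.safe_load(f)
    args = dict(cfg["training_args"])
    # the nested generation_config mapping is wrapped in a GenerationConfig,
    # which Seq2SeqTrainingArguments accepts for predict_with_generate runs
    if isinstance(args.get("generation_config"), dict):
        args["generation_config"] = GenerationConfig(**args["generation_config"])
    return Seq2SeqTrainingArguments(**args)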