# glm4/finetune_demo/configs/ptuning_v2.yaml
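# P-Tuning v2 (prefix tuning) configuration for the GLM-4 finetune demo.
# Typically passed as the config argument to finetune.py, for example
# (the exact invocation may differ between repo versions):
#   python finetune.py <data_dir> THUDM/glm-4-9b-chat configs/ptuning_v2.yaml
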

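# dataset files in JSONL format, resolved relative to the data directory
# given on the command line; num_proc is the number of worker processes
# used for dataset preprocessing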
data_config:
  train_file: train.jsonl
  val_file: dev.jsonl
  test_file: dev.jsonl
  num_proc: 1
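
# maximum number of tokens kept for the tokenized prompt and response;
# longer samples are truncated during preprocessing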
max_input_length: 128
max_output_length: 128

training_args:
  # see `transformers.Seq2SeqTrainingArguments`
  output_dir: ./output
  max_steps: 3000
  # adjust the learning rate to fit the dataset
  learning_rate: 5e-4
  # settings for data loading
  per_device_train_batch_size: 4
  dataloader_num_workers: 16
  remove_unused_columns: false
  # settings for saving checkpoints
  save_strategy: steps
  save_steps: 500
  # settings for logging
  log_level: info
  logging_strategy: steps
  logging_steps: 10
  # settings for evaluation
  per_device_eval_batch_size: 16
  eval_strategy: steps
  eval_steps: 500
  # settings for the optimizer
  # adam_epsilon: 1e-6
  # uncomment the following line to detect nan or inf values
  # debug: underflow_overflow
  predict_with_generate: true
  # see `transformers.GenerationConfig`
  generation_config:
    max_new_tokens: 512
  # set the absolute path to your DeepSpeed config here
  # deepspeed: ds_zero_3.json
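
# see `peft.PrefixTuningConfig`
# PREFIX_TUNING (the PEFT implementation behind P-Tuning v2) prepends
# num_virtual_tokens trainable key/value vectors at every attention layer.
# num_attention_heads and token_dim describe the shape of those prefix
# key/values and should match the base model's key/value attention layout;
# GLM-4-9B uses multi-query attention with 2 key/value heads of dimension
# 128, hence 2 heads and a token_dim of 256 below.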
peft_config:
  peft_type: PREFIX_TUNING
  task_type: CAUSAL_LM
  num_virtual_tokens: 512
  num_attention_heads: 2
  token_dim: 256