diff --git a/finetune_demo/finetune.py b/finetune_demo/finetune.py
index 321836d..2552fa0 100644
--- a/finetune_demo/finetune.py
+++ b/finetune_demo/finetune.py
@@ -348,7 +348,7 @@ def load_tokenizer_and_model(
     model_dir: str,
     peft_config: Optional[PeftConfig] = None,
 ):
-    tokenizer = AutoTokenizer.from_pretrained(model_dir, trust_remote_code=True)
+    tokenizer = AutoTokenizer.from_pretrained(model_dir, padding_side='left', trust_remote_code=True)
     if peft_config is not None:
         model = AutoModelForCausalLM.from_pretrained(
             model_dir,
@@ -370,6 +370,8 @@ def load_tokenizer_and_model(
 
 def compute_metrics(eval_preds: EvalPrediction, tokenizer):
     batched_pred_ids, batched_label_ids = eval_preds
+    batched_pred_ids[batched_pred_ids==-100] = tokenizer.pad_token_id
+    batched_label_ids[batched_label_ids==-100] = tokenizer.pad_token_id
     metrics_dct = {'rouge-1': [], 'rouge-2': [], 'rouge-l': [], 'bleu-4': []}
     for pred_ids, label_ids in zip(batched_pred_ids, batched_label_ids):
         pred_txt = tokenizer.decode(pred_ids).strip()
diff --git a/finetune_demo/finetune_vision.py b/finetune_demo/finetune_vision.py
index c64cb05..cafe7e6 100644
--- a/finetune_demo/finetune_vision.py
+++ b/finetune_demo/finetune_vision.py
@@ -364,7 +364,7 @@ def process_batch_eval(
     if not dialogue_parts or dialogue_parts[-1] != len(input_ids):
         dialogue_parts.append(len(input_ids))
 
-    # Split the conversation into multiple dialogue segments 
+    # Split the conversation into multiple dialogue segments
     for end_idx in range(1, len(dialogue_parts)):
         input_segment = input_ids[:dialogue_parts[end_idx]]
         attention_segment = attention_mask[:dialogue_parts[end_idx]]
@@ -394,7 +394,7 @@ def load_tokenizer_and_model(
     model_dir: str,
     peft_config: Optional[PeftConfig] = None,
 ):
-    tokenizer = AutoTokenizer.from_pretrained(model_dir, trust_remote_code=True)
+    tokenizer = AutoTokenizer.from_pretrained(model_dir, padding_side='left', trust_remote_code=True)
     if peft_config is not None:
         model = AutoModelForCausalLM.from_pretrained(
             model_dir,
@@ -416,6 +416,8 @@ def load_tokenizer_and_model(
 
 def compute_metrics(eval_preds: EvalPrediction, tokenizer):
     batched_pred_ids, batched_label_ids = eval_preds
+    batched_pred_ids[batched_pred_ids==-100] = tokenizer.pad_token_id
+    batched_label_ids[batched_label_ids==-100] = tokenizer.pad_token_id
     metrics_dct = {'rouge-1': [], 'rouge-2': [], 'rouge-l': [], 'bleu-4': []}
     for pred_ids, label_ids in zip(batched_pred_ids, batched_label_ids):
         pred_txt = tokenizer.decode(pred_ids).strip()