From a0c568753a3259fc63bbbb2f72381c476bf5551a Mon Sep 17 00:00:00 2001
From: zhipuch
Date: Wed, 6 Nov 2024 11:40:09 +0000
Subject: [PATCH] adapt transformers>=4.46

---
 finetune_demo/finetune.py        | 8 +++-----
 finetune_demo/finetune_vision.py | 8 +++-----
 2 files changed, 6 insertions(+), 10 deletions(-)

diff --git a/finetune_demo/finetune.py b/finetune_demo/finetune.py
index 78c21fa..321836d 100644
--- a/finetune_demo/finetune.py
+++ b/finetune_demo/finetune.py
@@ -56,14 +56,14 @@ class DataCollatorForSeq2Seq(_DataCollatorForSeq2Seq):
 
 
 class Seq2SeqTrainer(_Seq2SeqTrainer):
-    # Not Support for apex
-    def training_step(self, model: nn.Module, inputs: dict[str, Any]) -> torch.Tensor:
+    # Not supported for apex. transformers>=4.46 requires an additional argument: num_items_in_batch
+    def training_step(self, model: nn.Module, inputs: dict[str, Any], num_items_in_batch=None) -> torch.Tensor:
 
         model.train()
         inputs = self._prepare_inputs(inputs)
 
         with self.compute_loss_context_manager():
-            loss = self.compute_loss(model, inputs)
+            loss = self.compute_loss(model, inputs, num_items_in_batch=num_items_in_batch)
 
         if self.args.n_gpu > 1:
             loss = loss.mean()
@@ -353,7 +353,6 @@ def load_tokenizer_and_model(
         model = AutoModelForCausalLM.from_pretrained(
             model_dir,
             trust_remote_code=True,
-            empty_init=False,
             use_cache=False,
             torch_dtype=torch.bfloat16 # Must use BFloat 16
         )
@@ -363,7 +362,6 @@ def load_tokenizer_and_model(
         model = AutoModelForCausalLM.from_pretrained(
             model_dir,
             trust_remote_code=True,
-            empty_init=False,
             use_cache=False,
             torch_dtype=torch.bfloat16
         )
diff --git a/finetune_demo/finetune_vision.py b/finetune_demo/finetune_vision.py
index cd859ff..c64cb05 100644
--- a/finetune_demo/finetune_vision.py
+++ b/finetune_demo/finetune_vision.py
@@ -57,14 +57,14 @@ class DataCollatorForSeq2Seq(_DataCollatorForSeq2Seq):
 
 
 class Seq2SeqTrainer(_Seq2SeqTrainer):
-    # Not Support for apex
-    def training_step(self, model: nn.Module, inputs: dict[str, Any]) -> torch.Tensor:
+    # Not supported for apex. transformers>=4.46 requires an additional argument: num_items_in_batch
+    def training_step(self, model: nn.Module, inputs: dict[str, Any], num_items_in_batch=None) -> torch.Tensor:
 
         model.train()
         inputs = self._prepare_inputs(inputs)
 
         with self.compute_loss_context_manager():
-            loss = self.compute_loss(model, inputs)
+            loss = self.compute_loss(model, inputs, num_items_in_batch=num_items_in_batch)
 
         if self.args.n_gpu > 1:
             loss = loss.mean()
@@ -399,7 +399,6 @@ def load_tokenizer_and_model(
         model = AutoModelForCausalLM.from_pretrained(
             model_dir,
             trust_remote_code=True,
-            empty_init=False, #if transformers>=4.46 and load glm-4-9b-chat-hf, delete this
             use_cache=False,
             torch_dtype=torch.bfloat16 # Must use BFloat 16
         )
@@ -409,7 +408,6 @@ def load_tokenizer_and_model(
         model = AutoModelForCausalLM.from_pretrained(
             model_dir,
             trust_remote_code=True,
-            empty_init=False, #if transformers>=4.46 and load glm-4-9b-chat-hf, delete this
             use_cache=False,
             torch_dtype=torch.bfloat16
         )
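
Note (not part of the patch above): because the new keyword defaults to None, the same override can also be written to run on both transformers<4.46 and >=4.46 by forwarding num_items_in_batch to compute_loss only when it was actually supplied. The sketch below is a hedged illustration of that idea, not the repository's code; it assumes only the public Trainer API (compute_loss accepts num_items_in_batch from 4.46 onward) and deliberately simplifies the backward/loss-scaling details.

# Hedged sketch, not the repository's code: a version-tolerant training_step.
from typing import Any

import torch
import torch.nn as nn
from transformers import Seq2SeqTrainer as _Seq2SeqTrainer


class Seq2SeqTrainer(_Seq2SeqTrainer):
    # Not supported for apex.
    def training_step(
        self, model: nn.Module, inputs: dict[str, Any], num_items_in_batch=None
    ) -> torch.Tensor:
        model.train()
        inputs = self._prepare_inputs(inputs)

        with self.compute_loss_context_manager():
            if num_items_in_batch is None:
                # transformers<4.46: compute_loss has no num_items_in_batch kwarg
                loss = self.compute_loss(model, inputs)
            else:
                loss = self.compute_loss(
                    model, inputs, num_items_in_batch=num_items_in_batch
                )

        if self.args.n_gpu > 1:
            loss = loss.mean()  # average over data-parallel replicas

        # Backward pass; gradient-accumulation scaling is simplified here and
        # differs slightly between transformers versions.
        self.accelerator.backward(loss)
        return loss.detach() / self.args.gradient_accumulation_steps

The empty_init removal is a separate concern from the signature change: as the comment already present in finetune_vision.py notes, that argument must be dropped when loading the glm-4-9b-chat-hf checkpoints with transformers>=4.46.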