Merge pull request #639 from zhipuch/main

adapt transformers==4.46
Yuxuan.Zhang 2024-11-10 20:44:06 +08:00 committed by GitHub
commit 1e0fa42dd8
3 changed files with 7 additions and 10 deletions

File 1 of 3: a LoRA/PEFT YAML config (path not shown)

@@ -47,3 +47,4 @@ peft_config:
   lora_alpha: 32
   lora_dropout: 0.1
   target_modules: ["query_key_value"]
+  #target_modules: ["q_proj", "k_proj", "v_proj"] if model is glm-4-9b-chat-hf
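
The commented alternative exists because the remote-code glm-4-9b-chat checkpoint exposes a fused query_key_value projection, while the HF-native glm-4-9b-chat-hf variant splits it into q_proj, k_proj and v_proj. A minimal sketch of building the matching peft config in Python (illustrative; the rank value is an assumption, since only lora_alpha, lora_dropout and target_modules appear in the YAML above):

from peft import LoraConfig

def make_lora_config(hf_native: bool) -> LoraConfig:
    # fused projection name for the remote-code checkpoint,
    # split projection names for the glm-4-9b-chat-hf variant
    targets = ["q_proj", "k_proj", "v_proj"] if hf_native else ["query_key_value"]
    return LoraConfig(
        r=8,                 # assumed rank, not taken from the YAML above
        lora_alpha=32,
        lora_dropout=0.1,
        target_modules=targets,
        task_type="CAUSAL_LM",
    )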

File 2 of 3: a fine-tuning script (path not shown)

@@ -56,14 +56,14 @@ class DataCollatorForSeq2Seq(_DataCollatorForSeq2Seq):
 class Seq2SeqTrainer(_Seq2SeqTrainer):
-    # Not Support for apex
-    def training_step(self, model: nn.Module, inputs: dict[str, Any]) -> torch.Tensor:
+    # Not supported for apex. transformers>=4.46 requires an additional arg: num_items_in_batch
+    def training_step(self, model: nn.Module, inputs: dict[str, Any], num_items_in_batch=None) -> torch.Tensor:
         model.train()
         inputs = self._prepare_inputs(inputs)
         with self.compute_loss_context_manager():
-            loss = self.compute_loss(model, inputs)
+            loss = self.compute_loss(model, inputs, num_items_in_batch=num_items_in_batch)
         if self.args.n_gpu > 1:
             loss = loss.mean()
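
For context, transformers 4.46 started passing num_items_in_batch into training_step and compute_loss to fix loss normalization under gradient accumulation, which is why the override gains the extra parameter. A hedged sketch of a version-tolerant variant that forwards the argument only when the installed compute_loss accepts it (illustrative names and structure, not the repository's exact code):

import inspect
from typing import Any

import torch
import torch.nn as nn
from transformers import Seq2SeqTrainer as _Seq2SeqTrainer

class Seq2SeqTrainer(_Seq2SeqTrainer):
    # apex is not supported here; transformers>=4.46 passes num_items_in_batch,
    # older releases do not, so accept it with a default and forward it only
    # when the installed compute_loss knows about it.
    def training_step(self, model: nn.Module, inputs: dict[str, Any], num_items_in_batch=None) -> torch.Tensor:
        model.train()
        inputs = self._prepare_inputs(inputs)
        with self.compute_loss_context_manager():
            if "num_items_in_batch" in inspect.signature(self.compute_loss).parameters:
                loss = self.compute_loss(model, inputs, num_items_in_batch=num_items_in_batch)
            else:
                loss = self.compute_loss(model, inputs)
        if self.args.n_gpu > 1:
            loss = loss.mean()  # average across data-parallel replicas
        self.accelerator.backward(loss)
        return loss.detach()  # gradient-accumulation scaling omitted in this sketch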
@@ -353,7 +353,6 @@ def load_tokenizer_and_model(
         model = AutoModelForCausalLM.from_pretrained(
             model_dir,
             trust_remote_code=True,
-            empty_init=False,
             use_cache=False,
             torch_dtype=torch.bfloat16  # Must use BFloat 16
         )
@@ -363,7 +362,6 @@ def load_tokenizer_and_model(
         model = AutoModelForCausalLM.from_pretrained(
             model_dir,
             trust_remote_code=True,
-            empty_init=False,
             use_cache=False,
             torch_dtype=torch.bfloat16
         )
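
empty_init was a keyword consumed by the GLM remote-code modeling files rather than by transformers itself, and the HF-native loading path appears not to accept it, so the call shrinks to standard from_pretrained arguments. A minimal sketch of the cleaned-up load, assuming model_dir points at a local GLM-4 checkpoint (illustrative only):

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_dir = "path/to/glm-4-9b-chat-hf"  # placeholder path, not from the diff

tokenizer = AutoTokenizer.from_pretrained(model_dir, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_dir,
    trust_remote_code=True,
    use_cache=False,                # disabled for training
    torch_dtype=torch.bfloat16,     # the original comment insists on BFloat16
)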

File 3 of 3: a second fine-tuning script with the same changes (path not shown)

@@ -57,14 +57,14 @@ class DataCollatorForSeq2Seq(_DataCollatorForSeq2Seq):
 class Seq2SeqTrainer(_Seq2SeqTrainer):
-    # Not Support for apex
-    def training_step(self, model: nn.Module, inputs: dict[str, Any]) -> torch.Tensor:
+    # Not supported for apex. transformers>=4.46 requires an additional arg: num_items_in_batch
+    def training_step(self, model: nn.Module, inputs: dict[str, Any], num_items_in_batch=None) -> torch.Tensor:
         model.train()
         inputs = self._prepare_inputs(inputs)
         with self.compute_loss_context_manager():
-            loss = self.compute_loss(model, inputs)
+            loss = self.compute_loss(model, inputs, num_items_in_batch=num_items_in_batch)
         if self.args.n_gpu > 1:
             loss = loss.mean()
@@ -399,7 +399,6 @@ def load_tokenizer_and_model(
         model = AutoModelForCausalLM.from_pretrained(
             model_dir,
             trust_remote_code=True,
-            empty_init=False,
             use_cache=False,
             torch_dtype=torch.bfloat16  # Must use BFloat 16
         )
@@ -409,7 +408,6 @@ def load_tokenizer_and_model(
         model = AutoModelForCausalLM.from_pretrained(
             model_dir,
             trust_remote_code=True,
-            empty_init=False,
             use_cache=False,
             torch_dtype=torch.bfloat16
         )
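
As a quick sanity check after upgrading, the installed version and the new training_step signature can be inspected directly; a small illustrative snippet, assuming transformers is importable:

import inspect

import transformers
from transformers import Trainer

print(transformers.__version__)  # expected to be >= 4.46 for this commit to apply cleanly
print("num_items_in_batch" in inspect.signature(Trainer.training_step).parameters)  # True on >= 4.46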