adapt transformers>=4.46
parent af1d4f2f11
commit a0c568753a
@@ -56,14 +56,14 @@ class DataCollatorForSeq2Seq(_DataCollatorForSeq2Seq):


 class Seq2SeqTrainer(_Seq2SeqTrainer):
-    # Not Support for apex
-    def training_step(self, model: nn.Module, inputs: dict[str, Any]) -> torch.Tensor:
+    # Not Support for apex. transformers>=4.46 require additional args: num_items_in_batch
+    def training_step(self, model: nn.Module, inputs: dict[str, Any], num_items_in_batch=None) -> torch.Tensor:

         model.train()
         inputs = self._prepare_inputs(inputs)

         with self.compute_loss_context_manager():
-            loss = self.compute_loss(model, inputs)
+            loss = self.compute_loss(model, inputs, num_items_in_batch=num_items_in_batch)

         if self.args.n_gpu > 1:
             loss = loss.mean()
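Context for the hunk above: from transformers 4.46 onward, Trainer.training_step is invoked with an extra num_items_in_batch argument (the number of target tokens in the accumulated batch), and compute_loss accepts the same keyword so the loss can be normalized correctly under gradient accumulation. An override that keeps the old two-argument signature therefore fails. Below is a minimal illustrative sketch of such an override, not the file from this commit; the backward/return tail is an assumption that mirrors the stock Trainer and is not shown in the hunk.

from typing import Any, Optional

import torch
import torch.nn as nn
from transformers import Seq2SeqTrainer as _Seq2SeqTrainer


class Seq2SeqTrainer(_Seq2SeqTrainer):
    def training_step(
        self,
        model: nn.Module,
        inputs: dict[str, Any],
        num_items_in_batch: Optional[int] = None,  # passed by transformers>=4.46
    ) -> torch.Tensor:
        model.train()
        inputs = self._prepare_inputs(inputs)

        with self.compute_loss_context_manager():
            # Forward the token count so the loss is normalized correctly
            # when gradient accumulation is enabled.
            loss = self.compute_loss(model, inputs, num_items_in_batch=num_items_in_batch)

        if self.args.n_gpu > 1:
            loss = loss.mean()  # average across DataParallel replicas

        # Assumed tail (not part of the hunk above), mirroring the stock Trainer.
        self.accelerator.backward(loss)
        return loss.detach()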
@@ -353,7 +353,6 @@ def load_tokenizer_and_model(
         model = AutoModelForCausalLM.from_pretrained(
             model_dir,
             trust_remote_code=True,
-            empty_init=False,
             use_cache=False,
             torch_dtype=torch.bfloat16 # Must use BFloat 16
         )
@@ -363,7 +362,6 @@ def load_tokenizer_and_model(
         model = AutoModelForCausalLM.from_pretrained(
             model_dir,
             trust_remote_code=True,
-            empty_init=False,
             use_cache=False,
             torch_dtype=torch.bfloat16
         )
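Context for the two hunks above: empty_init is a keyword consumed by the remote-code ChatGLM model class; the HF-native glm-4-9b-chat-hf port used with transformers>=4.46 does not accept it, which is why the argument is removed (the inline comment in the second file below says the same). A minimal loading sketch under that assumption; the checkpoint id is illustrative and not taken from this commit.

import torch
from transformers import AutoModelForCausalLM

model_dir = "THUDM/glm-4-9b-chat-hf"  # assumed checkpoint id, for illustration only

model = AutoModelForCausalLM.from_pretrained(
    model_dir,
    trust_remote_code=True,      # harmless for the HF-native port; needed for remote-code checkpoints
    use_cache=False,             # disable the KV cache during training
    torch_dtype=torch.bfloat16,  # must use bfloat16, per the original comment
)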
@@ -57,14 +57,14 @@ class DataCollatorForSeq2Seq(_DataCollatorForSeq2Seq):


 class Seq2SeqTrainer(_Seq2SeqTrainer):
-    # Not Support for apex
-    def training_step(self, model: nn.Module, inputs: dict[str, Any]) -> torch.Tensor:
+    # Not Support for apex. transformers>=4.46 require additional args: num_items_in_batch
+    def training_step(self, model: nn.Module, inputs: dict[str, Any], num_items_in_batch=None) -> torch.Tensor:

         model.train()
         inputs = self._prepare_inputs(inputs)

         with self.compute_loss_context_manager():
-            loss = self.compute_loss(model, inputs)
+            loss = self.compute_loss(model, inputs, num_items_in_batch=num_items_in_batch)

         if self.args.n_gpu > 1:
             loss = loss.mean()
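If the script also needs to keep running on transformers<4.46, where compute_loss does not accept num_items_in_batch, the keyword can be forwarded only when it is supported. A hypothetical helper, not part of this commit:

import inspect

def compute_loss_accepts_token_count(trainer) -> bool:
    """True if the installed transformers' compute_loss accepts num_items_in_batch."""
    return "num_items_in_batch" in inspect.signature(trainer.compute_loss).parameters

Inside training_step, the call then becomes self.compute_loss(model, inputs, **({"num_items_in_batch": num_items_in_batch} if compute_loss_accepts_token_count(self) else {})).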
@@ -399,7 +399,6 @@ def load_tokenizer_and_model(
         model = AutoModelForCausalLM.from_pretrained(
             model_dir,
             trust_remote_code=True,
-            empty_init=False, #if transformers>=4.46 and load glm-4-9b-chat-hf, delete this
             use_cache=False,
             torch_dtype=torch.bfloat16 # Must use BFloat 16
         )
@@ -409,7 +408,6 @@ def load_tokenizer_and_model(
         model = AutoModelForCausalLM.from_pretrained(
             model_dir,
             trust_remote_code=True,
-            empty_init=False, #if transformers>=4.46 and load glm-4-9b-chat-hf, delete this
             use_cache=False,
             torch_dtype=torch.bfloat16
         )