From 081f0fb96c4cfa6390797c2e214282e1533c9738 Mon Sep 17 00:00:00 2001 From: zR <2448370773@qq.com> Date: Thu, 20 Jun 2024 01:24:00 +0800 Subject: [PATCH] fix for ds and zero3 error --- finetune_demo/README.md | 8 ++++---- finetune_demo/README_en.md | 8 ++++---- finetune_demo/finetune.py | 31 ++++++++++++++----------------- 3 files changed, 22 insertions(+), 25 deletions(-) diff --git a/finetune_demo/README.md b/finetune_demo/README.md index 0fe5fb4..64ef3f6 100644 --- a/finetune_demo/README.md +++ b/finetune_demo/README.md @@ -22,15 +22,15 @@ Read this in [English](README_en.md) | p-tuning v2 (PEFT) | 21381MiB | 121M | | SFT (Zero3 method) | 80935MiB<br/>(Each GPU,需要使用8张GPU) | 20G | -在开始微调之前,请你先安装`basic_demo`中的依赖,同时您需要安装本目录下的依赖项: - -> NOTE: NLTK 3.8.1 部分代码可能尚未对 python 3.12 -> 进行适配,该情况下的适配方法可参考[issues #38](https://github.com/THUDM/GLM-4/issues/38) +在开始微调之前,请你先安装 `basic_demo` 中的依赖,并保证克隆了最新版本的模型仓库,同时您需要安装本目录下的依赖项: ```bash pip install -r requirements.txt ``` +> NOTE: NLTK 3.8.1 部分代码可能尚未对 python 3.12 +> 进行适配,该情况下的适配方法可参考[issues #38](https://github.com/THUDM/GLM-4/issues/38) +> ## 多轮对话格式 多轮对话微调示例采用 GLM-4 对话格式约定,对不同角色添加不同 `loss_mask` 从而在一遍计算中为多轮回复计算 `loss`。 diff --git a/finetune_demo/README_en.md b/finetune_demo/README_en.md index 206953f..8e4f82a 100644 --- a/finetune_demo/README_en.md +++ b/finetune_demo/README_en.md @@ -24,16 +24,16 @@ Test hardware information: | p-tuning v2 (PEFT) | 21381MiB | 121M | | SFT (Zero3 method) | 80935MiB<br/>(Each GPU, 8 GPUs are required) | 20G | -Before starting fine-tuning, please install the dependencies in `basic_demo` first. You also need to install the +Before starting fine-tuning, please install the dependencies in `basic_demo` and clone the latest model repos (Hugging Face) first. You also need to install the dependencies in this directory: -> NOTE: Some codes in NLTK 3.8.1 might not yet be compatible with Python 3.12. For adaptation methods in such cases, -> please refer to [issues #38](https://github.com/THUDM/GLM-4/issues/38). 
- ```bash pip install -r requirements.txt ``` +> NOTE: Some code in NLTK 3.8.1 might not yet be compatible with Python 3.12. For adaptation methods in such cases, +> please refer to [issue #38](https://github.com/THUDM/GLM-4/issues/38). + ## Multi-round dialogue format The multi-round dialogue fine-tuning example uses the GLM-4 dialogue format convention, adding different `loss_mask` to diff --git a/finetune_demo/finetune.py b/finetune_demo/finetune.py index e65eed7..6e67bb3 100644 --- a/finetune_demo/finetune.py +++ b/finetune_demo/finetune.py @@ -56,16 +56,22 @@ class DataCollatorForSeq2Seq(_DataCollatorForSeq2Seq): class Seq2SeqTrainer(_Seq2SeqTrainer): + # Apex is not supported def training_step(self, model: nn.Module, inputs: dict[str, Any]) -> torch.Tensor: + model.train() inputs = self._prepare_inputs(inputs) - loss = self.compute_loss(model, inputs) - if self.args.gradient_accumulation_steps > 1: - loss = loss / self.args.gradient_accumulation_steps - loss.backward() + + with self.compute_loss_context_manager(): + loss = self.compute_loss(model, inputs) + + if self.args.n_gpu > 1: + loss = loss.mean() + self.accelerator.backward(loss) + detached_loss = loss.detach() / self.args.gradient_accumulation_steps del inputs torch.cuda.empty_cache() - return loss.detach() + return detached_loss def prediction_step( self, @@ -75,6 +81,7 @@ class Seq2SeqTrainer(_Seq2SeqTrainer): ignore_keys=None, **gen_kwargs, ) -> tuple[Optional[float], Optional[torch.Tensor], Optional[torch.Tensor]]: + with torch.no_grad(): # Ensure no gradient computation if self.args.predict_with_generate: output_ids = inputs.pop('output_ids') @@ -255,12 +262,7 @@ def process_batch( message = process_message(message) loss_mask_val = False if message['role'] in ('system', 'user', 'observation') else True - - # New Code With Using apply_chat_template in jinjia template in tokenizer_config.json - # new_input_ids = tokenizer.apply_chat_template([message], tokenize=True, return_dict=False) - - # Old 
Code With Using apply_chat_template in tokenization_chatglm.py - new_input_ids = tokenizer.apply_chat_template([message], tokenize=True, return_dict=False)[0][2:] + new_input_ids = tokenizer.apply_chat_template([message], tokenize=True, return_dict=False)[2:] new_loss_masks = [loss_mask_val] * len(new_input_ids) input_ids += new_input_ids loss_masks += new_loss_masks @@ -299,12 +301,7 @@ def process_batch_eval( break else: message = process_message(message) - - # New Code With Using apply_chat_template in jinjia template in tokenizer_config.json - # new_input_ids = tokenizer.apply_chat_template([message], tokenize=True, return_dict=False) - - # Old Code With Using apply_chat_template in tokenization_chatglm.py - new_input_ids = tokenizer.apply_chat_template([message], tokenize=True, return_dict=False)[0][2:] + new_input_ids = tokenizer.apply_chat_template([message], tokenize=True, return_dict=False)[2:] if message['role'] == 'assistant': output_prompt, output_ids = ( new_input_ids[:1],