From 4ab7a1efd15294b5bd0b2f4978400910f90a6166 Mon Sep 17 00:00:00 2001
From: zR <2448370773@qq.com>
Date: Tue, 16 Jul 2024 17:08:50 +0800
Subject: [PATCH] =?UTF-8?q?=E4=BE=9D=E8=B5=96=E6=9B=B4=E6=96=B0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 README.md                      |  4 ++--
 README_en.md                   |  7 ++++---
 basic_demo/requirements.txt    | 33 +++++++++++++--------------------
 basic_demo/vllm_cli_demo.py    |  5 ++---
 finetune_demo/requirements.txt |  4 ++--
 5 files changed, 23 insertions(+), 30 deletions(-)

diff --git a/README.md b/README.md
index 6d439ef..ac1293b 100644
--- a/README.md
+++ b/README.md
@@ -10,8 +10,8 @@ Read this in [English](README_en.md)
 
 ## 项目更新
 
-
-- 🔥🔥 **News**: ``2024/7/9``: GLM-4-9B-Chat
+- 🔥🔥 **News**: ``2024/7/16``: GLM-4-9B-Chat 模型依赖的 `transformers` 版本升级到 `4.42.4`,请更新模型配置文件并参考 `basic_demo/requirements.txt` 更新依赖。
+- 🔥 **News**: ``2024/7/9``: GLM-4-9B-Chat
   模型已适配 [Ollama](https://github.com/ollama/ollama),[Llama.cpp](https://github.com/ggerganov/llama.cpp),您可以在[PR](https://github.com/ggerganov/llama.cpp/pull/8031) 查看具体的细节。
 - 🔥 **News**: ``2024/7/1``: 我们更新了 GLM-4V-9B 的微调,您需要更新我们的模型仓库的运行文件和配置文件,
   以支持这个功能,更多微调细节 (例如数据集格式,显存要求),请前往 [查看](finetune_demo)。
diff --git a/README_en.md b/README_en.md
index b9afc85..e094288 100644
--- a/README_en.md
+++ b/README_en.md
@@ -8,9 +8,10 @@
 </p>
 
 ## Update
-
-- 🔥🔥 **News**: ``2024/7/9``: The GLM-4-9B-Chat model has been adapted to [Ollama](https://github.com/ollama/ollama)
-  and [Llama.cpp](https://github.com/ggerganov/llama.cpp), you can check the specific details
+- 🔥🔥 **News**: ``2024/7/16``: The `transformers` version that the GLM-4-9B-Chat model depends on has been upgraded
+  to `4.42.4`. Please update the model configuration file and refer to `basic_demo/requirements.txt` to update the dependencies.
+- 🔥 **News**: ``2024/7/9``: The GLM-4-9B-Chat model has been adapted to [Ollama](https://github.com/ollama/ollama)
+  and [Llama.cpp](https://github.com/ggerganov/llama.cpp), you can check the specific details
   in [PR](https://github.com/ggerganov/llama.cpp/pull/8031).
 - 🔥 **News**: ``2024/7/1``: We have updated the multimodal fine-tuning of GLM-4V-9B. You need to update the run file
   and configuration file of our model repository to support this feature. For more fine-tuning details (such as dataset
diff --git a/basic_demo/requirements.txt b/basic_demo/requirements.txt
index 9ab348f..3878fe2 100644
--- a/basic_demo/requirements.txt
+++ b/basic_demo/requirements.txt
@@ -1,27 +1,20 @@
-# use vllm
-# vllm>=0.5.0
-
 torch>=2.3.0
 torchvision>=0.18.0
-transformers==4.40.0
+transformers==4.42.4
 huggingface-hub>=0.23.1
 sentencepiece>=0.2.0
-pydantic>=2.7.1
-timm>=0.9.16
+pydantic>=2.8.2
+timm>=1.0.7
 tiktoken>=0.7.0
-accelerate>=0.30.1
-sentence_transformers>=2.7.0
-
-# web demo
-gradio>=4.33.0
-
-# openai demo
-openai>=1.34.0
-einops>=0.7.0
-sse-starlette>=2.1.0
-
-# INT4
-bitsandbytes>=0.43.1
+accelerate>=0.32.1
+sentence_transformers>=3.0.1
+gradio>=4.38.1 # web demo
+openai>=1.35.0 # openai demo
+einops>=0.8.0
+sse-starlette>=2.1.2
+bitsandbytes>=0.43.1 # INT4 Loading
+# vllm>=0.5.2
+# flash-attn>=2.5.9 # use with FlashAttention-2
 
 # PEFT model, not need if you don't use PEFT finetune model.
-# peft>=0.11.0
\ No newline at end of file
+# peft>=0.11.1
\ No newline at end of file
diff --git a/basic_demo/vllm_cli_demo.py b/basic_demo/vllm_cli_demo.py
index f6acf0f..b5cc0a3 100644
--- a/basic_demo/vllm_cli_demo.py
+++ b/basic_demo/vllm_cli_demo.py
@@ -15,7 +15,7 @@ from transformers import AutoTokenizer
 from vllm import SamplingParams, AsyncEngineArgs, AsyncLLMEngine
 from typing import List, Dict
 
-MODEL_PATH = 'THUDM/glm-4-9b'
+MODEL_PATH = 'THUDM/glm-4-9b-chat'
 
 
 def load_model_and_tokenizer(model_dir: str):
@@ -25,7 +25,7 @@ def load_model_and_tokenizer(model_dir: str):
         tensor_parallel_size=1,
         dtype="bfloat16",
         trust_remote_code=True,
-        gpu_memory_utilization=0.3,
+        gpu_memory_utilization=0.9,
         enforce_eager=True,
         worker_use_ray=True,
         engine_use_ray=False,
@@ -63,7 +63,6 @@ async def vllm_gen(messages: List[Dict[str, str]], top_p: float, temperature: fl
         "use_beam_search": False,
         "length_penalty": 1,
         "early_stopping": False,
-        "stop_token_ids": [151329, 151336, 151338],
         "ignore_eos": False,
         "max_tokens": max_dec_len,
         "logprobs": None,
diff --git a/finetune_demo/requirements.txt b/finetune_demo/requirements.txt
index e805a5b..4485eec 100644
--- a/finetune_demo/requirements.txt
+++ b/finetune_demo/requirements.txt
@@ -1,7 +1,7 @@
 jieba>=0.42.1
-datasets>2.20.0
+datasets>=2.20.0
 peft>=0.11.1
-deepspeed>=0.14.3
+deepspeed>=0.14.4
 nltk==3.8.1
 rouge_chinese>=1.0.3
 ruamel.yaml>=0.18.6
\ No newline at end of file
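
As a quick sanity check of the settings this patch changes, here is a minimal sketch (not part of the patch) that loads the `THUDM/glm-4-9b-chat` checkpoint the demo now points at. It assumes the pinned `transformers==4.42.4` and the commented `vllm>=0.5.2` from `basic_demo/requirements.txt`, and it uses vLLM's synchronous `LLM` API instead of the demo's `AsyncLLMEngine` for brevity; the sampling values are illustrative.

```python
# Minimal smoke-test sketch; assumes transformers==4.42.4 and vllm>=0.5.2.
from transformers import AutoTokenizer
from vllm import LLM, SamplingParams

MODEL_PATH = "THUDM/glm-4-9b-chat"  # the chat checkpoint the demo now uses

tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, trust_remote_code=True)
llm = LLM(
    model=MODEL_PATH,
    trust_remote_code=True,
    dtype="bfloat16",
    gpu_memory_utilization=0.9,  # raised from 0.3: let vLLM pre-allocate most of the GPU
    enforce_eager=True,
)

# Render the chat template; stopping relies on the tokenizer's EOS token now that
# the hard-coded stop_token_ids were removed from the demo.
prompt = tokenizer.apply_chat_template(
    [{"role": "user", "content": "Hello"}],
    tokenize=False,
    add_generation_prompt=True,
)
params = SamplingParams(temperature=0.8, top_p=0.8, max_tokens=256)  # illustrative values
print(llm.generate([prompt], params)[0].outputs[0].text)
```

Raising `gpu_memory_utilization` to 0.9 matches vLLM's usual default and gives the engine enough headroom for the KV cache; the 0.3 value in the old demo tended to be too small for a 9B model in bfloat16.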