Dependency update

parent e7a1124bf9
commit 4ab7a1efd1
README.md (Chinese README, translated):

@@ -10,8 +10,8 @@
 Read this in [English](README_en.md)

 ## Project Updates

-- 🔥🔥 **News**: ``2024/7/9``: The GLM-4-9B-Chat model has been adapted to [Ollama](https://github.com/ollama/ollama) and [Llama.cpp](https://github.com/ggerganov/llama.cpp); you can check the specific details in the [PR](https://github.com/ggerganov/llama.cpp/pull/8031).
+- 🔥🔥 **News**: ``2024/7/16``: The `transformers` version that the GLM-4-9B-Chat model depends on has been upgraded to `4.42.4`. Please update the model configuration files and refer to `basic_demo/requirements.txt` to update the dependencies.
+- 🔥 **News**: ``2024/7/9``: The GLM-4-9B-Chat model has been adapted to [Ollama](https://github.com/ollama/ollama) and [Llama.cpp](https://github.com/ggerganov/llama.cpp); you can check the specific details in the [PR](https://github.com/ggerganov/llama.cpp/pull/8031).
 - 🔥 **News**: ``2024/7/1``: We have updated the fine-tuning of GLM-4V-9B. You need to update the run files and configuration files of our model repository to support this feature. For more fine-tuning details (such as dataset format and GPU memory requirements), see [finetune_demo](finetune_demo).
README_en.md:

@@ -8,9 +8,10 @@
 </p>

 ## Update

-- 🔥🔥 **News**: ``2024/7/9``: The GLM-4-9B-Chat model has been adapted to [Ollama](https://github.com/ollama/ollama) and [Llama.cpp](https://github.com/ggerganov/llama.cpp), you can check the specific details in [PR](https://github.com/ggerganov/llama.cpp/pull/8031).
+- 🔥🔥 **News**: ``2024/7/16``: The `transformers` version that the GLM-4-9B-Chat model depends on has been upgraded to `4.42.4`. Please update the model configuration file and refer to `basic_demo/requirements.txt` to update the dependencies.
+- 🔥 **News**: ``2024/7/9``: The GLM-4-9B-Chat model has been adapted to [Ollama](https://github.com/ollama/ollama) and [Llama.cpp](https://github.com/ggerganov/llama.cpp), you can check the specific details in [PR](https://github.com/ggerganov/llama.cpp/pull/8031).
 - 🔥 **News**: ``2024/7/1``: We have updated the multimodal fine-tuning of GLM-4V-9B. You need to update the run file and configuration file of our model repository to support this feature. For more fine-tuning details (such as dataset
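Both README entries direct users to an exact `transformers` pin. As a minimal sketch (not part of this commit), a demo entry point could fail fast on a stale environment; the only assumption is that `transformers` is importable:

```python
# Hypothetical guard, not from the repository: abort early when the
# installed transformers does not match the version GLM-4-9B-Chat pins.
import transformers

REQUIRED = "4.42.4"  # transformers==4.42.4 in basic_demo/requirements.txt

if transformers.__version__ != REQUIRED:
    raise RuntimeError(
        f"GLM-4-9B-Chat expects transformers=={REQUIRED}, "
        f"found {transformers.__version__}. "
        "Run: pip install -r basic_demo/requirements.txt"
    )
```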
basic_demo/requirements.txt (the file the news entry above points to):

@@ -1,27 +1,20 @@
-# use vllm
-# vllm>=0.5.0
-
 torch>=2.3.0
 torchvision>=0.18.0
-transformers==4.40.0
+transformers==4.42.4
 huggingface-hub>=0.23.1
 sentencepiece>=0.2.0
-pydantic>=2.7.1
-timm>=0.9.16
+pydantic>=2.8.2
+timm>=1.0.7
 tiktoken>=0.7.0
-accelerate>=0.30.1
-sentence_transformers>=2.7.0
+accelerate>=0.32.1
+sentence_transformers>=3.0.1
-# web demo
-gradio>=4.33.0
-# openai demo
-openai>=1.34.0
-einops>=0.7.0
-sse-starlette>=2.1.0
-# INT4
-bitsandbytes>=0.43.1
+gradio>=4.38.1 # web demo
+openai>=1.35.0 # openai demo
+einops>=0.8.0
+sse-starlette>=2.1.2
+bitsandbytes>=0.43.1 # INT4 Loading
+# vllm>=0.5.2
+# flash-attn>=2.5.9 # using with flash-attention 2
 # PEFT model, not need if you don't use PEFT finetune model.
-# peft>=0.11.0
+# peft>=0.11.1
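Most of these floors are raised rather than pinned exactly. A small sketch of verifying an environment against the new minimums (assumptions: the `packaging` library is installed, and the table below is hand-copied from the file rather than parsed out of it):

```python
# Check a few of the raised minimums from basic_demo/requirements.txt.
from importlib.metadata import version
from packaging.specifiers import SpecifierSet

minimums = {
    "accelerate": ">=0.32.1",
    "timm": ">=1.0.7",
    "sentence-transformers": ">=3.0.1",  # distribution name uses a dash
    "pydantic": ">=2.8.2",
    "gradio": ">=4.38.1",
}

for dist, spec in minimums.items():
    installed = version(dist)
    ok = installed in SpecifierSet(spec)  # accepts a version string directly
    print(f"{dist:>22} {installed:>10}  needs {spec}  {'ok' if ok else 'TOO OLD'}")
```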
vLLM demo script (Python; filename not shown in this view):

@@ -15,7 +15,7 @@ from transformers import AutoTokenizer
 from vllm import SamplingParams, AsyncEngineArgs, AsyncLLMEngine
 from typing import List, Dict

-MODEL_PATH = 'THUDM/glm-4-9b'
+MODEL_PATH = 'THUDM/glm-4-9b-chat'


 def load_model_and_tokenizer(model_dir: str):
@@ -25,7 +25,7 @@ def load_model_and_tokenizer(model_dir: str):
         tensor_parallel_size=1,
         dtype="bfloat16",
         trust_remote_code=True,
-        gpu_memory_utilization=0.3,
+        gpu_memory_utilization=0.9,
         enforce_eager=True,
         worker_use_ray=True,
         engine_use_ray=False,
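The changed line sits inside an `AsyncEngineArgs(...)` call. The diff shows only the argument block, so the wiring below (model/tokenizer paths, the `AutoTokenizer` load, `AsyncLLMEngine.from_engine_args`) is an assumed reconstruction consistent with the imports in the previous hunk, not the repository's exact code:

```python
# A sketch of how the changed kwargs plausibly fit together; everything
# outside the shown argument block is assumed, not taken from the diff.
from transformers import AutoTokenizer
from vllm import AsyncEngineArgs, AsyncLLMEngine

MODEL_PATH = 'THUDM/glm-4-9b-chat'

def load_model_and_tokenizer(model_dir: str):
    engine_args = AsyncEngineArgs(
        model=model_dir,
        tokenizer=model_dir,
        tensor_parallel_size=1,        # single-GPU serving
        dtype="bfloat16",
        trust_remote_code=True,        # GLM-4 ships custom modeling code
        gpu_memory_utilization=0.9,    # raised from 0.3 by this commit
        enforce_eager=True,            # skip CUDA graph capture
        worker_use_ray=True,
        engine_use_ray=False,
    )
    tokenizer = AutoTokenizer.from_pretrained(model_dir, trust_remote_code=True)
    engine = AsyncLLMEngine.from_engine_args(engine_args)
    return engine, tokenizer
```

Raising `gpu_memory_utilization` from 0.3 to 0.9 lets vLLM pre-allocate most of the GPU for weights and KV cache instead of leaving roughly 70% of the card idle.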
@@ -63,7 +63,6 @@ async def vllm_gen(messages: List[Dict[str, str]], top_p: float, temperature: fl
         "use_beam_search": False,
         "length_penalty": 1,
         "early_stopping": False,
-        "stop_token_ids": [151329, 151336, 151338],
         "ignore_eos": False,
         "max_tokens": max_dec_len,
         "logprobs": None,
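With the explicit GLM-4 stop tokens `[151329, 151336, 151338]` removed, stopping presumably falls back to the model's own EOS configuration in the updated model files (the commit does not state the motivation). A sketch of how the remaining dict feeds `SamplingParams` under the vLLM 0.5.x API; `top_p`, `temperature`, and `max_dec_len` come from `vllm_gen`'s signature, the other keys mirror the hunk above:

```python
# Build SamplingParams from the surviving keys of the params dict.
from vllm import SamplingParams

def build_sampling_params(top_p: float, temperature: float, max_dec_len: int) -> SamplingParams:
    params_dict = {
        "use_beam_search": False,
        "length_penalty": 1,
        "early_stopping": False,
        "ignore_eos": False,          # still honor EOS from the model config
        "max_tokens": max_dec_len,
        "logprobs": None,
        "top_p": top_p,
        "temperature": temperature,
    }
    return SamplingParams(**params_dict)
```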
finetune_demo/requirements.txt:

@@ -1,7 +1,7 @@
 jieba>=0.42.1
-datasets>2.20.0
+datasets>=2.20.0
 peft>=0.11.1
-deepspeed>=0.14.3
+deepspeed>=0.14.4
 nltk==3.8.1
 rouge_chinese>=1.0.3
 ruamel.yaml>=0.18.6
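The `datasets` change is a one-character specifier fix: `>2.20.0` excludes version 2.20.0 itself, while `>=2.20.0` accepts it, presumably the version the bound was meant to allow. A quick demonstration with the `packaging` library (an assumption; it is not among this file's dependencies):

```python
# '>' is an exclusive lower bound, '>=' is inclusive.
from packaging.specifiers import SpecifierSet
from packaging.version import Version

v = Version("2.20.0")
print(v in SpecifierSet(">2.20.0"))   # False: 2.20.0 itself is excluded
print(v in SpecifierSet(">=2.20.0"))  # True: 2.20.0 is now accepted
```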