Dependency update
parent e7a1124bf9
commit 4ab7a1efd1
@@ -10,8 +10,8 @@
 Read this in [English](README_en.md)

 ## Project Updates

-- 🔥🔥 **News**: ``2024/7/9``: GLM-4-9B-Chat
+- 🔥🔥 **News**: ``2024/7/16``: The `transformers` version that the GLM-4-9B-Chat model depends on has been upgraded to `4.42.4`. Please update the model configuration files and update the dependencies per `basic_demo/requirements.txt`.
+- 🔥 **News**: ``2024/7/9``: GLM-4-9B-Chat
   has been adapted to [Ollama](https://github.com/ollama/ollama) and [Llama.cpp](https://github.com/ggerganov/llama.cpp); see this [PR](https://github.com/ggerganov/llama.cpp/pull/8031) for the specific details.
 - 🔥 **News**: ``2024/7/1``: We have updated the fine-tuning of GLM-4V-9B. You need to update the run files and configuration files in our model repository
   to support this feature. For more fine-tuning details (such as dataset format and GPU memory requirements), see [finetune_demo](finetune_demo).
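The Ollama adaptation announced above can be exercised from Python. A minimal sketch, assuming `pip install ollama`, a running local Ollama server, and that the adapted model has been pulled under the `glm4` tag (the tag name is an assumption, not stated in this commit):

```python
# Minimal sketch: chat with the Ollama-adapted GLM-4-9B-Chat model.
# Assumptions: `pip install ollama`, a local Ollama server is running,
# and the model was pulled under the "glm4" tag (hypothetical tag name).
import ollama

response = ollama.chat(
    model="glm4",
    messages=[{"role": "user", "content": "Briefly introduce GLM-4-9B-Chat."}],
)
print(response["message"]["content"])
```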
@@ -8,9 +8,10 @@
 </p>

 ## Update

-- 🔥🔥 **News**: ``2024/7/9``: The GLM-4-9B-Chat model has been adapted to [Ollama](https://github.com/ollama/ollama)
+- 🔥🔥 **News**: ``2024/7/16``: The `transformers` version that the GLM-4-9B-Chat model depends on has been upgraded to `4.42.4`. Please update the model configuration file and refer to `basic_demo/requirements.txt` to update the dependencies.
+- 🔥 **News**: ``2024/7/9``: The GLM-4-9B-Chat model has been adapted to [Ollama](https://github.com/ollama/ollama)
   and [Llama.cpp](https://github.com/ggerganov/llama.cpp); you can check the specific details
   in this [PR](https://github.com/ggerganov/llama.cpp/pull/8031).
 - 🔥 **News**: ``2024/7/1``: We have updated the multimodal fine-tuning of GLM-4V-9B. You need to update the run file and
   configuration file of our model repository to support this feature. For more fine-tuning details (such as dataset
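The `transformers` upgrade is the substance of this commit. A minimal loading sketch after updating the dependencies, using the `THUDM/glm-4-9b-chat` model id that appears in the demo code below; the dtype and `device_map` choices here are illustrative, not prescribed by this commit:

```python
# Sketch: loading GLM-4-9B-Chat after upgrading to transformers 4.42.4.
# trust_remote_code=True is required because the checkpoint ships custom
# modeling code; dtype and device_map are illustrative choices.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_PATH = "THUDM/glm-4-9b-chat"

tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
    device_map="auto",
)
```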
@@ -1,27 +1,20 @@
-# use vllm
-# vllm>=0.5.0
-
 torch>=2.3.0
 torchvision>=0.18.0
-transformers==4.40.0
+transformers==4.42.4
 huggingface-hub>=0.23.1
 sentencepiece>=0.2.0
-pydantic>=2.7.1
-timm>=0.9.16
+pydantic>=2.8.2
+timm>=1.0.7
 tiktoken>=0.7.0
-accelerate>=0.30.1
-sentence_transformers>=2.7.0
-
-# web demo
-gradio>=4.33.0
-
-# openai demo
-openai>=1.34.0
-einops>=0.7.0
-sse-starlette>=2.1.0
-
-# INT4
-bitsandbytes>=0.43.1
+accelerate>=0.32.1
+sentence_transformers>=3.0.1
+gradio>=4.38.1 # web demo
+openai>=1.35.0 # openai demo
+einops>=0.8.0
+sse-starlette>=2.1.2
+bitsandbytes>=0.43.1 # INT4 Loading

+# vllm>=0.5.2
+# flash-attn>=2.5.9 # using with flash-attention 2
 # PEFT model, not need if you don't use PEFT finetune model.
-# peft>=0.11.0
+# peft>=0.11.1
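After pulling these changes, it can help to verify that an existing environment satisfies the new pins. A small sketch using the standard library plus `packaging` (the package selection checked here is an illustrative subset of the file above):

```python
# Sanity check: do installed packages satisfy the updated pins above?
from importlib.metadata import version, PackageNotFoundError
from packaging.specifiers import SpecifierSet

PINS = {
    "transformers": "==4.42.4",
    "accelerate": ">=0.32.1",
    "gradio": ">=4.38.1",
    "openai": ">=1.35.0",
}

for name, spec in PINS.items():
    try:
        installed = version(name)
    except PackageNotFoundError:
        print(f"{name}: not installed")
        continue
    ok = installed in SpecifierSet(spec)
    print(f"{name} {installed} {'satisfies' if ok else 'violates'} {spec}")
```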
@@ -15,7 +15,7 @@ from transformers import AutoTokenizer
 from vllm import SamplingParams, AsyncEngineArgs, AsyncLLMEngine
 from typing import List, Dict

-MODEL_PATH = 'THUDM/glm-4-9b'
+MODEL_PATH = 'THUDM/glm-4-9b-chat'


 def load_model_and_tokenizer(model_dir: str):
@@ -25,7 +25,7 @@ def load_model_and_tokenizer(model_dir: str):
         tensor_parallel_size=1,
         dtype="bfloat16",
         trust_remote_code=True,
-        gpu_memory_utilization=0.3,
+        gpu_memory_utilization=0.9,
         enforce_eager=True,
         worker_use_ray=True,
         engine_use_ray=False,
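These keyword arguments configure vllm's async engine; raising `gpu_memory_utilization` from 0.3 to 0.9 lets the engine reserve most of the GPU for weights and KV cache. A sketch of how they fit together, mirroring the demo's settings and assuming vllm 0.5.x (where `worker_use_ray` and `engine_use_ray` were still accepted):

```python
# Sketch: building the async engine with the arguments shown above.
# Assumes vllm 0.5.x per basic_demo/requirements.txt.
from vllm import AsyncEngineArgs, AsyncLLMEngine

MODEL_PATH = 'THUDM/glm-4-9b-chat'

engine_args = AsyncEngineArgs(
    model=MODEL_PATH,
    tokenizer=MODEL_PATH,
    tensor_parallel_size=1,       # single GPU
    dtype="bfloat16",
    trust_remote_code=True,       # GLM-4 ships custom modeling code
    gpu_memory_utilization=0.9,   # reserve 90% of VRAM for weights + KV cache
    enforce_eager=True,           # skip CUDA graph capture
    worker_use_ray=True,
    engine_use_ray=False,
)
engine = AsyncLLMEngine.from_engine_args(engine_args)
```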
@@ -63,7 +63,6 @@ async def vllm_gen(messages: List[Dict[str, str]], top_p: float, temperature: fl
         "use_beam_search": False,
         "length_penalty": 1,
         "early_stopping": False,
         "stop_token_ids": [151329, 151336, 151338],
         "ignore_eos": False,
         "max_tokens": max_dec_len,
         "logprobs": None,
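These entries populate a per-request parameter dict that is typically unpacked into vllm's `SamplingParams`. A sketch under the same vllm 0.5.x assumption; the `top_p`, `temperature`, and `max_dec_len` values are illustrative stand-ins for the arguments the demo receives per request:

```python
# Sketch: unpacking the parameter dict above into vllm's SamplingParams.
# Assumes vllm 0.5.x, where the beam-search fields were still accepted.
from vllm import SamplingParams

top_p, temperature, max_dec_len = 0.8, 0.8, 1024  # illustrative values

params_dict = {
    "temperature": temperature,
    "top_p": top_p,
    "use_beam_search": False,
    "length_penalty": 1,
    "early_stopping": False,
    "stop_token_ids": [151329, 151336, 151338],  # GLM-4 end-of-turn tokens
    "ignore_eos": False,
    "max_tokens": max_dec_len,
    "logprobs": None,
}
sampling_params = SamplingParams(**params_dict)
```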
@@ -1,7 +1,7 @@
 jieba>=0.42.1
-datasets>2.20.0
+datasets>=2.20.0
 peft>=0.11.1
-deepspeed>=0.14.3
+deepspeed>=0.14.4
 nltk==3.8.1
 rouge_chinese>=1.0.3
 ruamel.yaml>=0.18.6
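Note that the `datasets` change is a correctness fix rather than a version bump: `>2.20.0` excludes version 2.20.0 itself, while `>=2.20.0` admits it. A quick illustration with the `packaging` library:

```python
# Why `datasets>=2.20.0` differs from `datasets>2.20.0`:
# the exclusive form rejects the boundary version 2.20.0 itself.
from packaging.specifiers import SpecifierSet

print("2.20.0" in SpecifierSet(">2.20.0"))   # False: 2.20.0 excluded
print("2.20.0" in SpecifierSet(">=2.20.0"))  # True:  2.20.0 allowed
print("2.20.1" in SpecifierSet(">2.20.0"))   # True
```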