Update the requirements and chatglm_tokenizer.py

zR 2024-10-06 14:09:05 +08:00
parent 23773d94e2
commit 3e7735d4f7
4 changed files with 12 additions and 18 deletions


@@ -673,7 +673,6 @@ if __name__ == "__main__":
         gpu_memory_utilization=0.9,
         enforce_eager=True,
         worker_use_ray=False,
-        engine_use_ray=False,
         disable_log_requests=True,
         max_model_len=MAX_MODEL_LENGTH,
     )
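
The deleted engine_use_ray argument was removed from vLLM's AsyncEngineArgs in the 0.6 series, so the server now builds the engine without it. A minimal sketch of the resulting setup, assuming MODEL_PATH and MAX_MODEL_LENGTH are defined earlier in the script as the hunk context suggests:

from vllm.engine.arg_utils import AsyncEngineArgs
from vllm.engine.async_llm_engine import AsyncLLMEngine

engine_args = AsyncEngineArgs(
    model=MODEL_PATH,                # assumed to be set earlier in the script
    tokenizer=MODEL_PATH,
    gpu_memory_utilization=0.9,
    enforce_eager=True,
    worker_use_ray=False,
    disable_log_requests=True,
    max_model_len=MAX_MODEL_LENGTH,  # assumed to be set earlier in the script
)
engine = AsyncLLMEngine.from_engine_args(engine_args)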


@@ -1,23 +1,23 @@
 torch>=2.4.0
 torchvision>=0.19.0
-transformers==4.44.0
-huggingface-hub>=0.24.5
+transformers>=4.45.0
+huggingface-hub>=0.25.1
 sentencepiece>=0.2.0
 jinja2>=3.1.4
-pydantic>=2.8.2
-timm>=1.0.8
+pydantic>=2.9.2
+timm>=1.0.9
 tiktoken>=0.7.0
 numpy==1.26.4 # needs to be less than 2.0.0
-accelerate>=0.33.0
-sentence_transformers>=3.0.1
-gradio>=4.42.0 # web demo
-openai>=1.43.0 # openai demo
+accelerate>=0.34.0
+sentence_transformers>=3.1.1
+gradio>=4.44.1 # web demo
+openai>=1.51.0 # openai demo
 einops>=0.8.0
 pillow>=10.4.0
 sse-starlette>=2.1.3
 bitsandbytes>=0.43.3 # INT4 loading
-# vllm==0.5.4 # use with the vLLM framework
-# flash-attn>=2.6.1 # use with FlashAttention 2
+# vllm>=0.6.2 # use with the vLLM framework
+# flash-attn>=2.6.3 # use with FlashAttention 2
 # PEFT model, not needed if you don't use a PEFT fine-tuned model.
-# peft>=0.12.2 # use with a fine-tuned model
+# peft>=0.13.0 # use with a fine-tuned model
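
After bumping the pins, an existing environment can be checked against the new floors with only the standard library. This helper is illustrative, not part of the commit:

from importlib.metadata import version, PackageNotFoundError

# Floors taken from the updated requirements above.
floors = {
    "transformers": "4.45.0",
    "huggingface-hub": "0.25.1",
    "pydantic": "2.9.2",
    "accelerate": "0.34.0",
    "gradio": "4.44.1",
    "openai": "1.51.0",
}
for pkg, floor in floors.items():
    try:
        print(f"{pkg}: installed {version(pkg)}, required >= {floor}")
    except PackageNotFoundError:
        print(f"{pkg}: missing, required >= {floor}")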


@@ -37,11 +37,7 @@ MODEL_PATH = os.environ.get('MODEL_PATH', 'THUDM/glm-4-9b-chat')
 # return model, tokenizer
-tokenizer = AutoTokenizer.from_pretrained(
-    MODEL_PATH,
-    trust_remote_code=True,
-    encode_special_tokens=True
-)
+tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, trust_remote_code=True)
 model = AutoModel.from_pretrained(
     MODEL_PATH,
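
The multi-line tokenizer call collapses to one line, dropping encode_special_tokens=True, presumably because the updated tokenizer code this commit targets no longer requires it. A sketch of the simplified loading path; the dtype and device settings are illustrative assumptions, not part of the diff:

import torch
from transformers import AutoModel, AutoTokenizer

MODEL_PATH = "THUDM/glm-4-9b-chat"  # default from the script's environment lookup
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, trust_remote_code=True)
model = AutoModel.from_pretrained(
    MODEL_PATH,
    trust_remote_code=True,
    torch_dtype=torch.bfloat16,  # assumption: a common setting for this model
    device_map="auto",           # assumption: requires accelerate
).eval()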


@@ -30,7 +30,6 @@ def load_model_and_tokenizer(model_dir: str, enable_lora: bool):
         gpu_memory_utilization=0.9,
         enforce_eager=True,
         worker_use_ray=True,
-        engine_use_ray=False,
         disable_log_requests=True
         # If you run into OOM, consider enabling the parameters below
         # enable_chunked_prefill=True,
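
The trailing comment advises enabling chunked prefill when OOM occurs. Assuming the surrounding call is the AsyncEngineArgs constructor the context suggests, that would look roughly like this; the token cap value is an illustrative assumption, not from the commit:

engine_args = AsyncEngineArgs(
    model=model_dir,              # parameter of load_model_and_tokenizer
    gpu_memory_utilization=0.9,
    enforce_eager=True,
    worker_use_ray=True,
    disable_log_requests=True,
    enable_chunked_prefill=True,  # split long prompts into prefill chunks
    max_num_batched_tokens=8192,  # cap tokens scheduled per step; assumption
)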