update the req and chatglm_tokenizer.py
commit 3e7735d4f7
parent 23773d94e2
@@ -673,7 +673,6 @@ if __name__ == "__main__":
         gpu_memory_utilization=0.9,
         enforce_eager=True,
         worker_use_ray=False,
-        engine_use_ray=False,
         disable_log_requests=True,
         max_model_len=MAX_MODEL_LENGTH,
     )
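The only change in this hunk is dropping `engine_use_ray`, apparently because the newer vLLM pinned below (vllm>=0.6.2) no longer accepts it as an engine argument. A minimal sketch of the resulting engine construction, with the model id and MAX_MODEL_LENGTH value assumed here for illustration (they come from the surrounding demo script, which this hunk does not show):

from vllm import AsyncEngineArgs, AsyncLLMEngine

MAX_MODEL_LENGTH = 8192  # assumed value; defined elsewhere in the demo script

engine_args = AsyncEngineArgs(
    model="THUDM/glm-4-9b-chat",   # assumed model path
    gpu_memory_utilization=0.9,
    enforce_eager=True,
    worker_use_ray=False,
    disable_log_requests=True,
    max_model_len=MAX_MODEL_LENGTH,
    # engine_use_ray is intentionally not passed anymore
)
engine = AsyncLLMEngine.from_engine_args(engine_args)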
@@ -1,23 +1,23 @@
 torch>=2.4.0
 torchvision>=0.19.0
-transformers==4.44.0
-huggingface-hub>=0.24.5
+transformers>=4.45.0
+huggingface-hub>=0.25.1
 sentencepiece>=0.2.0
 jinja2>=3.1.4
-pydantic>=2.8.2
-timm>=1.0.8
+pydantic>=2.9.2
+timm>=1.0.9
 tiktoken>=0.7.0
 numpy==1.26.4 # Need less than 2.0.0
-accelerate>=0.33.0
-sentence_transformers>=3.0.1
-gradio>=4.42.0 # web demo
-openai>=1.43.0 # openai demo
+accelerate>=0.34.0
+sentence_transformers>=3.1.1
+gradio>=4.44.1 # web demo
+openai>=1.51.0 # openai demo
 einops>=0.8.0
 pillow>=10.4.0
 sse-starlette>=2.1.3
 bitsandbytes>=0.43.3 # INT4 Loading

-# vllm==0.5.4 # using with VLLM Framework
-# flash-attn>=2.6.1 # using with flash-attention 2
+# vllm>=0.6.2 # using with VLLM Framework
+# flash-attn>=2.6.3 # using with flash-attention 2
 # PEFT model, not need if you don't use PEFT finetune model.
-# peft>=0.12.2 # Using with finetune model
+# peft>=0.13.0 # Using with finetune model
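To confirm that a local environment already satisfies the bumped pins, the standard library is enough; a minimal sketch, with the package list abridged to the pins that changed in this hunk:

from importlib.metadata import version, PackageNotFoundError

for pkg in ("transformers", "huggingface-hub", "pydantic", "accelerate",
            "sentence_transformers", "gradio", "openai"):
    try:
        print(f"{pkg}: {version(pkg)}")
    except PackageNotFoundError:
        print(f"{pkg}: not installed")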
@@ -37,11 +37,7 @@ MODEL_PATH = os.environ.get('MODEL_PATH', 'THUDM/glm-4-9b-chat')
 # return model, tokenizer


-tokenizer = AutoTokenizer.from_pretrained(
-    MODEL_PATH,
-    trust_remote_code=True,
-    encode_special_tokens=True
-)
+tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH,trust_remote_code=True)

 model = AutoModel.from_pretrained(
     MODEL_PATH,
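The multi-line tokenizer call is collapsed and `encode_special_tokens=True` is dropped; with the transformers>=4.45.0 pin above, the updated GLM-4 tokenizer code apparently no longer needs that flag. A minimal loading sketch, assuming the default model id from the hunk header:

from transformers import AutoTokenizer

MODEL_PATH = "THUDM/glm-4-9b-chat"  # default from the demo script
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, trust_remote_code=True)
print(tokenizer("Hello from GLM-4")["input_ids"][:8])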
@@ -30,7 +30,6 @@ def load_model_and_tokenizer(model_dir: str, enable_lora: bool):
         gpu_memory_utilization=0.9,
         enforce_eager=True,
         worker_use_ray=True,
-        engine_use_ray=False,
         disable_log_requests=True
         # If you run into OOM, it is recommended to enable the following parameter
         # enable_chunked_prefill=True,
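As in the first hunk, only `engine_use_ray` is removed here. If OOM does occur, the commented-out `enable_chunked_prefill=True` can be switched on; a hedged sketch of what that looks like, assuming vLLM >= 0.6.x and with the model path and tensor_parallel_size chosen purely for illustration:

from vllm import AsyncEngineArgs, AsyncLLMEngine

engine_args = AsyncEngineArgs(
    model="THUDM/glm-4-9b-chat",   # illustrative model path
    tensor_parallel_size=1,        # illustrative value
    gpu_memory_utilization=0.9,
    enforce_eager=True,
    worker_use_ray=True,
    disable_log_requests=True,
    enable_chunked_prefill=True,   # the OOM mitigation suggested by the comment above
)
engine = AsyncLLMEngine.from_engine_args(engine_args)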