update the req and chatglm_tokenizer.py
parent 23773d94e2
commit 3e7735d4f7
@@ -673,7 +673,6 @@ if __name__ == "__main__":
         gpu_memory_utilization=0.9,
         enforce_eager=True,
         worker_use_ray=False,
-        engine_use_ray=False,
         disable_log_requests=True,
         max_model_len=MAX_MODEL_LENGTH,
     )
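This hunk tracks the vllm bump further down: engine_use_ray was removed from vLLM's engine arguments in the 0.6 line, so the keyword has to be dropped when building the engine. A minimal sketch (not the repo's exact code) of the resulting call, assuming MODEL_PATH and MAX_MODEL_LENGTH are defined as elsewhere in the demo:

    # Sketch: build the async engine without engine_use_ray, which
    # vllm>=0.6.2 no longer accepts as an engine argument.
    from vllm import AsyncEngineArgs, AsyncLLMEngine

    engine_args = AsyncEngineArgs(
        model=MODEL_PATH,                    # assumed defined as in the demo
        trust_remote_code=True,
        gpu_memory_utilization=0.9,
        enforce_eager=True,
        worker_use_ray=False,
        disable_log_requests=True,
        max_model_len=MAX_MODEL_LENGTH,      # assumed defined as in the demo
    )
    engine = AsyncLLMEngine.from_engine_args(engine_args)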
@@ -1,23 +1,23 @@
 torch>=2.4.0
 torchvision>=0.19.0
-transformers==4.44.0
-huggingface-hub>=0.24.5
+transformers>=4.45.0
+huggingface-hub>=0.25.1
 sentencepiece>=0.2.0
 jinja2>=3.1.4
-pydantic>=2.8.2
-timm>=1.0.8
+pydantic>=2.9.2
+timm>=1.0.9
 tiktoken>=0.7.0
 numpy==1.26.4 # Need less than 2.0.0
-accelerate>=0.33.0
-sentence_transformers>=3.0.1
-gradio>=4.42.0 # web demo
-openai>=1.43.0 # openai demo
+accelerate>=0.34.0
+sentence_transformers>=3.1.1
+gradio>=4.44.1 # web demo
+openai>=1.51.0 # openai demo
 einops>=0.8.0
 pillow>=10.4.0
 sse-starlette>=2.1.3
 bitsandbytes>=0.43.3 # INT4 Loading

-# vllm==0.5.4 # using with VLLM Framework
-# flash-attn>=2.6.1 # using with flash-attention 2
+# vllm>=0.6.2 # using with VLLM Framework
+# flash-attn>=2.6.3 # using with flash-attention 2
 # PEFT model, not need if you don't use PEFT finetune model.
-# peft>=0.12.2 # Using with finetune model
+# peft>=0.13.0 # Using with finetune model
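The pins above move the actively maintained dependencies forward one minor release each, while numpy stays pinned below 2.0. A quick sanity check that an environment satisfies the new floors — a hypothetical helper, not part of the repo:

    # Hypothetical helper: verify installed packages against the bumped pins.
    from importlib.metadata import version
    from packaging.version import Version

    floors = {
        "transformers": "4.45.0",
        "huggingface-hub": "0.25.1",
        "pydantic": "2.9.2",
        "timm": "1.0.9",
        "accelerate": "0.34.0",
    }
    for name, minimum in floors.items():
        assert Version(version(name)) >= Version(minimum), f"{name} too old"
    # numpy is pinned to exactly 1.26.4 because 2.x is not supported here
    assert Version(version("numpy")) < Version("2.0.0")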
@@ -37,11 +37,7 @@ MODEL_PATH = os.environ.get('MODEL_PATH', 'THUDM/glm-4-9b-chat')
 # return model, tokenizer


-tokenizer = AutoTokenizer.from_pretrained(
-    MODEL_PATH,
-    trust_remote_code=True,
-    encode_special_tokens=True
-)
+tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH,trust_remote_code=True)

 model = AutoModel.from_pretrained(
     MODEL_PATH,
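With transformers bumped past 4.45, the custom encode_special_tokens keyword is dropped from tokenizer loading; a plain AutoTokenizer call suffices. A minimal sketch of the simplified loading path — the dtype and device placement are illustrative choices, not part of the commit:

    # Sketch of the simplified loading path after this change.
    import torch
    from transformers import AutoModel, AutoTokenizer

    MODEL_PATH = "THUDM/glm-4-9b-chat"  # default from the hunk header above

    tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, trust_remote_code=True)
    model = AutoModel.from_pretrained(
        MODEL_PATH,
        trust_remote_code=True,
        torch_dtype=torch.bfloat16,     # illustrative dtype
        device_map="auto",              # illustrative device placement
    ).eval()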
@@ -30,7 +30,6 @@ def load_model_and_tokenizer(model_dir: str, enable_lora: bool):
         gpu_memory_utilization=0.9,
         enforce_eager=True,
         worker_use_ray=True,
-        engine_use_ray=False,
         disable_log_requests=True
         # If you run into OOM, consider enabling the parameter below
         # enable_chunked_prefill=True,
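The commented-out hint refers to vLLM's chunked prefill, which bounds prefill memory by splitting long prompts across scheduler steps. A sketch of what enabling it could look like; the token budget is an illustrative value, not taken from the repo:

    # Sketch of the OOM mitigation hinted at in the comment above.
    from vllm import AsyncEngineArgs

    engine_args = AsyncEngineArgs(
        model=model_dir,                  # as passed to load_model_and_tokenizer
        trust_remote_code=True,
        gpu_memory_utilization=0.9,
        enforce_eager=True,
        disable_log_requests=True,
        enable_chunked_prefill=True,      # split long prefills into smaller chunks
        max_num_batched_tokens=8192,      # tokens scheduled per step (illustrative)
    )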