Change the gpu_memory_utilization value in the vLLM demo

This commit is contained in:
Final 2024-06-05 14:40:30 +08:00 committed by GitHub
parent eec34b1038
commit 492d4cc63d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed file with 1 addition and 1 deletion

View File

@@ -25,7 +25,7 @@ def load_model_and_tokenizer(model_dir: str):
tensor_parallel_size=1,
dtype="bfloat16",
trust_remote_code=True,
-gpu_memory_utilization=0.3,
+gpu_memory_utilization=0.9,
enforce_eager=True,
worker_use_ray=True,
engine_use_ray=False,