Update openai_api_server.py
Raised vLLM's `gpu_memory_utilization` from 0.3 to 0.9.
This commit is contained in:
parent
d95f131b03
commit
205b7db3cc
|
@@ -533,7 +533,7 @@ if __name__ == "__main__":
|
|||
tensor_parallel_size=1,
|
||||
dtype="bfloat16",
|
||||
trust_remote_code=True,
|
||||
gpu_memory_utilization=0.3,
|
||||
gpu_memory_utilization=0.9,
|
||||
enforce_eager=True,
|
||||
worker_use_ray=True,
|
||||
engine_use_ray=False,
|
||||
|
|
Loading…
Reference in New Issue