From 205b7db3cc00e2becf95bd293c3b3225ae033664 Mon Sep 17 00:00:00 2001 From: Lao <khazzz1c@gmail.com> Date: Wed, 5 Jun 2024 11:48:20 +0800 Subject: [PATCH] Update openai_api_server.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Raise vLLM’s gpu_memory_utilization from 0.3 to 0.9 --- basic_demo/openai_api_server.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/basic_demo/openai_api_server.py b/basic_demo/openai_api_server.py index 8f64053..20eb881 100644 --- a/basic_demo/openai_api_server.py +++ b/basic_demo/openai_api_server.py @@ -533,7 +533,7 @@ if __name__ == "__main__": tensor_parallel_size=1, dtype="bfloat16", trust_remote_code=True, - gpu_memory_utilization=0.3, + gpu_memory_utilization=0.9, enforce_eager=True, worker_use_ray=True, engine_use_ray=False,