From 205b7db3cc00e2becf95bd293c3b3225ae033664 Mon Sep 17 00:00:00 2001
From: Lao <khazzz1c@gmail.com>
Date: Wed, 5 Jun 2024 11:48:20 +0800
Subject: [PATCH] Update openai_api_server.py
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Raise vLLM's gpu_memory_utilization from 0.3 to 0.9 in the demo server
---
 basic_demo/openai_api_server.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/basic_demo/openai_api_server.py b/basic_demo/openai_api_server.py
index 8f64053..20eb881 100644
--- a/basic_demo/openai_api_server.py
+++ b/basic_demo/openai_api_server.py
@@ -533,7 +533,7 @@ if __name__ == "__main__":
         tensor_parallel_size=1,
         dtype="bfloat16",
         trust_remote_code=True,
-        gpu_memory_utilization=0.3,
+        gpu_memory_utilization=0.9,
         enforce_eager=True,
         worker_use_ray=True,
         engine_use_ray=False,