From 59994bb5766d3ef0504fc9befdc5383c1648b7be Mon Sep 17 00:00:00 2001
From: pubw <2287177244@qq.com>
Date: Thu, 9 Jan 2025 20:47:38 +0800
Subject: [PATCH] vllm

---
Notes (not part of the commit message; ignored by `git am`):

This patch adds vllm.sh, a one-line script that runs
`python -m vllm.entrypoints.openai.api_server` with a fixed set of flags
(model directory, served model name, tensor-parallel size 4, a per-prompt
image limit of 2, GPU memory utilization 0.95, port 12345).

NOTE(review): --served-model-name is "Qwen2-7B-Instruct" while --model points
at a directory named "Qwen2-VL-72B-Instruct-GPTQ-Int4". Confirm the alias is
intentional; presumably clients must request the served name, so renaming it
would be a client-visible change.

NOTE(review): the model path is an absolute path under /home/pubw, and the
script has no shebang line, so it presumably has to be run as `bash vllm.sh`
on a machine that has that directory. Confirm before reusing elsewhere.

 vllm.sh | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 vllm.sh

diff --git a/vllm.sh b/vllm.sh
new file mode 100644
index 0000000..14a33f0
--- /dev/null
+++ b/vllm.sh
@@ -0,0 +1 @@
+python -m vllm.entrypoints.openai.api_server --model /home/pubw/proj/Qwen2-VL-72B-Instruct-GPTQ-Int4 --served-model-name Qwen2-7B-Instruct --tensor-parallel-size 4 --limit-mm-per-prompt image=2 --gpu-memory-utilization 0.95 --port 12345