From 034bf5c448ba6dd39bbaaa47f38360bcd9e11c6d Mon Sep 17 00:00:00 2001
From: sixgod
Date: Sat, 12 Oct 2024 21:30:13 +0800
Subject: [PATCH] Update README_en.md: Add GLM-4v-9B model support for the vLLM framework

---
 README_en.md | 35 ++++++++++++++++++++++++++++++++++-
 1 file changed, 34 insertions(+), 1 deletion(-)

diff --git a/README_en.md b/README_en.md
index 2d52d50..9433030 100644
--- a/README_en.md
+++ b/README_en.md
@@ -9,6 +9,7 @@ ## Update
+- 🔥 **News**: ```2024/09/12```: Add GLM-4v-9B model support for the vLLM framework.
 - 🔥 **News**: ```2024/09/06```: Add support for OpenAI API server on the GLM-4v-9B model.
 - 🔥 **News**: ```2024/09/05```: We open-sourced a model enabling LLMs to generate fine-grained citations in long-context Q&A: [longcite-glm4-9b](https://huggingface.co/THUDM/LongCite-glm4-9b), along with the
@@ -269,7 +270,39 @@ with torch.no_grad():
     print(tokenizer.decode(outputs[0]))
 ```
-Note: GLM-4V-9B does not support calling using vLLM method yet.
+Use the vLLM backend for inference:
+
+```python
+from PIL import Image
+from vllm import LLM, SamplingParams
+
+model_name = "THUDM/glm-4v-9b"
+
+# Load GLM-4v-9B; trust_remote_code is required for the model's custom code.
+llm = LLM(model=model_name,
+          tensor_parallel_size=1,
+          max_model_len=8192,
+          trust_remote_code=True,
+          enforce_eager=True)
+stop_token_ids = [151329, 151336, 151338]  # GLM-4 stop token ids
+sampling_params = SamplingParams(temperature=0.2,
+                                 max_tokens=1024,
+                                 stop_token_ids=stop_token_ids)
+
+prompt = "What's the content of the image?"
+image = Image.open("your image").convert('RGB')  # replace "your image" with a local image path
+inputs = {
+    "prompt": prompt,
+    "multi_modal_data": {
+        "image": image
+    },
+}
+outputs = llm.generate(inputs, sampling_params=sampling_params)
+
+for o in outputs:
+    generated_text = o.outputs[0].text
+    print(generated_text)
+```
 
 ## Complete project list
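
For comparison with the offline `LLM(...)` usage added above, the 2024/09/06 update mentions an OpenAI API server for GLM-4v-9B; a request against such a server could look like the minimal sketch below. The base URL `http://localhost:8000/v1`, the served model name `glm-4v-9b`, and the image path are illustrative assumptions, not values specified in this patch.

```python
import base64
from openai import OpenAI

# Assumed endpoint of a locally running vLLM OpenAI-compatible server serving
# glm-4v-9b; adjust base_url, api_key, and model name to your own setup.
client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

# Encode a local image as a base64 data URL (the path is a placeholder).
with open("your_image.jpg", "rb") as f:
    image_b64 = base64.b64encode(f.read()).decode("utf-8")

response = client.chat.completions.create(
    model="glm-4v-9b",
    messages=[{
        "role": "user",
        "content": [
            {"type": "text", "text": "What's the content of the image?"},
            {"type": "image_url",
             "image_url": {"url": f"data:image/jpeg;base64,{image_b64}"}},
        ],
    }],
    temperature=0.2,
    max_tokens=1024,
)
print(response.choices[0].message.content)
```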