From e3e6de52c45290291f984cfe934839d0954a17ef Mon Sep 17 00:00:00 2001 From: sixgod Date: Fri, 1 Nov 2024 09:00:39 +0000 Subject: [PATCH 1/8] transformers4.46 and vllm0.6.3 --- README.md | 21 ++++++++++++--------- README_en.md | 20 +++++++++++--------- basic_demo/glm4v_server.py | 5 ++++- basic_demo/glm_server.py | 5 +---- basic_demo/openai_api_request.py | 15 ++++++++++----- basic_demo/requirements.txt | 2 +- basic_demo/trans_cli_vision_demo.py | 2 +- basic_demo/trans_web_demo.py | 5 ++++- basic_demo/trans_web_vision_demo.py | 4 ++-- 9 files changed, 46 insertions(+), 33 deletions(-) diff --git a/README.md b/README.md index b82ef4b..72eb90b 100644 --- a/README.md +++ b/README.md @@ -11,6 +11,7 @@ Read this in [English](README_en.md) ## ้กน็›ฎๆ›ดๆ–ฐ +- ๐Ÿ”ฅ๐Ÿ”ฅ **News**: ```2024/11/01```: ๆ”ฏๆŒไบ† GLM-4-9B-Chat-hf ๅ’Œ GLM-4v-9B ๆจกๅž‹ๅœจ vLLM 0.6.3 ไปฅไธŠ็‰ˆๆœฌๅ’Œ transformers 4.46.0 ไปฅไธŠ็‰ˆๆœฌ่ฟ่กŒ - ๐Ÿ”ฅ๐Ÿ”ฅ **News**: ```2024/10/25```: ๆˆ‘ไปฌๅผ€ๆบไบ†็ซฏๅˆฐ็ซฏไธญ่‹ฑ่ฏญ้Ÿณๅฏน่ฏๆจกๅž‹ [GLM-4-Voice](https://github.com/THUDM/GLM-4-Voice) - ๐Ÿ”ฅ **News**: ```2024/10/12```: ๅขžๅŠ ไบ† GLM-4v-9B ๆจกๅž‹ๅฏนvllmๆก†ๆžถ็š„ๆ”ฏๆŒ - ๐Ÿ”ฅ **News**: ```2024/09/06```: ๅขžๅŠ ไบ†ๅœจ GLM-4v-9B ๆจกๅž‹ไธŠๆž„ๅปบOpenAI APIๅ…ผๅฎน็š„ๆœๅŠก็ซฏ @@ -54,12 +55,14 @@ GLM-4V-9Bใ€‚**GLM-4V-9B** ๅ…ทๅค‡ 1120 * 1120 ้ซ˜ๅˆ†่พจ็އไธ‹็š„ไธญ่‹ฑๅŒ่ฏญๅคš ## Model List -| Model | Type | Seq Length | Download | Online Demo | -|------------------|------|------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| GLM-4-9B | Base | 8K | [๐Ÿค— Huggingface](https://huggingface.co/THUDM/glm-4-9b) [๐Ÿค– ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b) 
[๐ŸŸฃ WiseModel](https://wisemodel.cn/models/ZhipuAI/glm-4-9b) | / | -| GLM-4-9B-Chat | Chat | 128K | [๐Ÿค— Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat) [๐Ÿค– ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat) [๐ŸŸฃ WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4-9B-Chat) | [๐Ÿค– ModelScope CPU](https://modelscope.cn/studios/dash-infer/GLM-4-Chat-DashInfer-Demo/summary)
[๐Ÿค– ModelScope vLLM](https://modelscope.cn/studios/ZhipuAI/glm-4-9b-chat-vllm/summary) | -| GLM-4-9B-Chat-1M | Chat | 1M | [๐Ÿค— Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-1m) [๐Ÿค– ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-1m) [๐ŸŸฃ WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4-9B-Chat-1M) | / | -| GLM-4V-9B | Chat | 8K | [๐Ÿค— Huggingface](https://huggingface.co/THUDM/glm-4v-9b) [๐Ÿค– ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4v-9b) [๐ŸŸฃ WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4V-9B ) | [๐Ÿค– ModelScope](https://modelscope.cn/studios/ZhipuAI/glm-4v-9b-Demo/summary) | +| Model | Type | Seq Length | Transformers | vLLM | Download | Online Demo | +|---------------------|------|------------|--------------|----------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| GLM-4-9B | Base | 8K | <= 4.45 | <= 0.6.2 | [๐Ÿค— Huggingface](https://huggingface.co/THUDM/glm-4-9b) [๐Ÿค– ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b) [๐ŸŸฃ WiseModel](https://wisemodel.cn/models/ZhipuAI/glm-4-9b) | / | +| GLM-4-9B-Chat | Chat | 128K | <= 4.45 | <= 0.6.2 | [๐Ÿค— Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat) [๐Ÿค– ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat) [๐ŸŸฃ WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4-9B-Chat) | [๐Ÿค– ModelScope CPU](https://modelscope.cn/studios/dash-infer/GLM-4-Chat-DashInfer-Demo/summary)
[๐Ÿค– ModelScope vLLM](https://modelscope.cn/studios/ZhipuAI/glm-4-9b-chat-vllm/summary) | +| GLM-4-9B-Chat-HF | Chat | 128K | >= 4.46 | <= 0.6.2 | [๐Ÿค— Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-hf) [๐Ÿค– ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-hf) | [๐Ÿค– ModelScope CPU](https://modelscope.cn/studios/dash-infer/GLM-4-Chat-DashInfer-Demo/summary)
[๐Ÿค– ModelScope vLLM](https://modelscope.cn/studios/ZhipuAI/glm-4-9b-chat-vllm/summary) | +| GLM-4-9B-Chat-1M | Chat | 1M | <= 4.45 | <= 0.6.2 | [๐Ÿค— Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-1m) [๐Ÿค– ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-1m) [๐ŸŸฃ WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4-9B-Chat-1M) | / | +| GLM-4-9B-Chat-1M-HF | Chat | 1M | >= 4.46 | <= 0.6.2 | [๐Ÿค— Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-1m-hf) [๐Ÿค– ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-1m-hf) | / | +| GLM-4V-9B | Chat | 8K | >= 4.46 | <= 0.6.2 | [๐Ÿค— Huggingface](https://huggingface.co/THUDM/glm-4v-9b) [๐Ÿค– ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4v-9b) [๐ŸŸฃ WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4V-9B) | [๐Ÿค– ModelScope](https://modelscope.cn/studios/ZhipuAI/glm-4v-9b-Demo/summary) | ## ่ฏ„ๆต‹็ป“ๆžœ @@ -151,7 +154,7 @@ from transformers import AutoModelForCausalLM, AutoTokenizer import os os.environ['CUDA_VISIBLE_DEVICES'] = '0' # ่ฎพ็ฝฎ GPU ็ผ–ๅท๏ผŒๅฆ‚ๆžœๅ•ๆœบๅ•ๅกๆŒ‡ๅฎšไธ€ไธช๏ผŒๅ•ๆœบๅคšๅกๆŒ‡ๅฎšๅคšไธช GPU ็ผ–ๅท -MODEL_PATH = "THUDM/glm-4-9b-chat" +MODEL_PATH = "THUDM/glm-4-9b-chat-hf" device = "cuda" if torch.cuda.is_available() else "cpu" @@ -192,7 +195,7 @@ from vllm import LLM, SamplingParams # max_model_len, tp_size = 1048576, 4 # ๅฆ‚ๆžœ้‡่ง OOM ็Žฐ่ฑก๏ผŒๅปบ่ฎฎๅ‡ๅฐ‘max_model_len๏ผŒๆˆ–่€…ๅขžๅŠ tp_size max_model_len, tp_size = 131072, 1 -model_name = "THUDM/glm-4-9b-chat" +model_name = "THUDM/glm-4-9b-chat-hf" prompt = [{"role": "user", "content": "ไฝ ๅฅฝ"}] tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True) @@ -290,7 +293,7 @@ for o in outputs: ## ๅฎŒๆ•ด้กน็›ฎๅˆ—่กจ -ๅฆ‚ๆžœไฝ ๆƒณๆ›ด่ฟ›ไธ€ๆญฅไบ†่งฃ GLM-4-9B ็ณปๅˆ—ๅผ€ๆบๆจกๅž‹๏ผŒๆœฌๅผ€ๆบไป“ๅบ“้€š่ฟ‡ไปฅไธ‹ๅ†…ๅฎนไธบๅผ€ๅ‘่€…ๆไพ›ๅŸบ็ก€็š„ GLM-4-9B็š„ไฝฟ็”จๅ’Œๅผ€ๅ‘ไปฃ็  +ๅฆ‚ๆžœไฝ ๆƒณๆ›ด่ฟ›ไธ€ๆญฅไบ†่งฃ GLM-4-9B 
系列开源模型,本开源仓库通过以下内容为开发者提供基础的 GLM-4-9B 的使用和开发代码 + [basic_demo](basic_demo/README.md): 在这里包含了 + 使用 transformers 和 vLLM 后端的交互代码 diff --git a/README_en.md b/README_en.md index 4719a3e..554f0cd 100644 --- a/README_en.md +++ b/README_en.md @@ -8,7 +8,7 @@

## Update - +- ๐Ÿ”ฅ๐Ÿ”ฅ **News**: ```2024/11/01```: Support for GLM-4-9B-Chat-hf and GLM-4v-9B models on vLLM >= 0.6.3 and transformers >= 4.46.0 - ๐Ÿ”ฅ๐Ÿ”ฅ **News**: ```2024/10/25```: We have open-sourced the end-to-end Chinese-English voice dialogue model [GLM-4-Voice](https://github.com/THUDM/GLM-4-Voice). - ๐Ÿ”ฅ **News**: ```2024/10/12```: Add GLM-4v-9B model support for vllm framework. - ๐Ÿ”ฅ **News**: ```2024/09/06```: Add support for OpenAI API server on the GLM-4v-9B model. @@ -67,12 +67,14 @@ GPT-4-turbo-2024-04-09, Gemini 1.0 Pro, Qwen-VL-Max, and Claude 3 Opus. ## Model List -| Model | Type | Seq Length | Download | Online Demo | -|------------------|------|------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| GLM-4-9B | Base | 8K | [๐Ÿค— Huggingface](https://huggingface.co/THUDM/glm-4-9b) [๐Ÿค– ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b) [๐ŸŸฃ WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4-9B) | / | -| GLM-4-9B-Chat | Chat | 128K | [๐Ÿค— Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat) [๐Ÿค– ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat) [๐ŸŸฃ WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4-9B-Chat) | [๐Ÿค– ModelScope CPU](https://modelscope.cn/studios/dash-infer/GLM-4-Chat-DashInfer-Demo/summary)
[๐Ÿค– ModelScope vLLM](https://modelscope.cn/studios/ZhipuAI/glm-4-9b-chat-vllm/summary) | -| GLM-4-9B-Chat-1M | Chat | 1M | [๐Ÿค— Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-1m) [๐Ÿค– ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-1m) [๐ŸŸฃ WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4-9B-Chat-1M) | / | -| GLM-4V-9B | Chat | 8K | [๐Ÿค— Huggingface](https://huggingface.co/THUDM/glm-4v-9b) [๐Ÿค– ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4v-9b) [๐ŸŸฃ WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4V-9B) | [๐Ÿค– ModelScope](https://modelscope.cn/studios/ZhipuAI/glm-4v-9b-Demo/summary) | +| Model | Type | Seq Length | Transformers | vLLM | Download | Online Demo | +|---------------------|------|------------|--------------|----------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| GLM-4-9B | Base | 8K | <= 4.45 | <= 0.6.2 | [๐Ÿค— Huggingface](https://huggingface.co/THUDM/glm-4-9b) [๐Ÿค– ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b) [๐ŸŸฃ WiseModel](https://wisemodel.cn/models/ZhipuAI/glm-4-9b) | / | +| GLM-4-9B-Chat | Chat | 128K | <= 4.45 | <= 0.6.2 | [๐Ÿค— Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat) [๐Ÿค– ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat) [๐ŸŸฃ WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4-9B-Chat) | [๐Ÿค– ModelScope CPU](https://modelscope.cn/studios/dash-infer/GLM-4-Chat-DashInfer-Demo/summary)
[๐Ÿค– ModelScope vLLM](https://modelscope.cn/studios/ZhipuAI/glm-4-9b-chat-vllm/summary) | +| GLM-4-9B-Chat-HF | Chat | 128K | >= 4.46 | <= 0.6.2 | [๐Ÿค— Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-hf) [๐Ÿค– ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-hf) | [๐Ÿค– ModelScope CPU](https://modelscope.cn/studios/dash-infer/GLM-4-Chat-DashInfer-Demo/summary)
[๐Ÿค– ModelScope vLLM](https://modelscope.cn/studios/ZhipuAI/glm-4-9b-chat-vllm/summary) | +| GLM-4-9B-Chat-1M | Chat | 1M | <= 4.45 | <= 0.6.2 | [๐Ÿค— Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-1m) [๐Ÿค– ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-1m) [๐ŸŸฃ WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4-9B-Chat-1M) | / | +| GLM-4-9B-Chat-1M-HF | Chat | 1M | >= 4.46 | <= 0.6.2 | [๐Ÿค— Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-1m-hf) [๐Ÿค– ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-1m-hf) | / | +| GLM-4V-9B | Chat | 8K | >= 4.46 | <= 0.6.2 | [๐Ÿค— Huggingface](https://huggingface.co/THUDM/glm-4v-9b) [๐Ÿค– ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4v-9b) [๐ŸŸฃ WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4V-9B) | [๐Ÿค– ModelScope](https://modelscope.cn/studios/ZhipuAI/glm-4v-9b-Demo/summary) | ## BenchMark @@ -168,7 +170,7 @@ from transformers import AutoModelForCausalLM, AutoTokenizer import os os.environ['CUDA_VISIBLE_DEVICES'] = '0' # Set the GPU number. 
If inference with multiple GPUs, set multiple GPU numbers -MODEL_PATH = "THUDM/glm-4-9b-chat" +MODEL_PATH = "THUDM/glm-4-9b-chat-hf" device = "cuda" if torch.cuda.is_available() else "cpu" @@ -208,7 +210,7 @@ from vllm import LLM, SamplingParams # GLM-4-9B-Chat # If you encounter OOM, you can try to reduce max_model_len or increase tp_size max_model_len, tp_size = 131072, 1 -model_name = "THUDM/glm-4-9b-chat" +model_name = "THUDM/glm-4-9b-chat-hf" prompt = [{"role": "user", "content": "ไฝ ๅฅฝ"}] tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True) diff --git a/basic_demo/glm4v_server.py b/basic_demo/glm4v_server.py index cffa944..549f661 100644 --- a/basic_demo/glm4v_server.py +++ b/basic_demo/glm4v_server.py @@ -302,7 +302,10 @@ def generate_stream_glm4v(model: AutoModel, tokenizer: AutoTokenizer, params: di inputs.append({"role": "user", "content": user_msg}) if model_msg: inputs.append({"role": "assistant", "content": model_msg}) - inputs.append({"role": "user", "content": query, "image": image_list[0]}) + if len(image_list) >= 1: + inputs.append({"role": "user", "content": query, "image": image_list[0]}) + else: + inputs.append({"role": "user", "content": query}) model_inputs = tokenizer.apply_chat_template( inputs, diff --git a/basic_demo/glm_server.py b/basic_demo/glm_server.py index 2ae8b22..d975dea 100644 --- a/basic_demo/glm_server.py +++ b/basic_demo/glm_server.py @@ -207,9 +207,6 @@ async def generate_stream_glm4(params): "top_p": top_p, "top_k": -1, "repetition_penalty": repetition_penalty, - "use_beam_search": False, - "length_penalty": 1, - "early_stopping": False, "stop_token_ids": [151329, 151336, 151338], "ignore_eos": False, "max_tokens": max_new_tokens, @@ -218,7 +215,7 @@ async def generate_stream_glm4(params): "skip_special_tokens": True, } sampling_params = SamplingParams(**params_dict) - async for output in engine.generate(inputs=inputs, sampling_params=sampling_params, request_id=f"{time.time()}"): + async for output 
in engine.generate(prompt=inputs, sampling_params=sampling_params, request_id=f"{time.time()}"): output_len = len(output.outputs[0].token_ids) input_len = len(output.prompt_token_ids) ret = { diff --git a/basic_demo/openai_api_request.py b/basic_demo/openai_api_request.py index 92bee35..f1c7fab 100644 --- a/basic_demo/openai_api_request.py +++ b/basic_demo/openai_api_request.py @@ -95,12 +95,12 @@ def function_chat(use_stream=False): def simple_chat(use_stream=False): messages = [ { - "role": "system", + "role": "user", "content": "请在你输出的时候都带上“喵喵喵”三个字,放在开头。", }, { "role": "user", - "content": "你是谁" + "content": "你是猫吗" } ] response = client.chat.completions.create( @@ -201,7 +201,12 @@ def glm4v_simple_image_chat(use_stream=False, img_path=None): if __name__ == "__main__": - simple_chat(use_stream=False) - # function_chat(use_stream=False) - # glm4v_simple_image_chat(use_stream=False, img_path="demo.jpg") + # Testing the text model + simple_chat(use_stream=False) + + # Testing the text model with tools + # function_chat(use_stream=False) + + # Testing images of multimodal models + # glm4v_simple_image_chat(use_stream=False, img_path="demo.jpg") diff --git a/basic_demo/requirements.txt b/basic_demo/requirements.txt index 4ff1483..0480a78 100644 --- a/basic_demo/requirements.txt +++ b/basic_demo/requirements.txt @@ -17,7 +17,7 @@ pillow>=10.4.0 sse-starlette>=2.1.3 bitsandbytes>=0.43.3 # INT4 Loading -# vllm>=0.6.4 # using with VLLM Framework +# vllm>=0.6.3 # using with VLLM Framework # flash-attn>=2.6.3 # using with flash-attention 2 # PEFT model, not need if you don't use PEFT finetune model. 
# peft>=0.13.0 # Using with finetune model \ No newline at end of file diff --git a/basic_demo/trans_cli_vision_demo.py b/basic_demo/trans_cli_vision_demo.py index 30a78d2..758ccc6 100644 --- a/basic_demo/trans_cli_vision_demo.py +++ b/basic_demo/trans_cli_vision_demo.py @@ -17,7 +17,7 @@ from transformers import ( AutoTokenizer, StoppingCriteria, StoppingCriteriaList, - TextIteratorStreamer, AutoModel, BitsAndBytesConfig + TextIteratorStreamer, AutoModel ) from PIL import Image diff --git a/basic_demo/trans_web_demo.py b/basic_demo/trans_web_demo.py index 1a470de..2d8e35a 100644 --- a/basic_demo/trans_web_demo.py +++ b/basic_demo/trans_web_demo.py @@ -3,6 +3,9 @@ This script creates an interactive web demo for the GLM-4-9B model using Gradio, a Python library for building quick and easy UI components for machine learning models. It's designed to showcase the capabilities of the GLM-4-9B model in a user-friendly interface, allowing users to interact with the model through a chat-like interface. + +Note: + Using with glm-4-9b-chat-hf will require `transformers>=4.46.0`. """ import os @@ -27,7 +30,7 @@ from transformers import ( ModelType = Union[PreTrainedModel, PeftModelForCausalLM] TokenizerType = Union[PreTrainedTokenizer, PreTrainedTokenizerFast] -MODEL_PATH = os.environ.get('MODEL_PATH', 'THUDM/glm-4-9b-chat') +MODEL_PATH = os.environ.get('MODEL_PATH', 'THUDM/glm-4-9b-chat-hf') TOKENIZER_PATH = os.environ.get("TOKENIZER_PATH", MODEL_PATH) diff --git a/basic_demo/trans_web_vision_demo.py b/basic_demo/trans_web_vision_demo.py index 05d1563..91d9875 100644 --- a/basic_demo/trans_web_vision_demo.py +++ b/basic_demo/trans_web_vision_demo.py @@ -7,7 +7,7 @@ Usage: Requirements: - Gradio package - - Type `pip install gradio` to install Gradio. + - Type `pip install gradio==4.44.1` to install Gradio. 
""" import os @@ -18,7 +18,7 @@ from transformers import ( AutoTokenizer, StoppingCriteria, StoppingCriteriaList, - TextIteratorStreamer, AutoModel, BitsAndBytesConfig + TextIteratorStreamer, AutoModel ) from PIL import Image import requests From 9b39ba6d1bb641595e9b48c0a3341191111e3b6c Mon Sep 17 00:00:00 2001 From: sixgod Date: Fri, 1 Nov 2024 09:06:04 +0000 Subject: [PATCH 2/8] transformers4.46 and vllm0.6.3 --- README.md | 12 ++++++------ README_en.md | 8 ++++---- basic_demo/requirements.txt | 2 +- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 72eb90b..01d7ad6 100644 --- a/README.md +++ b/README.md @@ -57,12 +57,12 @@ GLM-4V-9Bใ€‚**GLM-4V-9B** ๅ…ทๅค‡ 1120 * 1120 ้ซ˜ๅˆ†่พจ็އไธ‹็š„ไธญ่‹ฑๅŒ่ฏญๅคš | Model | Type | Seq Length | Transformers | vLLM | Download | Online Demo | |---------------------|------|------------|--------------|----------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| GLM-4-9B | Base | 8K | <= 4.45 | <= 0.6.2 | [๐Ÿค— Huggingface](https://huggingface.co/THUDM/glm-4-9b) [๐Ÿค– ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b) [๐ŸŸฃ WiseModel](https://wisemodel.cn/models/ZhipuAI/glm-4-9b) | / | -| GLM-4-9B-Chat | Chat | 128K | <= 4.45 | <= 0.6.2 | [๐Ÿค— Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat) [๐Ÿค– ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat) [๐ŸŸฃ WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4-9B-Chat) | [๐Ÿค– ModelScope CPU](https://modelscope.cn/studios/dash-infer/GLM-4-Chat-DashInfer-Demo/summary)
[๐Ÿค– ModelScope vLLM](https://modelscope.cn/studios/ZhipuAI/glm-4-9b-chat-vllm/summary) | -| GLM-4-9B-Chat-HF | Chat | 128K | >= 4.46 | <= 0.6.2 | [๐Ÿค— Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-hf) [๐Ÿค– ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-hf) | [๐Ÿค– ModelScope CPU](https://modelscope.cn/studios/dash-infer/GLM-4-Chat-DashInfer-Demo/summary)
[๐Ÿค– ModelScope vLLM](https://modelscope.cn/studios/ZhipuAI/glm-4-9b-chat-vllm/summary) | -| GLM-4-9B-Chat-1M | Chat | 1M | <= 4.45 | <= 0.6.2 | [๐Ÿค— Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-1m) [๐Ÿค– ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-1m) [๐ŸŸฃ WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4-9B-Chat-1M) | / | -| GLM-4-9B-Chat-1M-HF | Chat | 1M | >= 4.46 | <= 0.6.2 | [๐Ÿค— Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-1m-hf) [๐Ÿค– ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-1m-hf) | / | -| GLM-4V-9B | Chat | 8K | >= 4.46 | <= 0.6.2 | [๐Ÿค— Huggingface](https://huggingface.co/THUDM/glm-4v-9b) [๐Ÿค– ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4v-9b) [๐ŸŸฃ WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4V-9B) | [๐Ÿค– ModelScope](https://modelscope.cn/studios/ZhipuAI/glm-4v-9b-Demo/summary) | +| GLM-4-9B | Base | 8K | <= 4.45 | <= 0.6.2 | [๐Ÿค— Huggingface](https://huggingface.co/THUDM/glm-4-9b)
[๐Ÿค– ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b)
[๐ŸŸฃ WiseModel](https://wisemodel.cn/models/ZhipuAI/glm-4-9b) | / | +| GLM-4-9B-Chat | Chat | 128K | <= 4.45 | <= 0.6.2 | [๐Ÿค— Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat)
[๐Ÿค– ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat)
[๐ŸŸฃ WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4-9B-Chat) | [๐Ÿค– ModelScope CPU](https://modelscope.cn/studios/dash-infer/GLM-4-Chat-DashInfer-Demo/summary)
[๐Ÿค– ModelScope vLLM](https://modelscope.cn/studios/ZhipuAI/glm-4-9b-chat-vllm/summary) | +| GLM-4-9B-Chat-HF | Chat | 128K | >= 4.46 | <= 0.6.2 | [๐Ÿค— Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-hf)
[๐Ÿค– ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-hf) | [๐Ÿค– ModelScope CPU](https://modelscope.cn/studios/dash-infer/GLM-4-Chat-DashInfer-Demo/summary)
[๐Ÿค– ModelScope vLLM](https://modelscope.cn/studios/ZhipuAI/glm-4-9b-chat-vllm/summary) | +| GLM-4-9B-Chat-1M | Chat | 1M | <= 4.45 | <= 0.6.2 | [๐Ÿค— Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-1m)
[๐Ÿค– ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-1m)
[๐ŸŸฃ WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4-9B-Chat-1M) | / | +| GLM-4-9B-Chat-1M-HF | Chat | 1M | >= 4.46 | <= 0.6.2 | [๐Ÿค— Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-1m-hf)
[๐Ÿค– ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-1m-hf) | / | +| GLM-4V-9B | Chat | 8K | >= 4.46 | >= 0.6.2 | [๐Ÿค— Huggingface](https://huggingface.co/THUDM/glm-4v-9b)
[๐Ÿค– ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4v-9b)
[๐ŸŸฃ WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4V-9B) | [๐Ÿค– ModelScope](https://modelscope.cn/studios/ZhipuAI/glm-4v-9b-Demo/summary) | ## ่ฏ„ๆต‹็ป“ๆžœ diff --git a/README_en.md b/README_en.md index 554f0cd..eb8be2e 100644 --- a/README_en.md +++ b/README_en.md @@ -69,12 +69,12 @@ GPT-4-turbo-2024-04-09, Gemini 1.0 Pro, Qwen-VL-Max, and Claude 3 Opus. | Model | Type | Seq Length | Transformers | vLLM | Download | Online Demo | |---------------------|------|------------|--------------|----------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| GLM-4-9B | Base | 8K | <= 4.45 | <= 0.6.2 | [๐Ÿค— Huggingface](https://huggingface.co/THUDM/glm-4-9b) [๐Ÿค– ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b) [๐ŸŸฃ WiseModel](https://wisemodel.cn/models/ZhipuAI/glm-4-9b) | / | -| GLM-4-9B-Chat | Chat | 128K | <= 4.45 | <= 0.6.2 | [๐Ÿค— Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat) [๐Ÿค– ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat) [๐ŸŸฃ WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4-9B-Chat) | [๐Ÿค– ModelScope CPU](https://modelscope.cn/studios/dash-infer/GLM-4-Chat-DashInfer-Demo/summary)
[๐Ÿค– ModelScope vLLM](https://modelscope.cn/studios/ZhipuAI/glm-4-9b-chat-vllm/summary) | +| GLM-4-9B | Base | 8K | <= 4.45 | <= 0.6.2 | [๐Ÿค— Huggingface](https://huggingface.co/THUDM/glm-4-9b)
[๐Ÿค– ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b)
[๐ŸŸฃ WiseModel](https://wisemodel.cn/models/ZhipuAI/glm-4-9b) | / | +| GLM-4-9B-Chat | Chat | 128K | <= 4.45 | <= 0.6.2 | [๐Ÿค— Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat)
[๐Ÿค– ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat)
[๐ŸŸฃ WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4-9B-Chat) | [๐Ÿค– ModelScope CPU](https://modelscope.cn/studios/dash-infer/GLM-4-Chat-DashInfer-Demo/summary)
[๐Ÿค– ModelScope vLLM](https://modelscope.cn/studios/ZhipuAI/glm-4-9b-chat-vllm/summary) | | GLM-4-9B-Chat-HF | Chat | 128K | >= 4.46 | <= 0.6.2 | [๐Ÿค— Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-hf) [๐Ÿค– ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-hf) | [๐Ÿค– ModelScope CPU](https://modelscope.cn/studios/dash-infer/GLM-4-Chat-DashInfer-Demo/summary)
[๐Ÿค– ModelScope vLLM](https://modelscope.cn/studios/ZhipuAI/glm-4-9b-chat-vllm/summary) | -| GLM-4-9B-Chat-1M | Chat | 1M | <= 4.45 | <= 0.6.2 | [๐Ÿค— Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-1m) [๐Ÿค– ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-1m) [๐ŸŸฃ WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4-9B-Chat-1M) | / | +| GLM-4-9B-Chat-1M | Chat | 1M | <= 4.45 | <= 0.6.2 | [๐Ÿค— Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-1m)
[๐Ÿค– ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-1m)
[๐ŸŸฃ WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4-9B-Chat-1M) | / | | GLM-4-9B-Chat-1M-HF | Chat | 1M | >= 4.46 | <= 0.6.2 | [๐Ÿค— Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-1m-hf) [๐Ÿค– ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-1m-hf) | / | -| GLM-4V-9B | Chat | 8K | >= 4.46 | <= 0.6.2 | [๐Ÿค— Huggingface](https://huggingface.co/THUDM/glm-4v-9b) [๐Ÿค– ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4v-9b) [๐ŸŸฃ WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4V-9B) | [๐Ÿค– ModelScope](https://modelscope.cn/studios/ZhipuAI/glm-4v-9b-Demo/summary) | +| GLM-4V-9B | Chat | 8K | >= 4.46 | >= 0.6.3 | [๐Ÿค— Huggingface](https://huggingface.co/THUDM/glm-4v-9b)
[๐Ÿค– ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4v-9b)
[๐ŸŸฃ WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4V-9B) | [๐Ÿค– ModelScope](https://modelscope.cn/studios/ZhipuAI/glm-4v-9b-Demo/summary) | ## BenchMark diff --git a/basic_demo/requirements.txt b/basic_demo/requirements.txt index 0480a78..cad81ed 100644 --- a/basic_demo/requirements.txt +++ b/basic_demo/requirements.txt @@ -10,7 +10,7 @@ tiktoken>=0.7.0 numpy==1.26.4 # Need less than 2.0.0 accelerate>=1.0.1 sentence_transformers>=3.1.1 -gradio>=4.44.1 # web demo +gradio==4.44.1 # web demo openai>=1.51.0 # openai demo einops>=0.8.0 pillow>=10.4.0 From 1d68300ecfa1625ca40440526b4658c2cde2ca1a Mon Sep 17 00:00:00 2001 From: sixgod Date: Fri, 1 Nov 2024 09:11:59 +0000 Subject: [PATCH 3/8] transformers4.46 and vllm0.6.3 --- README.md | 16 ++++++++-------- README_en.md | 16 ++++++++-------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index 01d7ad6..26554d8 100644 --- a/README.md +++ b/README.md @@ -55,14 +55,14 @@ GLM-4V-9Bใ€‚**GLM-4V-9B** ๅ…ทๅค‡ 1120 * 1120 ้ซ˜ๅˆ†่พจ็އไธ‹็š„ไธญ่‹ฑๅŒ่ฏญๅคš ## Model List -| Model | Type | Seq Length | Transformers | vLLM | Download | Online Demo | -|---------------------|------|------------|--------------|----------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| GLM-4-9B | Base | 8K | <= 4.45 | <= 0.6.2 | [๐Ÿค— Huggingface](https://huggingface.co/THUDM/glm-4-9b)
[๐Ÿค– ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b)
[๐ŸŸฃ WiseModel](https://wisemodel.cn/models/ZhipuAI/glm-4-9b) | / | -| GLM-4-9B-Chat | Chat | 128K | <= 4.45 | <= 0.6.2 | [๐Ÿค— Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat)
[๐Ÿค– ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat)
[๐ŸŸฃ WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4-9B-Chat) | [๐Ÿค– ModelScope CPU](https://modelscope.cn/studios/dash-infer/GLM-4-Chat-DashInfer-Demo/summary)
[๐Ÿค– ModelScope vLLM](https://modelscope.cn/studios/ZhipuAI/glm-4-9b-chat-vllm/summary) | -| GLM-4-9B-Chat-HF | Chat | 128K | >= 4.46 | <= 0.6.2 | [๐Ÿค— Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-hf)
[๐Ÿค– ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-hf) | [๐Ÿค– ModelScope CPU](https://modelscope.cn/studios/dash-infer/GLM-4-Chat-DashInfer-Demo/summary)
[๐Ÿค– ModelScope vLLM](https://modelscope.cn/studios/ZhipuAI/glm-4-9b-chat-vllm/summary) | -| GLM-4-9B-Chat-1M | Chat | 1M | <= 4.45 | <= 0.6.2 | [๐Ÿค— Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-1m)
[๐Ÿค– ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-1m)
[๐ŸŸฃ WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4-9B-Chat-1M) | / | -| GLM-4-9B-Chat-1M-HF | Chat | 1M | >= 4.46 | <= 0.6.2 | [๐Ÿค— Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-1m-hf)
[๐Ÿค– ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-1m-hf) | / | -| GLM-4V-9B | Chat | 8K | >= 4.46 | >= 0.6.2 | [๐Ÿค— Huggingface](https://huggingface.co/THUDM/glm-4v-9b)
[๐Ÿค– ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4v-9b)
[๐ŸŸฃ WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4V-9B) | [๐Ÿค– ModelScope](https://modelscope.cn/studios/ZhipuAI/glm-4v-9b-Demo/summary) | +| Model | Type | Seq Length | Transformers | vLLM | Download | Online Demo | +|---------------------|------|------------|--------------|----------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| GLM-4-9B | Base | 8K | <= 4.45 | <= 0.6.2 | [๐Ÿค— Huggingface](https://huggingface.co/THUDM/glm-4-9b)
[๐Ÿค– ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b)
[๐ŸŸฃ WiseModel](https://wisemodel.cn/models/ZhipuAI/glm-4-9b) | / | +| GLM-4-9B-Chat | Chat | 128K | <= 4.45 | <= 0.6.2 | [๐Ÿค— Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat)
[๐Ÿค– ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat)
[๐ŸŸฃ WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4-9B-Chat) | [๐Ÿค– ModelScope CPU](https://modelscope.cn/studios/dash-infer/GLM-4-Chat-DashInfer-Demo/summary)
[๐Ÿค– ModelScope vLLM](https://modelscope.cn/studios/ZhipuAI/glm-4-9b-chat-vllm/summary) | +| GLM-4-9B-Chat-HF | Chat | 128K | >= 4.46 | <= 0.6.2 | [๐Ÿค— Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-hf)
[๐Ÿค– ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-hf) | [๐Ÿค– ModelScope CPU](https://modelscope.cn/studios/dash-infer/GLM-4-Chat-DashInfer-Demo/summary)
[๐Ÿค– ModelScope vLLM](https://modelscope.cn/studios/ZhipuAI/glm-4-9b-chat-vllm/summary) | +| GLM-4-9B-Chat-1M | Chat | 1M | <= 4.45 | <= 0.6.2 | [๐Ÿค— Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-1m)
[๐Ÿค– ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-1m)
[๐ŸŸฃ WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4-9B-Chat-1M) | / | +| GLM-4-9B-Chat-1M-HF | Chat | 1M | >= 4.46 | <= 0.6.2 | [๐Ÿค— Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-1m-hf)
[๐Ÿค– ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-1m-hf) | / | +| GLM-4V-9B | Chat | 8K | >= 4.46 | >= 0.6.3 | [๐Ÿค— Huggingface](https://huggingface.co/THUDM/glm-4v-9b)
[๐Ÿค– ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4v-9b)
[๐ŸŸฃ WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4V-9B) | [๐Ÿค– ModelScope](https://modelscope.cn/studios/ZhipuAI/glm-4v-9b-Demo/summary) | ## ่ฏ„ๆต‹็ป“ๆžœ diff --git a/README_en.md b/README_en.md index eb8be2e..0ad31db 100644 --- a/README_en.md +++ b/README_en.md @@ -67,14 +67,14 @@ GPT-4-turbo-2024-04-09, Gemini 1.0 Pro, Qwen-VL-Max, and Claude 3 Opus. ## Model List -| Model | Type | Seq Length | Transformers | vLLM | Download | Online Demo | -|---------------------|------|------------|--------------|----------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| GLM-4-9B | Base | 8K | <= 4.45 | <= 0.6.2 | [๐Ÿค— Huggingface](https://huggingface.co/THUDM/glm-4-9b)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b)
[🟣 WiseModel](https://wisemodel.cn/models/ZhipuAI/glm-4-9b) | / | -| GLM-4-9B-Chat | Chat | 128K | <= 4.45 | <= 0.6.2 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat)
[🟣 WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4-9B-Chat) | [🤖 ModelScope CPU](https://modelscope.cn/studios/dash-infer/GLM-4-Chat-DashInfer-Demo/summary)
[🤖 ModelScope vLLM](https://modelscope.cn/studios/ZhipuAI/glm-4-9b-chat-vllm/summary) | -| GLM-4-9B-Chat-HF | Chat | 128K | >= 4.46 | <= 0.6.2 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-hf) [🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-hf) | [🤖 ModelScope CPU](https://modelscope.cn/studios/dash-infer/GLM-4-Chat-DashInfer-Demo/summary)
[🤖 ModelScope vLLM](https://modelscope.cn/studios/ZhipuAI/glm-4-9b-chat-vllm/summary) | -| GLM-4-9B-Chat-1M | Chat | 1M | <= 4.45 | <= 0.6.2 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-1m)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-1m)
[🟣 WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4-9B-Chat-1M) | / | -| GLM-4-9B-Chat-1M-HF | Chat | 1M | >= 4.46 | <= 0.6.2 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-1m-hf) [🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-1m-hf) | / | -| GLM-4V-9B | Chat | 8K | >= 4.46 | >= 0.6.3 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4v-9b)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4v-9b)
[๐ŸŸฃ WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4V-9B) | [๐Ÿค– ModelScope](https://modelscope.cn/studios/ZhipuAI/glm-4v-9b-Demo/summary) | +| Model | Type | Seq Length | Transformers | vLLM | Download | Online Demo | +|---------------------|------|------------|--------------|----------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| GLM-4-9B | Base | 8K | <= 4.45 | <= 0.6.2 | [๐Ÿค— Huggingface](https://huggingface.co/THUDM/glm-4-9b)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b)
[🟣 WiseModel](https://wisemodel.cn/models/ZhipuAI/glm-4-9b) | / | +| GLM-4-9B-Chat | Chat | 128K | <= 4.45 | <= 0.6.2 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat)
[🟣 WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4-9B-Chat) | [🤖 ModelScope CPU](https://modelscope.cn/studios/dash-infer/GLM-4-Chat-DashInfer-Demo/summary)
[🤖 ModelScope vLLM](https://modelscope.cn/studios/ZhipuAI/glm-4-9b-chat-vllm/summary) | +| GLM-4-9B-Chat-HF | Chat | 128K | >= 4.46 | <= 0.6.2 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-hf) [🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-hf) | [🤖 ModelScope CPU](https://modelscope.cn/studios/dash-infer/GLM-4-Chat-DashInfer-Demo/summary)
[🤖 ModelScope vLLM](https://modelscope.cn/studios/ZhipuAI/glm-4-9b-chat-vllm/summary) | +| GLM-4-9B-Chat-1M | Chat | 1M | <= 4.45 | <= 0.6.2 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-1m)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-1m)
[🟣 WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4-9B-Chat-1M) | / | +| GLM-4-9B-Chat-1M-HF | Chat | 1M | >= 4.46 | <= 0.6.2 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-1m-hf) [🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-1m-hf) | / | +| GLM-4V-9B | Chat | 8K | >= 4.46 | >= 0.6.3 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4v-9b)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4v-9b)
[๐ŸŸฃ WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4V-9B) | [๐Ÿค– ModelScope](https://modelscope.cn/studios/ZhipuAI/glm-4v-9b-Demo/summary) | ## BenchMark From a8069aca733ed7a088359400b85308dbf385ccc2 Mon Sep 17 00:00:00 2001 From: sixgod Date: Fri, 1 Nov 2024 09:14:20 +0000 Subject: [PATCH 4/8] transformers4.46 and vllm0.6.3 --- README.md | 2 +- README_en.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 26554d8..924513d 100644 --- a/README.md +++ b/README.md @@ -56,7 +56,7 @@ GLM-4V-9Bใ€‚**GLM-4V-9B** ๅ…ทๅค‡ 1120 * 1120 ้ซ˜ๅˆ†่พจ็އไธ‹็š„ไธญ่‹ฑๅŒ่ฏญๅคš ## Model List | Model | Type | Seq Length | Transformers | vLLM | Download | Online Demo | -|---------------------|------|------------|--------------|----------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +|:-------------------:|:----:|:----------:|:------------:|:--------:|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:| | GLM-4-9B | Base | 8K | <= 4.45 | <= 0.6.2 | [๐Ÿค— Huggingface](https://huggingface.co/THUDM/glm-4-9b)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b)
[🟣 WiseModel](https://wisemodel.cn/models/ZhipuAI/glm-4-9b) | / | | GLM-4-9B-Chat | Chat | 128K | <= 4.45 | <= 0.6.2 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat)
[🟣 WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4-9B-Chat) | [🤖 ModelScope CPU](https://modelscope.cn/studios/dash-infer/GLM-4-Chat-DashInfer-Demo/summary)
[🤖 ModelScope vLLM](https://modelscope.cn/studios/ZhipuAI/glm-4-9b-chat-vllm/summary) | | GLM-4-9B-Chat-HF | Chat | 128K | >= 4.46 | <= 0.6.2 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-hf)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-hf) | [🤖 ModelScope CPU](https://modelscope.cn/studios/dash-infer/GLM-4-Chat-DashInfer-Demo/summary)
[๐Ÿค– ModelScope vLLM](https://modelscope.cn/studios/ZhipuAI/glm-4-9b-chat-vllm/summary) | diff --git a/README_en.md b/README_en.md index 0ad31db..689ce32 100644 --- a/README_en.md +++ b/README_en.md @@ -68,7 +68,7 @@ GPT-4-turbo-2024-04-09, Gemini 1.0 Pro, Qwen-VL-Max, and Claude 3 Opus. ## Model List | Model | Type | Seq Length | Transformers | vLLM | Download | Online Demo | -|---------------------|------|------------|--------------|----------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +|:-------------------:|:----:|:----------:|:------------:|:--------:|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:| | GLM-4-9B | Base | 8K | <= 4.45 | <= 0.6.2 | [๐Ÿค— Huggingface](https://huggingface.co/THUDM/glm-4-9b)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b)
[🟣 WiseModel](https://wisemodel.cn/models/ZhipuAI/glm-4-9b) | / | | GLM-4-9B-Chat | Chat | 128K | <= 4.45 | <= 0.6.2 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat)
[🟣 WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4-9B-Chat) | [🤖 ModelScope CPU](https://modelscope.cn/studios/dash-infer/GLM-4-Chat-DashInfer-Demo/summary)
[🤖 ModelScope vLLM](https://modelscope.cn/studios/ZhipuAI/glm-4-9b-chat-vllm/summary) | | GLM-4-9B-Chat-HF | Chat | 128K | >= 4.46 | <= 0.6.2 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-hf) [🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-hf) | [🤖 ModelScope CPU](https://modelscope.cn/studios/dash-infer/GLM-4-Chat-DashInfer-Demo/summary)
[๐Ÿค– ModelScope vLLM](https://modelscope.cn/studios/ZhipuAI/glm-4-9b-chat-vllm/summary) | From 02831d280b314ac8aeafbcbb6c1c12229d2411ec Mon Sep 17 00:00:00 2001 From: sixgod Date: Fri, 1 Nov 2024 09:20:57 +0000 Subject: [PATCH 5/8] transformers4.46 and vllm0.6.3 --- README.md | 2 ++ README_en.md | 3 +++ 2 files changed, 5 insertions(+) diff --git a/README.md b/README.md index 924513d..343aa2b 100644 --- a/README.md +++ b/README.md @@ -55,6 +55,7 @@ GLM-4V-9Bใ€‚**GLM-4V-9B** ๅ…ทๅค‡ 1120 * 1120 ้ซ˜ๅˆ†่พจ็އไธ‹็š„ไธญ่‹ฑๅŒ่ฏญๅคš ## Model List +
| Model | Type | Seq Length | Transformers | vLLM | Download | Online Demo | |:-------------------:|:----:|:----------:|:------------:|:--------:|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:| | GLM-4-9B | Base | 8K | <= 4.45 | <= 0.6.2 | [๐Ÿค— Huggingface](https://huggingface.co/THUDM/glm-4-9b)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b)
[🟣 WiseModel](https://wisemodel.cn/models/ZhipuAI/glm-4-9b) | / | @@ -63,6 +64,7 @@ GLM-4V-9B。**GLM-4V-9B** 具备 1120 * 1120 高分辨率下的中英双语多 | GLM-4-9B-Chat-1M | Chat | 1M | <= 4.45 | <= 0.6.2 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-1m)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-1m)
[🟣 WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4-9B-Chat-1M) | / | | GLM-4-9B-Chat-1M-HF | Chat | 1M | >= 4.46 | <= 0.6.2 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-1m-hf)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-1m-hf) | / | | GLM-4V-9B | Chat | 8K | >= 4.46 | >= 0.6.3 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4v-9b)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4v-9b)
[🟣 WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4V-9B) | [🤖 ModelScope](https://modelscope.cn/studios/ZhipuAI/glm-4v-9b-Demo/summary) | +
## 评测结果 diff --git a/README_en.md b/README_en.md index 689ce32..0626dcf 100644 --- a/README_en.md +++ b/README_en.md @@ -67,6 +67,7 @@ GPT-4-turbo-2024-04-09, Gemini 1.0 Pro, Qwen-VL-Max, and Claude 3 Opus. ## Model List +
| Model | Type | Seq Length | Transformers | vLLM | Download | Online Demo | |:-------------------:|:----:|:----------:|:------------:|:--------:|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:| | GLM-4-9B | Base | 8K | <= 4.45 | <= 0.6.2 | [๐Ÿค— Huggingface](https://huggingface.co/THUDM/glm-4-9b)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b)
[🟣 WiseModel](https://wisemodel.cn/models/ZhipuAI/glm-4-9b) | / | @@ -75,6 +76,8 @@ GPT-4-turbo-2024-04-09, Gemini 1.0 Pro, Qwen-VL-Max, and Claude 3 Opus. | GLM-4-9B-Chat-1M | Chat | 1M | <= 4.45 | <= 0.6.2 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-1m)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-1m)
[🟣 WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4-9B-Chat-1M) | / | | GLM-4-9B-Chat-1M-HF | Chat | 1M | >= 4.46 | <= 0.6.2 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-1m-hf) [🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-1m-hf) | / | | GLM-4V-9B | Chat | 8K | >= 4.46 | >= 0.6.3 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4v-9b)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4v-9b)
[🟣 WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4V-9B) | [🤖 ModelScope](https://modelscope.cn/studios/ZhipuAI/glm-4v-9b-Demo/summary) | +
+ ## BenchMark From 2ba0aa4b54fafabc6d084f63588b7b62c4ad0279 Mon Sep 17 00:00:00 2001 From: sixgod Date: Fri, 1 Nov 2024 09:23:05 +0000 Subject: [PATCH 6/8] transformers4.46 and vllm0.6.3 --- README.md | 2 -- README_en.md | 2 -- 2 files changed, 4 deletions(-) diff --git a/README.md b/README.md index 343aa2b..924513d 100644 --- a/README.md +++ b/README.md @@ -55,7 +55,6 @@ GLM-4V-9B。**GLM-4V-9B** 具备 1120 * 1120 高分辨率下的中英双语多 ## Model List -
| Model | Type | Seq Length | Transformers | vLLM | Download | Online Demo | |:-------------------:|:----:|:----------:|:------------:|:--------:|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:| | GLM-4-9B | Base | 8K | <= 4.45 | <= 0.6.2 | [๐Ÿค— Huggingface](https://huggingface.co/THUDM/glm-4-9b)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b)
[🟣 WiseModel](https://wisemodel.cn/models/ZhipuAI/glm-4-9b) | / | @@ -64,7 +63,6 @@ GLM-4V-9B。**GLM-4V-9B** 具备 1120 * 1120 高分辨率下的中英双语多 | GLM-4-9B-Chat-1M | Chat | 1M | <= 4.45 | <= 0.6.2 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-1m)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-1m)
[🟣 WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4-9B-Chat-1M) | / | | GLM-4-9B-Chat-1M-HF | Chat | 1M | >= 4.46 | <= 0.6.2 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-1m-hf)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-1m-hf) | / | | GLM-4V-9B | Chat | 8K | >= 4.46 | >= 0.6.3 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4v-9b)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4v-9b)
[🟣 WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4V-9B) | [🤖 ModelScope](https://modelscope.cn/studios/ZhipuAI/glm-4v-9b-Demo/summary) | -
## 评测结果 diff --git a/README_en.md b/README_en.md index 0626dcf..10024a0 100644 --- a/README_en.md +++ b/README_en.md @@ -67,7 +67,6 @@ GPT-4-turbo-2024-04-09, Gemini 1.0 Pro, Qwen-VL-Max, and Claude 3 Opus. ## Model List -
| Model | Type | Seq Length | Transformers | vLLM | Download | Online Demo | |:-------------------:|:----:|:----------:|:------------:|:--------:|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:| | GLM-4-9B | Base | 8K | <= 4.45 | <= 0.6.2 | [๐Ÿค— Huggingface](https://huggingface.co/THUDM/glm-4-9b)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b)
[🟣 WiseModel](https://wisemodel.cn/models/ZhipuAI/glm-4-9b) | / | @@ -76,7 +75,6 @@ GPT-4-turbo-2024-04-09, Gemini 1.0 Pro, Qwen-VL-Max, and Claude 3 Opus. | GLM-4-9B-Chat-1M | Chat | 1M | <= 4.45 | <= 0.6.2 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-1m)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-1m)
[🟣 WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4-9B-Chat-1M) | / | | GLM-4-9B-Chat-1M-HF | Chat | 1M | >= 4.46 | <= 0.6.2 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-1m-hf) [🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-1m-hf) | / | | GLM-4V-9B | Chat | 8K | >= 4.46 | >= 0.6.3 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4v-9b)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4v-9b)
[🟣 WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4V-9B) | [🤖 ModelScope](https://modelscope.cn/studios/ZhipuAI/glm-4v-9b-Demo/summary) | -
## BenchMark From 24c8e1b3171990db2134a07b3cd8f7689adc583c Mon Sep 17 00:00:00 2001 From: sixgod Date: Fri, 1 Nov 2024 09:24:39 +0000 Subject: [PATCH 7/8] transformers4.46 and vllm0.6.3 --- README.md | 16 ++++++++-------- README_en.md | 18 +++++++++--------- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index 924513d..9af3a60 100644 --- a/README.md +++ b/README.md @@ -55,14 +55,14 @@ GLM-4V-9Bใ€‚**GLM-4V-9B** ๅ…ทๅค‡ 1120 * 1120 ้ซ˜ๅˆ†่พจ็އไธ‹็š„ไธญ่‹ฑๅŒ่ฏญๅคš ## Model List -| Model | Type | Seq Length | Transformers | vLLM | Download | Online Demo | -|:-------------------:|:----:|:----------:|:------------:|:--------:|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:| -| GLM-4-9B | Base | 8K | <= 4.45 | <= 0.6.2 | [๐Ÿค— Huggingface](https://huggingface.co/THUDM/glm-4-9b)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b)
[🟣 WiseModel](https://wisemodel.cn/models/ZhipuAI/glm-4-9b) | / | -| GLM-4-9B-Chat | Chat | 128K | <= 4.45 | <= 0.6.2 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat)
[🟣 WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4-9B-Chat) | [🤖 ModelScope CPU](https://modelscope.cn/studios/dash-infer/GLM-4-Chat-DashInfer-Demo/summary)
[🤖 ModelScope vLLM](https://modelscope.cn/studios/ZhipuAI/glm-4-9b-chat-vllm/summary) | -| GLM-4-9B-Chat-HF | Chat | 128K | >= 4.46 | <= 0.6.2 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-hf)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-hf) | [🤖 ModelScope CPU](https://modelscope.cn/studios/dash-infer/GLM-4-Chat-DashInfer-Demo/summary)
[🤖 ModelScope vLLM](https://modelscope.cn/studios/ZhipuAI/glm-4-9b-chat-vllm/summary) | -| GLM-4-9B-Chat-1M | Chat | 1M | <= 4.45 | <= 0.6.2 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-1m)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-1m)
[🟣 WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4-9B-Chat-1M) | / | -| GLM-4-9B-Chat-1M-HF | Chat | 1M | >= 4.46 | <= 0.6.2 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-1m-hf)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-1m-hf) | / | -| GLM-4V-9B | Chat | 8K | >= 4.46 | >= 0.6.3 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4v-9b)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4v-9b)
[๐ŸŸฃ WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4V-9B) | [๐Ÿค– ModelScope](https://modelscope.cn/studios/ZhipuAI/glm-4v-9b-Demo/summary) | +| Model | Type | Seq Length | Transformers | vLLM | Download | Online Demo | +|:-------------------:|:----:|:----------:|:------------:|:--------:|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:| +| GLM-4-9B | Base | 8K | <= 4.45 | <= 0.6.2 | [๐Ÿค— Huggingface](https://huggingface.co/THUDM/glm-4-9b)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b)
[🟣 WiseModel](https://wisemodel.cn/models/ZhipuAI/glm-4-9b) | / | +| GLM-4-9B-Chat | Chat | 128K | <= 4.45 | <= 0.6.2 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat)
[🟣 WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4-9B-Chat) | [🤖 ModelScope CPU](https://modelscope.cn/studios/dash-infer/GLM-4-Chat-DashInfer-Demo/summary)
[🤖 ModelScope vLLM](https://modelscope.cn/studios/ZhipuAI/glm-4-9b-chat-vllm/summary) | +| GLM-4-9B-Chat-HF | Chat | 128K | >= 4.46 | <= 0.6.2 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-hf)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-hf) | [🤖 ModelScope CPU](https://modelscope.cn/studios/dash-infer/GLM-4-Chat-DashInfer-Demo/summary)
[🤖 ModelScope vLLM](https://modelscope.cn/studios/ZhipuAI/glm-4-9b-chat-vllm/summary) | +| GLM-4-9B-Chat-1M | Chat | 1M | <= 4.45 | <= 0.6.2 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-1m)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-1m)
[🟣 WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4-9B-Chat-1M) | / | +| GLM-4-9B-Chat-1M-HF | Chat | 1M | >= 4.46 | <= 0.6.2 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-1m-hf)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-1m-hf) | / | +| GLM-4V-9B | Chat | 8K | >= 4.46 | >= 0.6.3 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4v-9b)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4v-9b)
[๐ŸŸฃ WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4V-9B) | [๐Ÿค– ModelScope](https://modelscope.cn/studios/ZhipuAI/glm-4v-9b-Demo/summary) | ## ่ฏ„ๆต‹็ป“ๆžœ diff --git a/README_en.md b/README_en.md index 10024a0..9f4cd42 100644 --- a/README_en.md +++ b/README_en.md @@ -67,17 +67,17 @@ GPT-4-turbo-2024-04-09, Gemini 1.0 Pro, Qwen-VL-Max, and Claude 3 Opus. ## Model List -| Model | Type | Seq Length | Transformers | vLLM | Download | Online Demo | -|:-------------------:|:----:|:----------:|:------------:|:--------:|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:| -| GLM-4-9B | Base | 8K | <= 4.45 | <= 0.6.2 | [๐Ÿค— Huggingface](https://huggingface.co/THUDM/glm-4-9b)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b)
[🟣 WiseModel](https://wisemodel.cn/models/ZhipuAI/glm-4-9b) | / | -| GLM-4-9B-Chat | Chat | 128K | <= 4.45 | <= 0.6.2 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat)
[🟣 WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4-9B-Chat) | [🤖 ModelScope CPU](https://modelscope.cn/studios/dash-infer/GLM-4-Chat-DashInfer-Demo/summary)
[🤖 ModelScope vLLM](https://modelscope.cn/studios/ZhipuAI/glm-4-9b-chat-vllm/summary) | -| GLM-4-9B-Chat-HF | Chat | 128K | >= 4.46 | <= 0.6.2 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-hf) [🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-hf) | [🤖 ModelScope CPU](https://modelscope.cn/studios/dash-infer/GLM-4-Chat-DashInfer-Demo/summary)
[🤖 ModelScope vLLM](https://modelscope.cn/studios/ZhipuAI/glm-4-9b-chat-vllm/summary) | -| GLM-4-9B-Chat-1M | Chat | 1M | <= 4.45 | <= 0.6.2 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-1m)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-1m)
[🟣 WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4-9B-Chat-1M) | / | -| GLM-4-9B-Chat-1M-HF | Chat | 1M | >= 4.46 | <= 0.6.2 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-1m-hf) [🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-1m-hf) | / | -| GLM-4V-9B | Chat | 8K | >= 4.46 | >= 0.6.3 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4v-9b)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4v-9b)
[๐ŸŸฃ WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4V-9B) | [๐Ÿค– ModelScope](https://modelscope.cn/studios/ZhipuAI/glm-4v-9b-Demo/summary) | +| Model | Type | Seq Length | Transformers | vLLM | Download | Online Demo | +|:-------------------:|:----:|:----------:|:------------:|:--------:|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:| +| GLM-4-9B | Base | 8K | <= 4.45 | <= 0.6.2 | [๐Ÿค— Huggingface](https://huggingface.co/THUDM/glm-4-9b)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b)
[🟣 WiseModel](https://wisemodel.cn/models/ZhipuAI/glm-4-9b) | / | +| GLM-4-9B-Chat | Chat | 128K | <= 4.45 | <= 0.6.2 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat)
[🟣 WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4-9B-Chat) | [🤖 ModelScope CPU](https://modelscope.cn/studios/dash-infer/GLM-4-Chat-DashInfer-Demo/summary)
[🤖 ModelScope vLLM](https://modelscope.cn/studios/ZhipuAI/glm-4-9b-chat-vllm/summary) | +| GLM-4-9B-Chat-HF | Chat | 128K | >= 4.46 | <= 0.6.2 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-hf)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-hf) | [🤖 ModelScope CPU](https://modelscope.cn/studios/dash-infer/GLM-4-Chat-DashInfer-Demo/summary)
[🤖 ModelScope vLLM](https://modelscope.cn/studios/ZhipuAI/glm-4-9b-chat-vllm/summary) | +| GLM-4-9B-Chat-1M | Chat | 1M | <= 4.45 | <= 0.6.2 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-1m)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-1m)
[🟣 WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4-9B-Chat-1M) | / | +| GLM-4-9B-Chat-1M-HF | Chat | 1M | >= 4.46 | <= 0.6.2 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-1m-hf)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-1m-hf) | / | +| GLM-4V-9B | Chat | 8K | >= 4.46 | >= 0.6.3 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4v-9b)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4v-9b)
[🟣 WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4V-9B) | [🤖 ModelScope](https://modelscope.cn/studios/ZhipuAI/glm-4v-9b-Demo/summary) | -## BenchMark +## BenchMarkß ### Typical Tasks From 471943bfd785b7e9d6390596f08e62ec65c0ace7 Mon Sep 17 00:00:00 2001 From: sixgod Date: Fri, 1 Nov 2024 10:21:56 +0000 Subject: [PATCH 8/8] support INT4 inference --- basic_demo/trans_cli_vision_demo.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/basic_demo/trans_cli_vision_demo.py b/basic_demo/trans_cli_vision_demo.py index 758ccc6..adca35d 100644 --- a/basic_demo/trans_cli_vision_demo.py +++ b/basic_demo/trans_cli_vision_demo.py @@ -17,7 +17,7 @@ from transformers import ( AutoTokenizer, StoppingCriteria, StoppingCriteriaList, - TextIteratorStreamer, AutoModel + TextIteratorStreamer, AutoModel, BitsAndBytesConfig ) from PIL import Image @@ -29,6 +29,8 @@ tokenizer = AutoTokenizer.from_pretrained( trust_remote_code=True, encode_special_tokens=True ) + +## For BF16 inference model = AutoModel.from_pretrained( MODEL_PATH, trust_remote_code=True, @@ -37,7 +39,6 @@ model = AutoModel.from_pretrained( device_map="auto", ).eval() - ## For INT4 inference # model = AutoModel.from_pretrained( # MODEL_PATH,