From e3e6de52c45290291f984cfe934839d0954a17ef Mon Sep 17 00:00:00 2001
From: sixgod
Date: Fri, 1 Nov 2024 09:00:39 +0000
Subject: [PATCH 1/8] transformers4.46 and vllm0.6.3
---
README.md | 21 ++++++++++++---------
README_en.md | 20 +++++++++++---------
basic_demo/glm4v_server.py | 5 ++++-
basic_demo/glm_server.py | 5 +----
basic_demo/openai_api_request.py | 15 ++++++++++-----
basic_demo/requirements.txt | 2 +-
basic_demo/trans_cli_vision_demo.py | 2 +-
basic_demo/trans_web_demo.py | 5 ++++-
basic_demo/trans_web_vision_demo.py | 4 ++--
9 files changed, 46 insertions(+), 33 deletions(-)
diff --git a/README.md b/README.md
index b82ef4b..72eb90b 100644
--- a/README.md
+++ b/README.md
@@ -11,6 +11,7 @@ Read this in [English](README_en.md)
## 项目更新
+- ๐ฅ๐ฅ **News**: ```2024/11/01```: ๆฏๆไบ GLM-4-9B-Chat-hf ๅ GLM-4v-9B ๆจกๅๅจ vLLM 0.6.3 ไปฅไธ็ๆฌๅ transformers 4.46.0 ไปฅไธ็ๆฌ่ฟ่ก
- ๐ฅ๐ฅ **News**: ```2024/10/25```: ๆไปฌๅผๆบไบ็ซฏๅฐ็ซฏไธญ่ฑ่ฏญ้ณๅฏน่ฏๆจกๅ [GLM-4-Voice](https://github.com/THUDM/GLM-4-Voice)
- ๐ฅ **News**: ```2024/10/12```: ๅขๅ ไบ GLM-4v-9B ๆจกๅๅฏนvllmๆกๆถ็ๆฏๆ
- ๐ฅ **News**: ```2024/09/06```: ๅขๅ ไบๅจ GLM-4v-9B ๆจกๅไธๆๅปบOpenAI APIๅ
ผๅฎน็ๆๅก็ซฏ
@@ -54,12 +55,14 @@ GLM-4V-9B。**GLM-4V-9B** 具备 1120 * 1120 高分辨率下的中英双语多
## Model List
-| Model | Type | Seq Length | Download | Online Demo |
-|------------------|------|------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
-| GLM-4-9B | Base | 8K | [๐ค Huggingface](https://huggingface.co/THUDM/glm-4-9b) [๐ค ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b) [๐ฃ WiseModel](https://wisemodel.cn/models/ZhipuAI/glm-4-9b) | / |
-| GLM-4-9B-Chat | Chat | 128K | [๐ค Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat) [๐ค ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat) [๐ฃ WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4-9B-Chat) | [๐ค ModelScope CPU](https://modelscope.cn/studios/dash-infer/GLM-4-Chat-DashInfer-Demo/summary)
[๐ค ModelScope vLLM](https://modelscope.cn/studios/ZhipuAI/glm-4-9b-chat-vllm/summary) |
-| GLM-4-9B-Chat-1M | Chat | 1M | [๐ค Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-1m) [๐ค ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-1m) [๐ฃ WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4-9B-Chat-1M) | / |
-| GLM-4V-9B | Chat | 8K | [๐ค Huggingface](https://huggingface.co/THUDM/glm-4v-9b) [๐ค ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4v-9b) [๐ฃ WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4V-9B ) | [๐ค ModelScope](https://modelscope.cn/studios/ZhipuAI/glm-4v-9b-Demo/summary) |
+| Model | Type | Seq Length | Transformers | vLLM | Download | Online Demo |
+|---------------------|------|------------|--------------|----------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| GLM-4-9B | Base | 8K | <= 4.45 | <= 0.6.2 | [๐ค Huggingface](https://huggingface.co/THUDM/glm-4-9b) [๐ค ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b) [๐ฃ WiseModel](https://wisemodel.cn/models/ZhipuAI/glm-4-9b) | / |
+| GLM-4-9B-Chat | Chat | 128K | <= 4.45 | <= 0.6.2 | [๐ค Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat) [๐ค ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat) [๐ฃ WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4-9B-Chat) | [๐ค ModelScope CPU](https://modelscope.cn/studios/dash-infer/GLM-4-Chat-DashInfer-Demo/summary)
[๐ค ModelScope vLLM](https://modelscope.cn/studios/ZhipuAI/glm-4-9b-chat-vllm/summary) |
+| GLM-4-9B-Chat-HF | Chat | 128K | >= 4.46 | <= 0.6.2 | [๐ค Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-hf) [๐ค ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-hf) | [๐ค ModelScope CPU](https://modelscope.cn/studios/dash-infer/GLM-4-Chat-DashInfer-Demo/summary)
[๐ค ModelScope vLLM](https://modelscope.cn/studios/ZhipuAI/glm-4-9b-chat-vllm/summary) |
+| GLM-4-9B-Chat-1M | Chat | 1M | <= 4.45 | <= 0.6.2 | [๐ค Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-1m) [๐ค ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-1m) [๐ฃ WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4-9B-Chat-1M) | / |
+| GLM-4-9B-Chat-1M-HF | Chat | 1M | >= 4.46 | <= 0.6.2 | [๐ค Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-1m-hf) [๐ค ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-1m-hf) | / |
+| GLM-4V-9B | Chat | 8K | >= 4.46 | <= 0.6.2 | [๐ค Huggingface](https://huggingface.co/THUDM/glm-4v-9b) [๐ค ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4v-9b) [๐ฃ WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4V-9B) | [๐ค ModelScope](https://modelscope.cn/studios/ZhipuAI/glm-4v-9b-Demo/summary) |
## 评测结果
@@ -151,7 +154,7 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '0' # 设置 GPU 编号，如果单机单卡指定一个，单机多卡指定多个 GPU 编号
-MODEL_PATH = "THUDM/glm-4-9b-chat"
+MODEL_PATH = "THUDM/glm-4-9b-chat-hf"
device = "cuda" if torch.cuda.is_available() else "cpu"
@@ -192,7 +195,7 @@ from vllm import LLM, SamplingParams
# max_model_len, tp_size = 1048576, 4
# ๅฆๆ้่ง OOM ็ฐ่ฑก๏ผๅปบ่ฎฎๅๅฐmax_model_len๏ผๆ่
ๅขๅ tp_size
max_model_len, tp_size = 131072, 1
-model_name = "THUDM/glm-4-9b-chat"
+model_name = "THUDM/glm-4-9b-chat-hf"
prompt = [{"role": "user", "content": "你好"}]
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
@@ -290,7 +293,7 @@ for o in outputs:
## 完整项目列表
-如果你想更进一步了解 GLM-4-9B 系列开源模型，本开源仓库通过以下内容为开发者提供基础的 GLM-4-9B的使用和开发代码
+如果你想更进一步了解 GLM-4-9B 系列开源模型，本开源仓库通过以下内容为开发者提供基础的 GLM-4-9B 的使用和开发代码
+ [basic_demo](basic_demo/README.md): 在这里包含了
+ 使用 transformers 和 vLLM 后端的交互代码
diff --git a/README_en.md b/README_en.md
index 4719a3e..554f0cd 100644
--- a/README_en.md
+++ b/README_en.md
@@ -8,7 +8,7 @@
## Update
-
+- 🔥🔥 **News**: ```2024/11/01```: Support for GLM-4-9B-Chat-hf and GLM-4v-9B models on vLLM >= 0.6.3 and transformers >= 4.46.0
- 🔥🔥 **News**: ```2024/10/25```: We have open-sourced the end-to-end Chinese-English voice dialogue model [GLM-4-Voice](https://github.com/THUDM/GLM-4-Voice).
- 🔥 **News**: ```2024/10/12```: Add GLM-4v-9B model support for vllm framework.
- 🔥 **News**: ```2024/09/06```: Add support for OpenAI API server on the GLM-4v-9B model.
@@ -67,12 +67,14 @@ GPT-4-turbo-2024-04-09, Gemini 1.0 Pro, Qwen-VL-Max, and Claude 3 Opus.
## Model List
-| Model | Type | Seq Length | Download | Online Demo |
-|------------------|------|------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
-| GLM-4-9B | Base | 8K | [๐ค Huggingface](https://huggingface.co/THUDM/glm-4-9b) [๐ค ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b) [๐ฃ WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4-9B) | / |
-| GLM-4-9B-Chat | Chat | 128K | [๐ค Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat) [๐ค ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat) [๐ฃ WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4-9B-Chat) | [๐ค ModelScope CPU](https://modelscope.cn/studios/dash-infer/GLM-4-Chat-DashInfer-Demo/summary)
[๐ค ModelScope vLLM](https://modelscope.cn/studios/ZhipuAI/glm-4-9b-chat-vllm/summary) |
-| GLM-4-9B-Chat-1M | Chat | 1M | [๐ค Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-1m) [๐ค ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-1m) [๐ฃ WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4-9B-Chat-1M) | / |
-| GLM-4V-9B | Chat | 8K | [๐ค Huggingface](https://huggingface.co/THUDM/glm-4v-9b) [๐ค ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4v-9b) [๐ฃ WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4V-9B) | [๐ค ModelScope](https://modelscope.cn/studios/ZhipuAI/glm-4v-9b-Demo/summary) |
+| Model | Type | Seq Length | Transformers | vLLM | Download | Online Demo |
+|---------------------|------|------------|--------------|----------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| GLM-4-9B | Base | 8K | <= 4.45 | <= 0.6.2 | [๐ค Huggingface](https://huggingface.co/THUDM/glm-4-9b) [๐ค ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b) [๐ฃ WiseModel](https://wisemodel.cn/models/ZhipuAI/glm-4-9b) | / |
+| GLM-4-9B-Chat | Chat | 128K | <= 4.45 | <= 0.6.2 | [๐ค Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat) [๐ค ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat) [๐ฃ WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4-9B-Chat) | [๐ค ModelScope CPU](https://modelscope.cn/studios/dash-infer/GLM-4-Chat-DashInfer-Demo/summary)
[๐ค ModelScope vLLM](https://modelscope.cn/studios/ZhipuAI/glm-4-9b-chat-vllm/summary) |
+| GLM-4-9B-Chat-HF | Chat | 128K | >= 4.46 | <= 0.6.2 | [๐ค Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-hf) [๐ค ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-hf) | [๐ค ModelScope CPU](https://modelscope.cn/studios/dash-infer/GLM-4-Chat-DashInfer-Demo/summary)
[๐ค ModelScope vLLM](https://modelscope.cn/studios/ZhipuAI/glm-4-9b-chat-vllm/summary) |
+| GLM-4-9B-Chat-1M | Chat | 1M | <= 4.45 | <= 0.6.2 | [๐ค Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-1m) [๐ค ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-1m) [๐ฃ WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4-9B-Chat-1M) | / |
+| GLM-4-9B-Chat-1M-HF | Chat | 1M | >= 4.46 | <= 0.6.2 | [๐ค Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-1m-hf) [๐ค ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-1m-hf) | / |
+| GLM-4V-9B | Chat | 8K | >= 4.46 | <= 0.6.2 | [๐ค Huggingface](https://huggingface.co/THUDM/glm-4v-9b) [๐ค ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4v-9b) [๐ฃ WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4V-9B) | [๐ค ModelScope](https://modelscope.cn/studios/ZhipuAI/glm-4v-9b-Demo/summary) |
## BenchMark
@@ -168,7 +170,7 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '0' # Set the GPU number. If inference with multiple GPUs, set multiple GPU numbers
-MODEL_PATH = "THUDM/glm-4-9b-chat"
+MODEL_PATH = "THUDM/glm-4-9b-chat-hf"
device = "cuda" if torch.cuda.is_available() else "cpu"
@@ -208,7 +210,7 @@ from vllm import LLM, SamplingParams
# GLM-4-9B-Chat
# If you encounter OOM, you can try to reduce max_model_len or increase tp_size
max_model_len, tp_size = 131072, 1
-model_name = "THUDM/glm-4-9b-chat"
+model_name = "THUDM/glm-4-9b-chat-hf"
prompt = [{"role": "user", "content": "你好"}]
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
diff --git a/basic_demo/glm4v_server.py b/basic_demo/glm4v_server.py
index cffa944..549f661 100644
--- a/basic_demo/glm4v_server.py
+++ b/basic_demo/glm4v_server.py
@@ -302,7 +302,10 @@ def generate_stream_glm4v(model: AutoModel, tokenizer: AutoTokenizer, params: di
inputs.append({"role": "user", "content": user_msg})
if model_msg:
inputs.append({"role": "assistant", "content": model_msg})
- inputs.append({"role": "user", "content": query, "image": image_list[0]})
+ if len(image_list) >= 1:
+ inputs.append({"role": "user", "content": query, "image": image_list[0]})
+ else:
+ inputs.append({"role": "user", "content": query})
model_inputs = tokenizer.apply_chat_template(
inputs,
diff --git a/basic_demo/glm_server.py b/basic_demo/glm_server.py
index 2ae8b22..d975dea 100644
--- a/basic_demo/glm_server.py
+++ b/basic_demo/glm_server.py
@@ -207,9 +207,6 @@ async def generate_stream_glm4(params):
"top_p": top_p,
"top_k": -1,
"repetition_penalty": repetition_penalty,
- "use_beam_search": False,
- "length_penalty": 1,
- "early_stopping": False,
"stop_token_ids": [151329, 151336, 151338],
"ignore_eos": False,
"max_tokens": max_new_tokens,
@@ -218,7 +215,7 @@ async def generate_stream_glm4(params):
"skip_special_tokens": True,
}
sampling_params = SamplingParams(**params_dict)
- async for output in engine.generate(inputs=inputs, sampling_params=sampling_params, request_id=f"{time.time()}"):
+ async for output in engine.generate(prompt=inputs, sampling_params=sampling_params, request_id=f"{time.time()}"):
output_len = len(output.outputs[0].token_ids)
input_len = len(output.prompt_token_ids)
ret = {
diff --git a/basic_demo/openai_api_request.py b/basic_demo/openai_api_request.py
index 92bee35..f1c7fab 100644
--- a/basic_demo/openai_api_request.py
+++ b/basic_demo/openai_api_request.py
@@ -95,12 +95,12 @@ def function_chat(use_stream=False):
def simple_chat(use_stream=False):
messages = [
{
- "role": "system",
+ "role": "user",
"content": "请在你输出的时候都带上“喵喵喵”三个字，放在开头。",
},
{
"role": "user",
- "content": "你是谁"
+ "content": "你是猫吗"
}
]
response = client.chat.completions.create(
@@ -201,7 +201,12 @@ def glm4v_simple_image_chat(use_stream=False, img_path=None):
if __name__ == "__main__":
- simple_chat(use_stream=False)
- # function_chat(use_stream=False)
- # glm4v_simple_image_chat(use_stream=False, img_path="demo.jpg")
+ # Testing the text model
+ simple_chat(use_stream=False)
+
+ # Testing the text model with tools
+ # function_chat(use_stream=False)
+
+ # Testing images of multimodal models
+ # glm4v_simple_image_chat(use_stream=False, img_path="demo.jpg")
diff --git a/basic_demo/requirements.txt b/basic_demo/requirements.txt
index 4ff1483..0480a78 100644
--- a/basic_demo/requirements.txt
+++ b/basic_demo/requirements.txt
@@ -17,7 +17,7 @@ pillow>=10.4.0
sse-starlette>=2.1.3
bitsandbytes>=0.43.3 # INT4 Loading
-# vllm>=0.6.4 # using with VLLM Framework
+# vllm>=0.6.3 # using with VLLM Framework
# flash-attn>=2.6.3 # using with flash-attention 2
# PEFT model, not need if you don't use PEFT finetune model.
# peft>=0.13.0 # Using with finetune model
\ No newline at end of file
diff --git a/basic_demo/trans_cli_vision_demo.py b/basic_demo/trans_cli_vision_demo.py
index 30a78d2..758ccc6 100644
--- a/basic_demo/trans_cli_vision_demo.py
+++ b/basic_demo/trans_cli_vision_demo.py
@@ -17,7 +17,7 @@ from transformers import (
AutoTokenizer,
StoppingCriteria,
StoppingCriteriaList,
- TextIteratorStreamer, AutoModel, BitsAndBytesConfig
+ TextIteratorStreamer, AutoModel
)
from PIL import Image
diff --git a/basic_demo/trans_web_demo.py b/basic_demo/trans_web_demo.py
index 1a470de..2d8e35a 100644
--- a/basic_demo/trans_web_demo.py
+++ b/basic_demo/trans_web_demo.py
@@ -3,6 +3,9 @@ This script creates an interactive web demo for the GLM-4-9B model using Gradio,
a Python library for building quick and easy UI components for machine learning models.
It's designed to showcase the capabilities of the GLM-4-9B model in a user-friendly interface,
allowing users to interact with the model through a chat-like interface.
+
+Note:
+ Using with glm-4-9b-chat-hf will require `transformers>=4.46.0`.
"""
import os
@@ -27,7 +30,7 @@ from transformers import (
ModelType = Union[PreTrainedModel, PeftModelForCausalLM]
TokenizerType = Union[PreTrainedTokenizer, PreTrainedTokenizerFast]
-MODEL_PATH = os.environ.get('MODEL_PATH', 'THUDM/glm-4-9b-chat')
+MODEL_PATH = os.environ.get('MODEL_PATH', 'THUDM/glm-4-9b-chat-hf')
TOKENIZER_PATH = os.environ.get("TOKENIZER_PATH", MODEL_PATH)
diff --git a/basic_demo/trans_web_vision_demo.py b/basic_demo/trans_web_vision_demo.py
index 05d1563..91d9875 100644
--- a/basic_demo/trans_web_vision_demo.py
+++ b/basic_demo/trans_web_vision_demo.py
@@ -7,7 +7,7 @@ Usage:
Requirements:
- Gradio package
- - Type `pip install gradio` to install Gradio.
+ - Type `pip install gradio==4.44.1` to install Gradio.
"""
import os
@@ -18,7 +18,7 @@ from transformers import (
AutoTokenizer,
StoppingCriteria,
StoppingCriteriaList,
- TextIteratorStreamer, AutoModel, BitsAndBytesConfig
+ TextIteratorStreamer, AutoModel
)
from PIL import Image
import requests
From 9b39ba6d1bb641595e9b48c0a3341191111e3b6c Mon Sep 17 00:00:00 2001
From: sixgod
Date: Fri, 1 Nov 2024 09:06:04 +0000
Subject: [PATCH 2/8] transformers4.46 and vllm0.6.3
---
README.md | 12 ++++++------
README_en.md | 8 ++++----
basic_demo/requirements.txt | 2 +-
3 files changed, 11 insertions(+), 11 deletions(-)
diff --git a/README.md b/README.md
index 72eb90b..01d7ad6 100644
--- a/README.md
+++ b/README.md
@@ -57,12 +57,12 @@ GLM-4V-9B。**GLM-4V-9B** 具备 1120 * 1120 高分辨率下的中英双语多
| Model | Type | Seq Length | Transformers | vLLM | Download | Online Demo |
|---------------------|------|------------|--------------|----------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
-| GLM-4-9B | Base | 8K | <= 4.45 | <= 0.6.2 | [๐ค Huggingface](https://huggingface.co/THUDM/glm-4-9b) [๐ค ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b) [๐ฃ WiseModel](https://wisemodel.cn/models/ZhipuAI/glm-4-9b) | / |
-| GLM-4-9B-Chat | Chat | 128K | <= 4.45 | <= 0.6.2 | [๐ค Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat) [๐ค ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat) [๐ฃ WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4-9B-Chat) | [๐ค ModelScope CPU](https://modelscope.cn/studios/dash-infer/GLM-4-Chat-DashInfer-Demo/summary)
[๐ค ModelScope vLLM](https://modelscope.cn/studios/ZhipuAI/glm-4-9b-chat-vllm/summary) |
-| GLM-4-9B-Chat-HF | Chat | 128K | >= 4.46 | <= 0.6.2 | [๐ค Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-hf) [๐ค ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-hf) | [๐ค ModelScope CPU](https://modelscope.cn/studios/dash-infer/GLM-4-Chat-DashInfer-Demo/summary)
[๐ค ModelScope vLLM](https://modelscope.cn/studios/ZhipuAI/glm-4-9b-chat-vllm/summary) |
-| GLM-4-9B-Chat-1M | Chat | 1M | <= 4.45 | <= 0.6.2 | [๐ค Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-1m) [๐ค ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-1m) [๐ฃ WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4-9B-Chat-1M) | / |
-| GLM-4-9B-Chat-1M-HF | Chat | 1M | >= 4.46 | <= 0.6.2 | [๐ค Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-1m-hf) [๐ค ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-1m-hf) | / |
-| GLM-4V-9B | Chat | 8K | >= 4.46 | <= 0.6.2 | [๐ค Huggingface](https://huggingface.co/THUDM/glm-4v-9b) [๐ค ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4v-9b) [๐ฃ WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4V-9B) | [๐ค ModelScope](https://modelscope.cn/studios/ZhipuAI/glm-4v-9b-Demo/summary) |
+| GLM-4-9B | Base | 8K | <= 4.45 | <= 0.6.2 | [๐ค Huggingface](https://huggingface.co/THUDM/glm-4-9b)
[๐ค ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b)
[๐ฃ WiseModel](https://wisemodel.cn/models/ZhipuAI/glm-4-9b) | / |
+| GLM-4-9B-Chat | Chat | 128K | <= 4.45 | <= 0.6.2 | [๐ค Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat)
[๐ค ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat)
[๐ฃ WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4-9B-Chat) | [๐ค ModelScope CPU](https://modelscope.cn/studios/dash-infer/GLM-4-Chat-DashInfer-Demo/summary)
[๐ค ModelScope vLLM](https://modelscope.cn/studios/ZhipuAI/glm-4-9b-chat-vllm/summary) |
+| GLM-4-9B-Chat-HF | Chat | 128K | >= 4.46 | <= 0.6.2 | [๐ค Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-hf)
[๐ค ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-hf) | [๐ค ModelScope CPU](https://modelscope.cn/studios/dash-infer/GLM-4-Chat-DashInfer-Demo/summary)
[๐ค ModelScope vLLM](https://modelscope.cn/studios/ZhipuAI/glm-4-9b-chat-vllm/summary) |
+| GLM-4-9B-Chat-1M | Chat | 1M | <= 4.45 | <= 0.6.2 | [๐ค Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-1m)
[๐ค ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-1m)
[๐ฃ WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4-9B-Chat-1M) | / |
+| GLM-4-9B-Chat-1M-HF | Chat | 1M | >= 4.46 | <= 0.6.2 | [๐ค Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-1m-hf)
[๐ค ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-1m-hf) | / |
+| GLM-4V-9B | Chat | 8K | >= 4.46 | >= 0.6.2 | [๐ค Huggingface](https://huggingface.co/THUDM/glm-4v-9b)
[๐ค ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4v-9b)
[๐ฃ WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4V-9B) | [๐ค ModelScope](https://modelscope.cn/studios/ZhipuAI/glm-4v-9b-Demo/summary) |
## 评测结果
diff --git a/README_en.md b/README_en.md
index 554f0cd..eb8be2e 100644
--- a/README_en.md
+++ b/README_en.md
@@ -69,12 +69,12 @@ GPT-4-turbo-2024-04-09, Gemini 1.0 Pro, Qwen-VL-Max, and Claude 3 Opus.
| Model | Type | Seq Length | Transformers | vLLM | Download | Online Demo |
|---------------------|------|------------|--------------|----------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
-| GLM-4-9B | Base | 8K | <= 4.45 | <= 0.6.2 | [๐ค Huggingface](https://huggingface.co/THUDM/glm-4-9b) [๐ค ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b) [๐ฃ WiseModel](https://wisemodel.cn/models/ZhipuAI/glm-4-9b) | / |
-| GLM-4-9B-Chat | Chat | 128K | <= 4.45 | <= 0.6.2 | [๐ค Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat) [๐ค ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat) [๐ฃ WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4-9B-Chat) | [๐ค ModelScope CPU](https://modelscope.cn/studios/dash-infer/GLM-4-Chat-DashInfer-Demo/summary)
[๐ค ModelScope vLLM](https://modelscope.cn/studios/ZhipuAI/glm-4-9b-chat-vllm/summary) |
+| GLM-4-9B | Base | 8K | <= 4.45 | <= 0.6.2 | [๐ค Huggingface](https://huggingface.co/THUDM/glm-4-9b)
[๐ค ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b)
[๐ฃ WiseModel](https://wisemodel.cn/models/ZhipuAI/glm-4-9b) | / |
+| GLM-4-9B-Chat | Chat | 128K | <= 4.45 | <= 0.6.2 | [๐ค Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat)
[๐ค ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat)
[๐ฃ WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4-9B-Chat) | [๐ค ModelScope CPU](https://modelscope.cn/studios/dash-infer/GLM-4-Chat-DashInfer-Demo/summary)
[๐ค ModelScope vLLM](https://modelscope.cn/studios/ZhipuAI/glm-4-9b-chat-vllm/summary) |
| GLM-4-9B-Chat-HF | Chat | 128K | >= 4.46 | <= 0.6.2 | [๐ค Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-hf) [๐ค ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-hf) | [๐ค ModelScope CPU](https://modelscope.cn/studios/dash-infer/GLM-4-Chat-DashInfer-Demo/summary)
[๐ค ModelScope vLLM](https://modelscope.cn/studios/ZhipuAI/glm-4-9b-chat-vllm/summary) |
-| GLM-4-9B-Chat-1M | Chat | 1M | <= 4.45 | <= 0.6.2 | [๐ค Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-1m) [๐ค ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-1m) [๐ฃ WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4-9B-Chat-1M) | / |
+| GLM-4-9B-Chat-1M | Chat | 1M | <= 4.45 | <= 0.6.2 | [๐ค Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-1m)
[๐ค ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-1m)
[๐ฃ WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4-9B-Chat-1M) | / |
| GLM-4-9B-Chat-1M-HF | Chat | 1M | >= 4.46 | <= 0.6.2 | [๐ค Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-1m-hf) [๐ค ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-1m-hf) | / |
-| GLM-4V-9B | Chat | 8K | >= 4.46 | <= 0.6.2 | [๐ค Huggingface](https://huggingface.co/THUDM/glm-4v-9b) [๐ค ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4v-9b) [๐ฃ WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4V-9B) | [๐ค ModelScope](https://modelscope.cn/studios/ZhipuAI/glm-4v-9b-Demo/summary) |
+| GLM-4V-9B | Chat | 8K | >= 4.46 | >= 0.6.3 | [๐ค Huggingface](https://huggingface.co/THUDM/glm-4v-9b)
[๐ค ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4v-9b)
[๐ฃ WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4V-9B) | [๐ค ModelScope](https://modelscope.cn/studios/ZhipuAI/glm-4v-9b-Demo/summary) |
## BenchMark
diff --git a/basic_demo/requirements.txt b/basic_demo/requirements.txt
index 0480a78..cad81ed 100644
--- a/basic_demo/requirements.txt
+++ b/basic_demo/requirements.txt
@@ -10,7 +10,7 @@ tiktoken>=0.7.0
numpy==1.26.4 # Need less than 2.0.0
accelerate>=1.0.1
sentence_transformers>=3.1.1
-gradio>=4.44.1 # web demo
+gradio==4.44.1 # web demo
openai>=1.51.0 # openai demo
einops>=0.8.0
pillow>=10.4.0
From 1d68300ecfa1625ca40440526b4658c2cde2ca1a Mon Sep 17 00:00:00 2001
From: sixgod
Date: Fri, 1 Nov 2024 09:11:59 +0000
Subject: [PATCH 3/8] transformers4.46 and vllm0.6.3
---
README.md | 16 ++++++++--------
README_en.md | 16 ++++++++--------
2 files changed, 16 insertions(+), 16 deletions(-)
diff --git a/README.md b/README.md
index 01d7ad6..26554d8 100644
--- a/README.md
+++ b/README.md
@@ -55,14 +55,14 @@ GLM-4V-9B。**GLM-4V-9B** 具备 1120 * 1120 高分辨率下的中英双语多
## Model List
-| Model | Type | Seq Length | Transformers | vLLM | Download | Online Demo |
-|---------------------|------|------------|--------------|----------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
-| GLM-4-9B | Base | 8K | <= 4.45 | <= 0.6.2 | [๐ค Huggingface](https://huggingface.co/THUDM/glm-4-9b)
[๐ค ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b)
[๐ฃ WiseModel](https://wisemodel.cn/models/ZhipuAI/glm-4-9b) | / |
-| GLM-4-9B-Chat | Chat | 128K | <= 4.45 | <= 0.6.2 | [๐ค Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat)
[๐ค ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat)
[๐ฃ WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4-9B-Chat) | [๐ค ModelScope CPU](https://modelscope.cn/studios/dash-infer/GLM-4-Chat-DashInfer-Demo/summary)
[๐ค ModelScope vLLM](https://modelscope.cn/studios/ZhipuAI/glm-4-9b-chat-vllm/summary) |
-| GLM-4-9B-Chat-HF | Chat | 128K | >= 4.46 | <= 0.6.2 | [๐ค Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-hf)
[๐ค ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-hf) | [๐ค ModelScope CPU](https://modelscope.cn/studios/dash-infer/GLM-4-Chat-DashInfer-Demo/summary)
[๐ค ModelScope vLLM](https://modelscope.cn/studios/ZhipuAI/glm-4-9b-chat-vllm/summary) |
-| GLM-4-9B-Chat-1M | Chat | 1M | <= 4.45 | <= 0.6.2 | [๐ค Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-1m)
[๐ค ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-1m)
[๐ฃ WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4-9B-Chat-1M) | / |
-| GLM-4-9B-Chat-1M-HF | Chat | 1M | >= 4.46 | <= 0.6.2 | [๐ค Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-1m-hf)
[๐ค ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-1m-hf) | / |
-| GLM-4V-9B | Chat | 8K | >= 4.46 | >= 0.6.2 | [๐ค Huggingface](https://huggingface.co/THUDM/glm-4v-9b)
[๐ค ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4v-9b)
[๐ฃ WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4V-9B) | [๐ค ModelScope](https://modelscope.cn/studios/ZhipuAI/glm-4v-9b-Demo/summary) |
+| Model | Type | Seq Length | Transformers | vLLM | Download | Online Demo |
+|---------------------|------|------------|--------------|----------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| GLM-4-9B | Base | 8K | <= 4.45 | <= 0.6.2 | [๐ค Huggingface](https://huggingface.co/THUDM/glm-4-9b)
[๐ค ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b)
[๐ฃ WiseModel](https://wisemodel.cn/models/ZhipuAI/glm-4-9b) | / |
+| GLM-4-9B-Chat | Chat | 128K | <= 4.45 | <= 0.6.2 | [๐ค Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat)
[๐ค ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat)
[๐ฃ WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4-9B-Chat) | [๐ค ModelScope CPU](https://modelscope.cn/studios/dash-infer/GLM-4-Chat-DashInfer-Demo/summary)
[๐ค ModelScope vLLM](https://modelscope.cn/studios/ZhipuAI/glm-4-9b-chat-vllm/summary) |
+| GLM-4-9B-Chat-HF | Chat | 128K | >= 4.46 | <= 0.6.2 | [๐ค Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-hf)
[๐ค ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-hf) | [๐ค ModelScope CPU](https://modelscope.cn/studios/dash-infer/GLM-4-Chat-DashInfer-Demo/summary)
[๐ค ModelScope vLLM](https://modelscope.cn/studios/ZhipuAI/glm-4-9b-chat-vllm/summary) |
+| GLM-4-9B-Chat-1M | Chat | 1M | <= 4.45 | <= 0.6.2 | [๐ค Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-1m)
[๐ค ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-1m)
[๐ฃ WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4-9B-Chat-1M) | / |
+| GLM-4-9B-Chat-1M-HF | Chat | 1M | >= 4.46 | <= 0.6.2 | [๐ค Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-1m-hf)
[๐ค ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-1m-hf) | / |
+| GLM-4V-9B | Chat | 8K | >= 4.46 | >= 0.6.3 | [๐ค Huggingface](https://huggingface.co/THUDM/glm-4v-9b)
[๐ค ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4v-9b)
[๐ฃ WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4V-9B) | [๐ค ModelScope](https://modelscope.cn/studios/ZhipuAI/glm-4v-9b-Demo/summary) |
## 评测结果
diff --git a/README_en.md b/README_en.md
index eb8be2e..0ad31db 100644
--- a/README_en.md
+++ b/README_en.md
@@ -67,14 +67,14 @@ GPT-4-turbo-2024-04-09, Gemini 1.0 Pro, Qwen-VL-Max, and Claude 3 Opus.
## Model List
-| Model | Type | Seq Length | Transformers | vLLM | Download | Online Demo |
-|---------------------|------|------------|--------------|----------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
-| GLM-4-9B | Base | 8K | <= 4.45 | <= 0.6.2 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4-9b)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b)
[🟣 WiseModel](https://wisemodel.cn/models/ZhipuAI/glm-4-9b) | / |
-| GLM-4-9B-Chat | Chat | 128K | <= 4.45 | <= 0.6.2 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat)
[🟣 WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4-9B-Chat) | [🤖 ModelScope CPU](https://modelscope.cn/studios/dash-infer/GLM-4-Chat-DashInfer-Demo/summary)
[🤖 ModelScope vLLM](https://modelscope.cn/studios/ZhipuAI/glm-4-9b-chat-vllm/summary) |
-| GLM-4-9B-Chat-HF | Chat | 128K | >= 4.46 | <= 0.6.2 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-hf) [🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-hf) | [🤖 ModelScope CPU](https://modelscope.cn/studios/dash-infer/GLM-4-Chat-DashInfer-Demo/summary)
[🤖 ModelScope vLLM](https://modelscope.cn/studios/ZhipuAI/glm-4-9b-chat-vllm/summary) |
-| GLM-4-9B-Chat-1M | Chat | 1M | <= 4.45 | <= 0.6.2 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-1m)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-1m)
[🟣 WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4-9B-Chat-1M) | / |
-| GLM-4-9B-Chat-1M-HF | Chat | 1M | >= 4.46 | <= 0.6.2 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-1m-hf) [🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-1m-hf) | / |
-| GLM-4V-9B | Chat | 8K | >= 4.46 | >= 0.6.3 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4v-9b)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4v-9b)
[🟣 WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4V-9B) | [🤖 ModelScope](https://modelscope.cn/studios/ZhipuAI/glm-4v-9b-Demo/summary) |
+| Model | Type | Seq Length | Transformers | vLLM | Download | Online Demo |
+|---------------------|------|------------|--------------|----------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| GLM-4-9B | Base | 8K | <= 4.45 | <= 0.6.2 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4-9b)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b)
[🟣 WiseModel](https://wisemodel.cn/models/ZhipuAI/glm-4-9b) | / |
+| GLM-4-9B-Chat | Chat | 128K | <= 4.45 | <= 0.6.2 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat)
[🟣 WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4-9B-Chat) | [🤖 ModelScope CPU](https://modelscope.cn/studios/dash-infer/GLM-4-Chat-DashInfer-Demo/summary)
[🤖 ModelScope vLLM](https://modelscope.cn/studios/ZhipuAI/glm-4-9b-chat-vllm/summary) |
+| GLM-4-9B-Chat-HF | Chat | 128K | >= 4.46 | <= 0.6.2 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-hf) [🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-hf) | [🤖 ModelScope CPU](https://modelscope.cn/studios/dash-infer/GLM-4-Chat-DashInfer-Demo/summary)
[🤖 ModelScope vLLM](https://modelscope.cn/studios/ZhipuAI/glm-4-9b-chat-vllm/summary) |
+| GLM-4-9B-Chat-1M | Chat | 1M | <= 4.45 | <= 0.6.2 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-1m)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-1m)
[🟣 WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4-9B-Chat-1M) | / |
+| GLM-4-9B-Chat-1M-HF | Chat | 1M | >= 4.46 | <= 0.6.2 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-1m-hf) [🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-1m-hf) | / |
+| GLM-4V-9B | Chat | 8K | >= 4.46 | >= 0.6.3 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4v-9b)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4v-9b)
[🟣 WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4V-9B) | [🤖 ModelScope](https://modelscope.cn/studios/ZhipuAI/glm-4v-9b-Demo/summary) |
## BenchMark
From a8069aca733ed7a088359400b85308dbf385ccc2 Mon Sep 17 00:00:00 2001
From: sixgod
Date: Fri, 1 Nov 2024 09:14:20 +0000
Subject: [PATCH 4/8] transformers4.46 and vllm0.6.3
---
README.md | 2 +-
README_en.md | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/README.md b/README.md
index 26554d8..924513d 100644
--- a/README.md
+++ b/README.md
@@ -56,7 +56,7 @@ GLM-4V-9B。**GLM-4V-9B** 具备 1120 * 1120 高分辨率下的中英双语多
## Model List
| Model | Type | Seq Length | Transformers | vLLM | Download | Online Demo |
-|---------------------|------|------------|--------------|----------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+|:-------------------:|:----:|:----------:|:------------:|:--------:|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|
| GLM-4-9B | Base | 8K | <= 4.45 | <= 0.6.2 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4-9b)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b)
[🟣 WiseModel](https://wisemodel.cn/models/ZhipuAI/glm-4-9b) | / |
| GLM-4-9B-Chat | Chat | 128K | <= 4.45 | <= 0.6.2 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat)
[🟣 WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4-9B-Chat) | [🤖 ModelScope CPU](https://modelscope.cn/studios/dash-infer/GLM-4-Chat-DashInfer-Demo/summary)
[🤖 ModelScope vLLM](https://modelscope.cn/studios/ZhipuAI/glm-4-9b-chat-vllm/summary) |
| GLM-4-9B-Chat-HF | Chat | 128K | >= 4.46 | <= 0.6.2 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-hf)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-hf) | [🤖 ModelScope CPU](https://modelscope.cn/studios/dash-infer/GLM-4-Chat-DashInfer-Demo/summary)
[🤖 ModelScope vLLM](https://modelscope.cn/studios/ZhipuAI/glm-4-9b-chat-vllm/summary) |
diff --git a/README_en.md b/README_en.md
index 0ad31db..689ce32 100644
--- a/README_en.md
+++ b/README_en.md
@@ -68,7 +68,7 @@ GPT-4-turbo-2024-04-09, Gemini 1.0 Pro, Qwen-VL-Max, and Claude 3 Opus.
## Model List
| Model | Type | Seq Length | Transformers | vLLM | Download | Online Demo |
-|---------------------|------|------------|--------------|----------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+|:-------------------:|:----:|:----------:|:------------:|:--------:|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|
| GLM-4-9B | Base | 8K | <= 4.45 | <= 0.6.2 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4-9b)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b)
[🟣 WiseModel](https://wisemodel.cn/models/ZhipuAI/glm-4-9b) | / |
| GLM-4-9B-Chat | Chat | 128K | <= 4.45 | <= 0.6.2 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat)
[🟣 WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4-9B-Chat) | [🤖 ModelScope CPU](https://modelscope.cn/studios/dash-infer/GLM-4-Chat-DashInfer-Demo/summary)
[🤖 ModelScope vLLM](https://modelscope.cn/studios/ZhipuAI/glm-4-9b-chat-vllm/summary) |
| GLM-4-9B-Chat-HF | Chat | 128K | >= 4.46 | <= 0.6.2 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-hf) [🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-hf) | [🤖 ModelScope CPU](https://modelscope.cn/studios/dash-infer/GLM-4-Chat-DashInfer-Demo/summary)
[🤖 ModelScope vLLM](https://modelscope.cn/studios/ZhipuAI/glm-4-9b-chat-vllm/summary) |
From 02831d280b314ac8aeafbcbb6c1c12229d2411ec Mon Sep 17 00:00:00 2001
From: sixgod
Date: Fri, 1 Nov 2024 09:20:57 +0000
Subject: [PATCH 5/8] transformers4.46 and vllm0.6.3
---
README.md | 2 ++
README_en.md | 3 +++
2 files changed, 5 insertions(+)
diff --git a/README.md b/README.md
index 924513d..343aa2b 100644
--- a/README.md
+++ b/README.md
@@ -55,6 +55,7 @@ GLM-4V-9B。**GLM-4V-9B** 具备 1120 * 1120 高分辨率下的中英双语多
## Model List
+
| Model | Type | Seq Length | Transformers | vLLM | Download | Online Demo |
|:-------------------:|:----:|:----------:|:------------:|:--------:|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|
| GLM-4-9B | Base | 8K | <= 4.45 | <= 0.6.2 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4-9b)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b)
[🟣 WiseModel](https://wisemodel.cn/models/ZhipuAI/glm-4-9b) | / |
@@ -63,6 +64,7 @@ GLM-4V-9B。**GLM-4V-9B** 具备 1120 * 1120 高分辨率下的中英双语多
| GLM-4-9B-Chat-1M | Chat | 1M | <= 4.45 | <= 0.6.2 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-1m)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-1m)
[🟣 WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4-9B-Chat-1M) | / |
| GLM-4-9B-Chat-1M-HF | Chat | 1M | >= 4.46 | <= 0.6.2 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-1m-hf)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-1m-hf) | / |
| GLM-4V-9B | Chat | 8K | >= 4.46 | >= 0.6.3 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4v-9b)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4v-9b)
[🟣 WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4V-9B) | [🤖 ModelScope](https://modelscope.cn/studios/ZhipuAI/glm-4v-9b-Demo/summary) |
+
## 评测结果
diff --git a/README_en.md b/README_en.md
index 689ce32..0626dcf 100644
--- a/README_en.md
+++ b/README_en.md
@@ -67,6 +67,7 @@ GPT-4-turbo-2024-04-09, Gemini 1.0 Pro, Qwen-VL-Max, and Claude 3 Opus.
## Model List
+
| Model | Type | Seq Length | Transformers | vLLM | Download | Online Demo |
|:-------------------:|:----:|:----------:|:------------:|:--------:|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|
| GLM-4-9B | Base | 8K | <= 4.45 | <= 0.6.2 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4-9b)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b)
[🟣 WiseModel](https://wisemodel.cn/models/ZhipuAI/glm-4-9b) | / |
@@ -75,6 +76,8 @@ GPT-4-turbo-2024-04-09, Gemini 1.0 Pro, Qwen-VL-Max, and Claude 3 Opus.
| GLM-4-9B-Chat-1M | Chat | 1M | <= 4.45 | <= 0.6.2 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-1m)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-1m)
[🟣 WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4-9B-Chat-1M) | / |
| GLM-4-9B-Chat-1M-HF | Chat | 1M | >= 4.46 | <= 0.6.2 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-1m-hf) [🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-1m-hf) | / |
| GLM-4V-9B | Chat | 8K | >= 4.46 | >= 0.6.3 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4v-9b)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4v-9b)
[🟣 WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4V-9B) | [🤖 ModelScope](https://modelscope.cn/studios/ZhipuAI/glm-4v-9b-Demo/summary) |
+
+
## BenchMark
From 2ba0aa4b54fafabc6d084f63588b7b62c4ad0279 Mon Sep 17 00:00:00 2001
From: sixgod
Date: Fri, 1 Nov 2024 09:23:05 +0000
Subject: [PATCH 6/8] transformers4.46 and vllm0.6.3
---
README.md | 2 --
README_en.md | 2 --
2 files changed, 4 deletions(-)
diff --git a/README.md b/README.md
index 343aa2b..924513d 100644
--- a/README.md
+++ b/README.md
@@ -55,7 +55,6 @@ GLM-4V-9B。**GLM-4V-9B** 具备 1120 * 1120 高分辨率下的中英双语多
## Model List
-
| Model | Type | Seq Length | Transformers | vLLM | Download | Online Demo |
|:-------------------:|:----:|:----------:|:------------:|:--------:|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|
| GLM-4-9B | Base | 8K | <= 4.45 | <= 0.6.2 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4-9b)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b)
[🟣 WiseModel](https://wisemodel.cn/models/ZhipuAI/glm-4-9b) | / |
@@ -64,7 +63,6 @@ GLM-4V-9B。**GLM-4V-9B** 具备 1120 * 1120 高分辨率下的中英双语多
| GLM-4-9B-Chat-1M | Chat | 1M | <= 4.45 | <= 0.6.2 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-1m)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-1m)
[🟣 WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4-9B-Chat-1M) | / |
| GLM-4-9B-Chat-1M-HF | Chat | 1M | >= 4.46 | <= 0.6.2 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-1m-hf)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-1m-hf) | / |
| GLM-4V-9B | Chat | 8K | >= 4.46 | >= 0.6.3 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4v-9b)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4v-9b)
[🟣 WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4V-9B) | [🤖 ModelScope](https://modelscope.cn/studios/ZhipuAI/glm-4v-9b-Demo/summary) |
-
## 评测结果
diff --git a/README_en.md b/README_en.md
index 0626dcf..10024a0 100644
--- a/README_en.md
+++ b/README_en.md
@@ -67,7 +67,6 @@ GPT-4-turbo-2024-04-09, Gemini 1.0 Pro, Qwen-VL-Max, and Claude 3 Opus.
## Model List
-
| Model | Type | Seq Length | Transformers | vLLM | Download | Online Demo |
|:-------------------:|:----:|:----------:|:------------:|:--------:|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|
| GLM-4-9B | Base | 8K | <= 4.45 | <= 0.6.2 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4-9b)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b)
[🟣 WiseModel](https://wisemodel.cn/models/ZhipuAI/glm-4-9b) | / |
@@ -76,7 +75,6 @@ GPT-4-turbo-2024-04-09, Gemini 1.0 Pro, Qwen-VL-Max, and Claude 3 Opus.
| GLM-4-9B-Chat-1M | Chat | 1M | <= 4.45 | <= 0.6.2 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-1m)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-1m)
[🟣 WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4-9B-Chat-1M) | / |
| GLM-4-9B-Chat-1M-HF | Chat | 1M | >= 4.46 | <= 0.6.2 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-1m-hf) [🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-1m-hf) | / |
| GLM-4V-9B | Chat | 8K | >= 4.46 | >= 0.6.3 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4v-9b)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4v-9b)
[🟣 WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4V-9B) | [🤖 ModelScope](https://modelscope.cn/studios/ZhipuAI/glm-4v-9b-Demo/summary) |
-
## BenchMark
From 24c8e1b3171990db2134a07b3cd8f7689adc583c Mon Sep 17 00:00:00 2001
From: sixgod
Date: Fri, 1 Nov 2024 09:24:39 +0000
Subject: [PATCH 7/8] transformers4.46 and vllm0.6.3
---
README.md | 16 ++++++++--------
README_en.md | 18 +++++++++---------
2 files changed, 17 insertions(+), 17 deletions(-)
diff --git a/README.md b/README.md
index 924513d..9af3a60 100644
--- a/README.md
+++ b/README.md
@@ -55,14 +55,14 @@ GLM-4V-9B。**GLM-4V-9B** 具备 1120 * 1120 高分辨率下的中英双语多
## Model List
-| Model | Type | Seq Length | Transformers | vLLM | Download | Online Demo |
-|:-------------------:|:----:|:----------:|:------------:|:--------:|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|
-| GLM-4-9B | Base | 8K | <= 4.45 | <= 0.6.2 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4-9b)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b)
[🟣 WiseModel](https://wisemodel.cn/models/ZhipuAI/glm-4-9b) | / |
-| GLM-4-9B-Chat | Chat | 128K | <= 4.45 | <= 0.6.2 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat)
[🟣 WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4-9B-Chat) | [🤖 ModelScope CPU](https://modelscope.cn/studios/dash-infer/GLM-4-Chat-DashInfer-Demo/summary)
[🤖 ModelScope vLLM](https://modelscope.cn/studios/ZhipuAI/glm-4-9b-chat-vllm/summary) |
-| GLM-4-9B-Chat-HF | Chat | 128K | >= 4.46 | <= 0.6.2 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-hf)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-hf) | [🤖 ModelScope CPU](https://modelscope.cn/studios/dash-infer/GLM-4-Chat-DashInfer-Demo/summary)
[🤖 ModelScope vLLM](https://modelscope.cn/studios/ZhipuAI/glm-4-9b-chat-vllm/summary) |
-| GLM-4-9B-Chat-1M | Chat | 1M | <= 4.45 | <= 0.6.2 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-1m)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-1m)
[🟣 WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4-9B-Chat-1M) | / |
-| GLM-4-9B-Chat-1M-HF | Chat | 1M | >= 4.46 | <= 0.6.2 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-1m-hf)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-1m-hf) | / |
-| GLM-4V-9B | Chat | 8K | >= 4.46 | >= 0.6.3 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4v-9b)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4v-9b)
[🟣 WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4V-9B) | [🤖 ModelScope](https://modelscope.cn/studios/ZhipuAI/glm-4v-9b-Demo/summary) |
+| Model | Type | Seq Length | Transformers | vLLM | Download | Online Demo |
+|:-------------------:|:----:|:----------:|:------------:|:--------:|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|
+| GLM-4-9B | Base | 8K | <= 4.45 | <= 0.6.2 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4-9b)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b)
[🟣 WiseModel](https://wisemodel.cn/models/ZhipuAI/glm-4-9b) | / |
+| GLM-4-9B-Chat | Chat | 128K | <= 4.45 | <= 0.6.2 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat)
[🟣 WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4-9B-Chat) | [🤖 ModelScope CPU](https://modelscope.cn/studios/dash-infer/GLM-4-Chat-DashInfer-Demo/summary)
[🤖 ModelScope vLLM](https://modelscope.cn/studios/ZhipuAI/glm-4-9b-chat-vllm/summary) |
+| GLM-4-9B-Chat-HF | Chat | 128K | >= 4.46 | <= 0.6.2 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-hf)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-hf) | [🤖 ModelScope CPU](https://modelscope.cn/studios/dash-infer/GLM-4-Chat-DashInfer-Demo/summary)
[🤖 ModelScope vLLM](https://modelscope.cn/studios/ZhipuAI/glm-4-9b-chat-vllm/summary) |
+| GLM-4-9B-Chat-1M | Chat | 1M | <= 4.45 | <= 0.6.2 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-1m)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-1m)
[🟣 WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4-9B-Chat-1M) | / |
+| GLM-4-9B-Chat-1M-HF | Chat | 1M | >= 4.46 | <= 0.6.2 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-1m-hf)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-1m-hf) | / |
+| GLM-4V-9B | Chat | 8K | >= 4.46 | >= 0.6.3 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4v-9b)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4v-9b)
[🟣 WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4V-9B) | [🤖 ModelScope](https://modelscope.cn/studios/ZhipuAI/glm-4v-9b-Demo/summary) |
## 评测结果
diff --git a/README_en.md b/README_en.md
index 10024a0..9f4cd42 100644
--- a/README_en.md
+++ b/README_en.md
@@ -67,17 +67,17 @@ GPT-4-turbo-2024-04-09, Gemini 1.0 Pro, Qwen-VL-Max, and Claude 3 Opus.
## Model List
-| Model | Type | Seq Length | Transformers | vLLM | Download | Online Demo |
-|:-------------------:|:----:|:----------:|:------------:|:--------:|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|
-| GLM-4-9B | Base | 8K | <= 4.45 | <= 0.6.2 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4-9b)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b)
[🟣 WiseModel](https://wisemodel.cn/models/ZhipuAI/glm-4-9b) | / |
-| GLM-4-9B-Chat | Chat | 128K | <= 4.45 | <= 0.6.2 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat)
[🟣 WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4-9B-Chat) | [🤖 ModelScope CPU](https://modelscope.cn/studios/dash-infer/GLM-4-Chat-DashInfer-Demo/summary)
[🤖 ModelScope vLLM](https://modelscope.cn/studios/ZhipuAI/glm-4-9b-chat-vllm/summary) |
-| GLM-4-9B-Chat-HF | Chat | 128K | >= 4.46 | <= 0.6.2 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-hf) [🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-hf) | [🤖 ModelScope CPU](https://modelscope.cn/studios/dash-infer/GLM-4-Chat-DashInfer-Demo/summary)
[🤖 ModelScope vLLM](https://modelscope.cn/studios/ZhipuAI/glm-4-9b-chat-vllm/summary) |
-| GLM-4-9B-Chat-1M | Chat | 1M | <= 4.45 | <= 0.6.2 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-1m)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-1m)
[🟣 WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4-9B-Chat-1M) | / |
-| GLM-4-9B-Chat-1M-HF | Chat | 1M | >= 4.46 | <= 0.6.2 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-1m-hf) [🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-1m-hf) | / |
-| GLM-4V-9B | Chat | 8K | >= 4.46 | >= 0.6.3 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4v-9b)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4v-9b)
[🟣 WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4V-9B) | [🤖 ModelScope](https://modelscope.cn/studios/ZhipuAI/glm-4v-9b-Demo/summary) |
+| Model | Type | Seq Length | Transformers | vLLM | Download | Online Demo |
+|:-------------------:|:----:|:----------:|:------------:|:--------:|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|
+| GLM-4-9B | Base | 8K | <= 4.45 | <= 0.6.2 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4-9b)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b)
[🟣 WiseModel](https://wisemodel.cn/models/ZhipuAI/glm-4-9b) | / |
+| GLM-4-9B-Chat | Chat | 128K | <= 4.45 | <= 0.6.2 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat)
[🟣 WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4-9B-Chat) | [🤖 ModelScope CPU](https://modelscope.cn/studios/dash-infer/GLM-4-Chat-DashInfer-Demo/summary)
[🤖 ModelScope vLLM](https://modelscope.cn/studios/ZhipuAI/glm-4-9b-chat-vllm/summary) |
+| GLM-4-9B-Chat-HF | Chat | 128K | >= 4.46 | <= 0.6.2 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-hf)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-hf) | [🤖 ModelScope CPU](https://modelscope.cn/studios/dash-infer/GLM-4-Chat-DashInfer-Demo/summary)
[🤖 ModelScope vLLM](https://modelscope.cn/studios/ZhipuAI/glm-4-9b-chat-vllm/summary) |
+| GLM-4-9B-Chat-1M | Chat | 1M | <= 4.45 | <= 0.6.2 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-1m)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-1m)
[🟣 WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4-9B-Chat-1M) | / |
+| GLM-4-9B-Chat-1M-HF | Chat | 1M | >= 4.46 | <= 0.6.2 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4-9b-chat-1m-hf)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-1m-hf) | / |
+| GLM-4V-9B | Chat | 8K | >= 4.46 | >= 0.6.3 | [🤗 Huggingface](https://huggingface.co/THUDM/glm-4v-9b)
[🤖 ModelScope](https://modelscope.cn/models/ZhipuAI/glm-4v-9b)
[🟣 WiseModel](https://wisemodel.cn/models/ZhipuAI/GLM-4V-9B) | [🤖 ModelScope](https://modelscope.cn/studios/ZhipuAI/glm-4v-9b-Demo/summary) |
-## BenchMark
+## BenchMark
### Typical Tasks
From 471943bfd785b7e9d6390596f08e62ec65c0ace7 Mon Sep 17 00:00:00 2001
From: sixgod
Date: Fri, 1 Nov 2024 10:21:56 +0000
Subject: [PATCH 8/8] support INT4 inference
---
basic_demo/trans_cli_vision_demo.py | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/basic_demo/trans_cli_vision_demo.py b/basic_demo/trans_cli_vision_demo.py
index 758ccc6..adca35d 100644
--- a/basic_demo/trans_cli_vision_demo.py
+++ b/basic_demo/trans_cli_vision_demo.py
@@ -17,7 +17,7 @@ from transformers import (
AutoTokenizer,
StoppingCriteria,
StoppingCriteriaList,
- TextIteratorStreamer, AutoModel
+ TextIteratorStreamer, AutoModel, BitsAndBytesConfig
)
from PIL import Image
@@ -29,6 +29,8 @@ tokenizer = AutoTokenizer.from_pretrained(
trust_remote_code=True,
encode_special_tokens=True
)
+
+## For BF16 inference
model = AutoModel.from_pretrained(
MODEL_PATH,
trust_remote_code=True,
@@ -37,7 +39,6 @@ model = AutoModel.from_pretrained(
device_map="auto",
).eval()
-
## For INT4 inference
# model = AutoModel.from_pretrained(
# MODEL_PATH,