This commit is contained in:
zR 2024-06-06 16:18:14 +08:00
parent 8102212b9f
commit ce2667cf5d
6 changed files with 44 additions and 39 deletions

View File

@ -19,7 +19,7 @@ GLM-4V-9B. **GLM-4V-9B** provides bilingual (Chinese and English) multi-turn dialogue capabilities at a high resolution of 1120 * 1120,
demonstrating performance that surpasses GPT-4-turbo-2024-04-09, Gemini
1.0 Pro, Qwen-VL-Max, and Claude 3 Opus.
## Model List
| Model | Type | Seq Length | Download | Online Demo |
|------------------|------|------------|-----------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|

View File

@ -93,6 +93,7 @@ on [Berkeley Function Calling Leaderboard](https://github.com/ShishirPatil/goril
| ChatGLM3-6B | 57.88 | 62.18 | 69.78 | 5.42 |
| GLM-4-9B-Chat | 81.00 | 80.26 | 84.40 | 87.92 |
### Multi-Modal
GLM-4V-9B is a multimodal language model with visual understanding capabilities. The evaluation results of its related
@ -114,7 +115,7 @@ classic tasks are as follows:
## Quick call
**For hardware configuration and system requirements, please check [here](basic_demo/README_en.md).**
### Use the following method to quickly call the GLM-4-9B-Chat language model
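The quick-call example itself is not shown in this hunk. A minimal sketch of such a call, assuming the standard transformers chat-template API (the prompt text and generation parameters are illustrative, not from the original):

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_PATH = "THUDM/glm-4-9b-chat"

tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
    device_map="auto",
).eval()

# Build a prompt with the model's chat template, then generate a reply.
inputs = tokenizer.apply_chat_template(
    [{"role": "user", "content": "Hello! What can you do?"}],
    add_generation_prompt=True,
    tokenize=True,
    return_tensors="pt",
    return_dict=True,
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=128)
reply = tokenizer.decode(
    outputs[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True
)
print(reply)
```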

View File

@ -21,4 +21,7 @@ einops>=0.7.0
sse-starlette>=2.1.0
# INT4
bitsandbytes>=0.43.1
# PEFT model, not needed if you don't use a PEFT fine-tuned model.
# peft>=0.11.0
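For reference, this is roughly where the optional peft dependency comes into play; a minimal sketch, assuming an adapter checkpoint produced by PEFT fine-tuning (the directory path is hypothetical):

```python
from pathlib import Path

from peft import AutoPeftModelForCausalLM  # needs the optional peft>=0.11.0

adapter_dir = Path("output/checkpoint-3000")  # hypothetical fine-tune output
# A PEFT checkpoint is identified by its adapter_config.json.
if (adapter_dir / "adapter_config.json").exists():
    model = AutoPeftModelForCausalLM.from_pretrained(
        adapter_dir, trust_remote_code=True, device_map="auto"
    )
```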

View File

@ -13,45 +13,38 @@ ensuring that the CLI interface displays formatted text correctly.
import os
import torch
from threading import Thread

-from typing import Union
-from pathlib import Path
-from peft import AutoPeftModelForCausalLM, PeftModelForCausalLM
-from transformers import (
-    AutoModelForCausalLM,
-    AutoTokenizer,
-    PreTrainedModel,
-    PreTrainedTokenizer,
-    PreTrainedTokenizerFast,
-    StoppingCriteria,
-    StoppingCriteriaList,
-    TextIteratorStreamer
-)
-ModelType = Union[PreTrainedModel, PeftModelForCausalLM]
-TokenizerType = Union[PreTrainedTokenizer, PreTrainedTokenizerFast]
+from transformers import AutoTokenizer, StoppingCriteria, StoppingCriteriaList, TextIteratorStreamer, AutoModel

MODEL_PATH = os.environ.get('MODEL_PATH', 'THUDM/glm-4-9b-chat')

-def load_model_and_tokenizer(
-        model_dir: Union[str, Path], trust_remote_code: bool = True
-) -> tuple[ModelType, TokenizerType]:
-    model_dir = Path(model_dir).expanduser().resolve()
-    if (model_dir / 'adapter_config.json').exists():
-        model = AutoPeftModelForCausalLM.from_pretrained(
-            model_dir, trust_remote_code=trust_remote_code, device_map='auto')
-        tokenizer_dir = model.peft_config['default'].base_model_name_or_path
-    else:
-        model = AutoModelForCausalLM.from_pretrained(model_dir, trust_remote_code=trust_remote_code, device_map='auto')
-        tokenizer_dir = model_dir
-    tokenizer = AutoTokenizer.from_pretrained(
-        tokenizer_dir, trust_remote_code=trust_remote_code, encode_special_tokens=True, use_fast=False
-    )
-    return model, tokenizer
-model, tokenizer = load_model_and_tokenizer(MODEL_PATH, trust_remote_code=True)
+## If you use a PEFT (adapter) model, restore a loader along these lines.
+## Note: an adapter checkpoint must be loaded through peft's
+## AutoPeftModelForCausalLM; plain AutoModel cannot load a directory that
+## contains only adapter weights.
+# from pathlib import Path
+# from peft import AutoPeftModelForCausalLM
+#
+# def load_model_and_tokenizer(model_dir, trust_remote_code: bool = True):
+#     model_dir = Path(model_dir)
+#     if (model_dir / 'adapter_config.json').exists():
+#         model = AutoPeftModelForCausalLM.from_pretrained(
+#             model_dir, trust_remote_code=trust_remote_code, device_map='auto'
+#         )
+#         tokenizer_dir = model.peft_config['default'].base_model_name_or_path
+#     else:
+#         model = AutoModel.from_pretrained(
+#             model_dir, trust_remote_code=trust_remote_code, device_map='auto'
+#         )
+#         tokenizer_dir = model_dir
+#     tokenizer = AutoTokenizer.from_pretrained(
+#         tokenizer_dir, trust_remote_code=trust_remote_code, use_fast=False
+#     )
+#     return model, tokenizer
+tokenizer = AutoTokenizer.from_pretrained(
+    MODEL_PATH,
+    trust_remote_code=True,
+    encode_special_tokens=True
+)
+model = AutoModel.from_pretrained(
+    MODEL_PATH,
+    trust_remote_code=True,
+    device_map="auto",
+    torch_dtype=torch.bfloat16).eval()

class StopOnTokens(StoppingCriteria):
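The class body is cut off by the hunk. A minimal sketch of how this streaming CLI pattern typically fits together, reusing the `model` and `tokenizer` loaded above (the stop-token handling and generation parameters are assumptions, not the file's actual code):

```python
from threading import Thread

import torch
from transformers import StoppingCriteria, StoppingCriteriaList, TextIteratorStreamer

class StopOnTokens(StoppingCriteria):
    """Stop generation as soon as the last token is one of the EOS ids."""

    def __init__(self, stop_token_ids):
        self.stop_token_ids = stop_token_ids

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        return input_ids[0][-1].item() in self.stop_token_ids

# glm-4-9b-chat exposes a list of EOS ids; normalize in case it is a single int.
eos = model.config.eos_token_id
stop_ids = eos if isinstance(eos, list) else [eos]

inputs = tokenizer.apply_chat_template(
    [{"role": "user", "content": "Hello"}],
    add_generation_prompt=True, tokenize=True,
    return_tensors="pt", return_dict=True,
).to(model.device)

streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generate_kwargs = dict(
    **inputs,
    streamer=streamer,
    max_new_tokens=256,
    stopping_criteria=StoppingCriteriaList([StopOnTokens(stop_ids)]),
)
# Run generation on a worker thread so tokens can be printed as they arrive.
Thread(target=model.generate, kwargs=generate_kwargs).start()
for new_text in streamer:
    print(new_text, end="", flush=True)
```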

View File

@ -17,10 +17,17 @@ def stress_test(token_len, n, num_gpu):
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_PATH,
        trust_remote_code=True,
        # quantization_config=BitsAndBytesConfig(load_in_4bit=True),
        # low_cpu_mem_usage=True,
        torch_dtype=torch.bfloat16
    ).to(device).eval()
    # Use INT4 weights for inference:
    # model = AutoModelForCausalLM.from_pretrained(
    #     MODEL_PATH,
    #     trust_remote_code=True,
    #     quantization_config=BitsAndBytesConfig(load_in_4bit=True),
    #     low_cpu_mem_usage=True,
    # ).eval()
    times = []
    decode_times = []
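For reference, an enabled version of that commented INT4 path might look like the following. Note that `BitsAndBytesConfig` still needs to be imported, and a 4-bit model should not be moved with `.to(device)` because bitsandbytes assigns devices during loading (a sketch, not the file's code):

```python
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

model = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH,
    trust_remote_code=True,
    quantization_config=BitsAndBytesConfig(load_in_4bit=True),
    low_cpu_mem_usage=True,
    torch_dtype=torch.bfloat16,  # dtype for the non-quantized modules; an assumption
).eval()  # no .to(device): bitsandbytes places the weights itself
```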

View File

@ -18,6 +18,7 @@ def extract_docx(path):
    for paragraph in doc.paragraphs:
        data.append(paragraph.text)
    content = '\n\n'.join(data)
    return content

def extract_pptx(path):
    prs = Presentation(path)
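The rest of extract_pptx is cut off in this view. A plausible completion in the same style as extract_docx, using python-pptx's text frames (a sketch, not the file's actual body):

```python
def extract_pptx(path):
    prs = Presentation(path)
    data = []
    for slide in prs.slides:
        for shape in slide.shapes:
            # Only shapes with a text frame carry extractable text.
            if shape.has_text_frame:
                data.append(shape.text_frame.text)
    content = '\n\n'.join(data)
    return content
```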