fix issue #74
This commit is contained in:
parent 8102212b9f
commit ce2667cf5d
@@ -19,7 +19,7 @@ GLM-4V-9B。**GLM-4V-9B** 具备 1120 * 1120 高分辨率下的中英双语多
 表现出超越 GPT-4-turbo-2024-04-09、Gemini
 1.0 Pro、Qwen-VL-Max 和 Claude 3 Opus 的卓越性能。
 
-## 模型列表
+## Model List
 
 | Model            | Type | Seq Length | Download | Online Demo |
 |------------------|------|------------|----------|-------------|
@@ -93,6 +93,7 @@ on [Berkeley Function Calling Leaderboard](https://github.com/ShishirPatil/goril
 | ChatGLM3-6B   | 57.88 | 62.18 | 69.78 | 5.42  |
+| GLM-4-9B-Chat | 81.00 | 80.26 | 84.40 | 87.92 |
 
 
 ### Multi-Modal
 
 GLM-4V-9B is a multimodal language model with visual understanding capabilities. The evaluation results of its related
@@ -114,7 +115,7 @@ classic tasks are as follows:
 
 ## Quick call
 
-**硬件配置和系统要求,请查看[这里](basic_demo/README_en.md)。**
+**For hardware configuration and system requirements, please check [here](basic_demo/README_en.md).**
 
 ### Use the following method to quickly call the GLM-4-9B-Chat language model
 
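For context (not part of the diff): the quick-call snippet that the heading above introduces is cut off by the hunk boundary. A minimal sketch of such a call, assuming the standard chat template shipped with THUDM/glm-4-9b-chat and a CUDA device, looks roughly like:

# Sketch only; the README's exact snippet lies outside this hunk.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

device = "cuda"
tokenizer = AutoTokenizer.from_pretrained("THUDM/glm-4-9b-chat", trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    "THUDM/glm-4-9b-chat",
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
    device_map="auto",
).eval()

inputs = tokenizer.apply_chat_template(
    [{"role": "user", "content": "Hello"}],
    add_generation_prompt=True,
    tokenize=True,
    return_tensors="pt",
    return_dict=True,
).to(device)

with torch.no_grad():
    outputs = model.generate(**inputs, max_new_tokens=128)
    # Strip the prompt tokens before decoding the reply.
    outputs = outputs[:, inputs["input_ids"].shape[1]:]
    print(tokenizer.decode(outputs[0], skip_special_tokens=True))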
@@ -21,4 +21,7 @@ einops>=0.7.0
 sse-starlette>=2.1.0
 
 # INT4
-bitsandbytes>=0.43.1
+bitsandbytes>=0.43.1
+
+# PEFT model, not needed if you don't use PEFT finetune model.
+# peft>=0.11.0
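For reference (not part of the diff): the two optional dependencies above map to loading paths like the following sketch. The model path and adapter directory are illustrative; the INT4 route mirrors the commented BitsAndBytesConfig block that appears in the stress-test hunk further down.

# Sketch only: how the optional requirements are typically exercised.
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

# INT4 path (needs bitsandbytes): quantize weights to 4 bit at load time.
model = AutoModelForCausalLM.from_pretrained(
    "THUDM/glm-4-9b-chat",
    trust_remote_code=True,
    quantization_config=BitsAndBytesConfig(load_in_4bit=True),
    low_cpu_mem_usage=True,
).eval()

# PEFT path (needs peft): load a finetuned adapter on top of the base model.
# from peft import AutoPeftModelForCausalLM
# model = AutoPeftModelForCausalLM.from_pretrained(
#     "path/to/finetune-output",  # hypothetical adapter directory
#     trust_remote_code=True,
#     device_map="auto",
# ).eval()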
@@ -13,45 +13,38 @@ ensuring that the CLI interface displays formatted text correctly.
 import os
 import torch
 from threading import Thread
-from typing import Union
-from pathlib import Path
-from peft import AutoPeftModelForCausalLM, PeftModelForCausalLM
-from transformers import (
-    AutoModelForCausalLM,
-    AutoTokenizer,
-    PreTrainedModel,
-    PreTrainedTokenizer,
-    PreTrainedTokenizerFast,
-    StoppingCriteria,
-    StoppingCriteriaList,
-    TextIteratorStreamer
-)
-
-ModelType = Union[PreTrainedModel, PeftModelForCausalLM]
-TokenizerType = Union[PreTrainedTokenizer, PreTrainedTokenizerFast]
+from transformers import AutoTokenizer, StoppingCriteria, StoppingCriteriaList, TextIteratorStreamer, AutoModel
 
 MODEL_PATH = os.environ.get('MODEL_PATH', 'THUDM/glm-4-9b-chat')
 
-
-def load_model_and_tokenizer(
-        model_dir: Union[str, Path], trust_remote_code: bool = True
-) -> tuple[ModelType, TokenizerType]:
-    model_dir = Path(model_dir).expanduser().resolve()
-    if (model_dir / 'adapter_config.json').exists():
-        model = AutoPeftModelForCausalLM.from_pretrained(
-            model_dir, trust_remote_code=trust_remote_code, device_map='auto')
-        tokenizer_dir = model.peft_config['default'].base_model_name_or_path
-    else:
-        model = AutoModelForCausalLM.from_pretrained(model_dir, trust_remote_code=trust_remote_code, device_map='auto')
-        tokenizer_dir = model_dir
-
-    tokenizer = AutoTokenizer.from_pretrained(
-        tokenizer_dir, trust_remote_code=trust_remote_code, encode_special_tokens=True, use_fast=False
-    )
-    return model, tokenizer
+## If use peft model.
+# def load_model_and_tokenizer(model_dir, trust_remote_code: bool = True):
+#     if (model_dir / 'adapter_config.json').exists():
+#         model = AutoModel.from_pretrained(
+#             model_dir, trust_remote_code=trust_remote_code, device_map='auto'
+#         )
+#         tokenizer_dir = model.peft_config['default'].base_model_name_or_path
+#     else:
+#         model = AutoModel.from_pretrained(
+#             model_dir, trust_remote_code=trust_remote_code, device_map='auto'
+#         )
+#         tokenizer_dir = model_dir
+#     tokenizer = AutoTokenizer.from_pretrained(
+#         tokenizer_dir, trust_remote_code=trust_remote_code, use_fast=False
+#     )
+#     return model, tokenizer
 
-
-model, tokenizer = load_model_and_tokenizer(MODEL_PATH, trust_remote_code=True)
+tokenizer = AutoTokenizer.from_pretrained(
+    MODEL_PATH,
+    trust_remote_code=True,
+    encode_special_tokens=True
+)
+
+model = AutoModel.from_pretrained(
+    MODEL_PATH,
+    trust_remote_code=True,
+    device_map="auto",
+    torch_dtype=torch.bfloat16).eval()
 
 
 class StopOnTokens(StoppingCriteria):
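For context (not part of the diff): the rest of this demo file drives a streaming chat loop with the model and tokenizer loaded above. A condensed sketch of how the pieces fit together, with illustrative details rather than the file's exact code (`StopOnTokens` is the criterion class whose definition begins at the end of this hunk):

# Illustrative sketch of the streaming path this CLI demo uses.
from threading import Thread

def stream_chat(model, tokenizer, history):
    # history is a list of {"role": ..., "content": ...} messages.
    inputs = tokenizer.apply_chat_template(
        history,
        add_generation_prompt=True,
        tokenize=True,
        return_tensors="pt",
        return_dict=True,
    ).to(model.device)
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generate_kwargs = dict(
        **inputs,
        streamer=streamer,
        max_new_tokens=8192,
        stopping_criteria=StoppingCriteriaList([StopOnTokens()]),
    )
    # Generation runs in a background thread while the streamer yields text.
    Thread(target=model.generate, kwargs=generate_kwargs).start()
    for token_text in streamer:
        print(token_text, end="", flush=True)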
@@ -17,10 +17,17 @@ def stress_test(token_len, n, num_gpu):
     model = AutoModelForCausalLM.from_pretrained(
         MODEL_PATH,
         trust_remote_code=True,
         # quantization_config=BitsAndBytesConfig(load_in_4bit=True),
         # low_cpu_mem_usage=True,
         torch_dtype=torch.bfloat16
     ).to(device).eval()
 
+    # Use INT4 weight infer
+    # model = AutoModelForCausalLM.from_pretrained(
+    #     MODEL_PATH,
+    #     trust_remote_code=True,
+    #     quantization_config=BitsAndBytesConfig(load_in_4bit=True),
+    #     low_cpu_mem_usage=True,
+    # ).eval()
     times = []
     decode_times = []
@@ -18,6 +18,7 @@ def extract_docx(path):
     for paragraph in doc.paragraphs:
         data.append(paragraph.text)
     content = '\n\n'.join(data)
     return content
+
 def extract_pptx(path):
     prs = Presentation(path)
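For context (not part of the diff): the body of `extract_pptx` is cut off at the hunk boundary. With python-pptx, the usual continuation walks slides and shapes and collects their text frames; a sketch, not the file's exact code:

# Sketch of a typical python-pptx text extraction, mirroring extract_docx above.
def extract_pptx(path):
    prs = Presentation(path)
    data = []
    for slide in prs.slides:
        for shape in slide.shapes:
            # Only shapes with a text frame carry extractable text.
            if shape.has_text_frame:
                data.append(shape.text_frame.text)
    return '\n\n'.join(data)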