fix issue #74

parent 8102212b9f
commit ce2667cf5d
@@ -19,7 +19,7 @@ GLM-4V-9B。**GLM-4V-9B** 具备 1120 * 1120 高分辨率下的中英双语多
 表现出超越 GPT-4-turbo-2024-04-09、Gemini
 1.0 Pro、Qwen-VL-Max 和 Claude 3 Opus 的卓越性能。
 
-## 模型列表
+## Model List
 
 | Model | Type | Seq Length | Download | Online Demo |
 |-------|------|------------|----------|-------------|
@@ -93,6 +93,7 @@ on [Berkeley Function Calling Leaderboard](https://github.com/ShishirPatil/goril
 | ChatGLM3-6B   | 57.88 | 62.18 | 69.78 | 5.42  |
 | GLM-4-9B-Chat | 81.00 | 80.26 | 84.40 | 87.92 |
+
 
 ### Multi-Modal
 
 GLM-4V-9B is a multimodal language model with visual understanding capabilities. The evaluation results of its related
@@ -114,7 +115,7 @@ classic tasks are as follows:
 
 ## Quick call
 
-**硬件配置和系统要求,请查看[这里](basic_demo/README_en.md)。**
+**For hardware configuration and system requirements, please check [here](basic_demo/README_en.md).**
 
 ### Use the following method to quickly call the GLM-4-9B-Chat language model
 
@@ -21,4 +21,7 @@ einops>=0.7.0
 sse-starlette>=2.1.0
 
 # INT4
 bitsandbytes>=0.43.1
+
+# PEFT model, not needed if you don't use a PEFT fine-tuned model.
+# peft>=0.11.0
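The two optional pins above back two optional code paths: `bitsandbytes` provides the 4-bit kernels for the `# INT4` load, and `peft` is only needed for adapter checkpoints. A minimal sketch (not from this commit) of the quantized load that `bitsandbytes>=0.43.1` enables, assuming a CUDA GPU and the repo's chat checkpoint:

```python
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

# Load GLM-4-9B-Chat with 4-bit quantized weights via bitsandbytes.
model = AutoModelForCausalLM.from_pretrained(
    'THUDM/glm-4-9b-chat',
    trust_remote_code=True,
    quantization_config=BitsAndBytesConfig(load_in_4bit=True),
    low_cpu_mem_usage=True,
).eval()
```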
@@ -13,45 +13,38 @@ ensuring that the CLI interface displays formatted text correctly.
 import os
 import torch
 from threading import Thread
-from typing import Union
-from pathlib import Path
-from peft import AutoPeftModelForCausalLM, PeftModelForCausalLM
-from transformers import (
-    AutoModelForCausalLM,
-    AutoTokenizer,
-    PreTrainedModel,
-    PreTrainedTokenizer,
-    PreTrainedTokenizerFast,
-    StoppingCriteria,
-    StoppingCriteriaList,
-    TextIteratorStreamer
-)
-
-ModelType = Union[PreTrainedModel, PeftModelForCausalLM]
-TokenizerType = Union[PreTrainedTokenizer, PreTrainedTokenizerFast]
+from transformers import AutoTokenizer, StoppingCriteria, StoppingCriteriaList, TextIteratorStreamer, AutoModel
 
 MODEL_PATH = os.environ.get('MODEL_PATH', 'THUDM/glm-4-9b-chat')
 
-def load_model_and_tokenizer(
-        model_dir: Union[str, Path], trust_remote_code: bool = True
-) -> tuple[ModelType, TokenizerType]:
-    model_dir = Path(model_dir).expanduser().resolve()
-    if (model_dir / 'adapter_config.json').exists():
-        model = AutoPeftModelForCausalLM.from_pretrained(
-            model_dir, trust_remote_code=trust_remote_code, device_map='auto')
-        tokenizer_dir = model.peft_config['default'].base_model_name_or_path
-    else:
-        model = AutoModelForCausalLM.from_pretrained(model_dir, trust_remote_code=trust_remote_code, device_map='auto')
-        tokenizer_dir = model_dir
-    tokenizer = AutoTokenizer.from_pretrained(
-        tokenizer_dir, trust_remote_code=trust_remote_code, encode_special_tokens=True, use_fast=False
-    )
-    return model, tokenizer
-
-model, tokenizer = load_model_and_tokenizer(MODEL_PATH, trust_remote_code=True)
+## If using a PEFT model.
+# def load_model_and_tokenizer(model_dir, trust_remote_code: bool = True):
+#     if (model_dir / 'adapter_config.json').exists():
+#         model = AutoModel.from_pretrained(
+#             model_dir, trust_remote_code=trust_remote_code, device_map='auto'
+#         )
+#         tokenizer_dir = model.peft_config['default'].base_model_name_or_path
+#     else:
+#         model = AutoModel.from_pretrained(
+#             model_dir, trust_remote_code=trust_remote_code, device_map='auto'
+#         )
+#         tokenizer_dir = model_dir
+#     tokenizer = AutoTokenizer.from_pretrained(
+#         tokenizer_dir, trust_remote_code=trust_remote_code, use_fast=False
+#     )
+#     return model, tokenizer
+
+tokenizer = AutoTokenizer.from_pretrained(
+    MODEL_PATH,
+    trust_remote_code=True,
+    encode_special_tokens=True
+)
+model = AutoModel.from_pretrained(
+    MODEL_PATH,
+    trust_remote_code=True,
+    device_map="auto",
+    torch_dtype=torch.bfloat16).eval()
 
 class StopOnTokens(StoppingCriteria):
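The adapter-aware loader survives only as the commented block above. For anyone who still needs that path, here is a minimal sketch reconstructed from the removed code; the adapter directory name is a hypothetical placeholder, and the directory must contain an `adapter_config.json`:

```python
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer

adapter_dir = 'output/my-glm4-adapter'  # hypothetical PEFT output directory

# AutoPeftModelForCausalLM reads adapter_config.json, pulls in the base
# model recorded there, and applies the adapter weights on top of it.
model = AutoPeftModelForCausalLM.from_pretrained(
    adapter_dir, trust_remote_code=True, device_map='auto'
).eval()

# The tokenizer ships with the base model, not the adapter, so resolve it
# through the adapter's recorded base_model_name_or_path.
base = model.peft_config['default'].base_model_name_or_path
tokenizer = AutoTokenizer.from_pretrained(base, trust_remote_code=True, use_fast=False)
```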
@@ -17,10 +17,17 @@ def stress_test(token_len, n, num_gpu):
     model = AutoModelForCausalLM.from_pretrained(
         MODEL_PATH,
         trust_remote_code=True,
-        # quantization_config=BitsAndBytesConfig(load_in_4bit=True),
-        # low_cpu_mem_usage=True,
         torch_dtype=torch.bfloat16
     ).to(device).eval()
 
+    # Use INT4 weight inference
+    # model = AutoModelForCausalLM.from_pretrained(
+    #     MODEL_PATH,
+    #     trust_remote_code=True,
+    #     quantization_config=BitsAndBytesConfig(load_in_4bit=True),
+    #     low_cpu_mem_usage=True,
+    # ).eval()
+
     times = []
     decode_times = []
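A plausible reason the INT4 variant is kept as a separate, fully commented call rather than as inline arguments (the commit message doesn't say): transformers rejects `.to(device)` on a model loaded with 4-bit bitsandbytes weights, so the quantized path has to stop at `.eval()` and let bitsandbytes handle placement, while the bf16 path keeps its explicit `.to(device)`.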
@@ -18,6 +18,7 @@ def extract_docx(path):
     for paragraph in doc.paragraphs:
         data.append(paragraph.text)
     content = '\n\n'.join(data)
+    return content
 
 def extract_pptx(path):
     prs = Presentation(path)
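The added `return content` is the actual bug fix in this hunk: previously `extract_docx` assembled the text and then fell off the end of the function, handing `None` to every caller. A quick usage check with the patched function (the file path is a hypothetical example):

```python
text = extract_docx('specs/example.docx')  # hypothetical input file
assert text is not None, 'pre-fix behaviour: extract_docx returned None'
print(text[:200])  # first 200 characters of the extracted paragraphs
```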