first commit

xxl 2024-12-30 18:21:26 +08:00
parent e16850f08a
commit 86c97b6759
16 changed files with 2478 additions and 2 deletions

README.md

@@ -1,3 +1,29 @@
# CharacterGLM-6B_a14114209229434880147986
---
frameworks:
- Pytorch
license: other
tasks:
- text-generation
---
###### For model files and weights, please browse the "Model Files" page.
###### The contributor of this model has not provided a more detailed introduction yet, but you can download the model with the git clone command below or via the ModelScope SDK.
###### Clone with HTTP
```
git clone https://www.modelscope.cn/THUCoAI/CharacterGLM-6B.git
```
###### If you are a contributor to this model, we invite you to complete the model card promptly, following the [model contribution guide](https://www.modelscope.cn/docs/%E5%A6%82%E4%BD%95%E6%92%B0%E5%86%99%E5%A5%BD%E7%94%A8%E7%9A%84%E6%A8%A1%E5%9E%8B%E5%8D%A1%E7%89%87).
###### [Hugging Face link](https://huggingface.co/thu-coai/CharacterGLM-6B)
CharacterGLM-6B
Code example
```python
>>> from transformers import AutoTokenizer, AutoModel
>>> # Assume the model has been downloaded via git to the ./CharacterGLM-6B directory
>>> tokenizer = AutoTokenizer.from_pretrained("./CharacterGLM-6B", trust_remote_code=True)
>>> model = AutoModel.from_pretrained("./CharacterGLM-6B", trust_remote_code=True, device='cuda')
>>> model = model.eval()
>>> session_meta = {'user_info': '我是陆星辰,是一个男性,是一位知名导演,也是苏梦远的合作导演。我擅长拍摄音乐题材的电影。苏梦远对我的态度是尊敬的,并视我为良师益友。', 'bot_info': '苏梦远,本名苏远心,是一位当红的国内女歌手及演员。在参加选秀节目后,凭借独特的嗓音及出众的舞台魅力迅速成名,进入娱乐圈。她外表美丽动人,但真正的魅力在于她的才华和勤奋。苏梦远是音乐学院毕业的优秀生,善于创作,拥有多首热门原创歌曲。除了音乐方面的成就,她还热衷于慈善事业,积极参加公益活动,用实际行动传递正能量。在工作中,她对待工作非常敬业,拍戏时总是全身心投入角色,赢得了业内人士的赞誉和粉丝的喜爱。虽然在娱乐圈,但她始终保持低调、谦逊的态度,深得同行尊重。在表达时,苏梦远喜欢使用“我们”和“一起”,强调团队精神。', 'bot_name': '苏梦远', 'user_name': '陆星辰'}
>>> response, history = model.chat(tokenizer, session_meta, "你好", history=[])
```
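For incremental output, the model also exposes `stream_chat`, which yields progressively longer responses; a minimal sketch, assuming the same `session_meta` and local model directory as above:
```python
>>> for response, history in model.stream_chat(tokenizer, session_meta, "你好", history=[]):
...     print(response)
```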

121
characterglm_generation_utils.py Normal file

@@ -0,0 +1,121 @@
from typing import TypedDict, Literal, List, Optional, Tuple, Iterator
#### data types #########
# The data type definitions below match the CharacterGLM API, but not the chat method in modeling_chatglm.py.
# Reference: https://open.bigmodel.cn/dev/api#characterglm
RoleType = Literal["user", "assistant"]
class Msg(TypedDict):
role: RoleType
content: str
class SessionMeta(TypedDict):
user_name: str
bot_name: str
bot_info: str
user_info: Optional[str]
HistoryType = List[Msg]
class CharacterGLMGenerationUtils:
@staticmethod
def convert_chatglm_history_to_characterglm_history(user_query: str, history: List[Tuple[str, str]]) -> HistoryType:
characterglm_history: HistoryType = []
for i, (query, response) in enumerate(history):
if i == 0 and query == '':
                # the first empty query is a placeholder
pass
else:
characterglm_history.append({
"role": "user",
"content": query
})
characterglm_history.append({
"role": "assistant",
"content": response
})
characterglm_history.append({
"role": "user",
"content": user_query
})
return characterglm_history
@staticmethod
def build_inputs(session_meta: SessionMeta, history: HistoryType) -> str:
"""
注意这里假设history最后一条消息是用户query
"""
texts = []
texts.append(
f"以下是一段{session_meta['bot_name']}{session_meta['user_name']}之间的对话。")
if session_meta.get("bot_info"):
texts.append(f"关于{session_meta['bot_name']}的信息:{session_meta['bot_info']}")
if session_meta.get("user_info"):
texts.append(
f"关于{session_meta['user_name']}的信息:{session_meta['user_info']}")
assert history and history[-1]['role'] == 'user'
for msg in history:
name = session_meta['user_name'] if msg['role'] == 'user' else session_meta['bot_name']
texts.append(f"[{name}]" + msg['content'].strip())
texts = [text.replace('\n', ' ') for text in texts]
texts.append(f"[{session_meta['bot_name']}]")
return '\n'.join(texts)
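# A minimal usage sketch of build_inputs (the persona values below are illustrative, not from the model card):
#
#   meta: SessionMeta = {"user_name": "陆星辰", "bot_name": "苏梦远",
#                        "bot_info": "当红女歌手", "user_info": "知名导演"}
#   history: HistoryType = [{"role": "user", "content": "你好"}]
#   print(CharacterGLMGenerationUtils.build_inputs(meta, history))
#
# which would print, one utterance per line:
#
#   以下是一段苏梦远和陆星辰之间的对话。
#   关于苏梦远的信息:当红女歌手
#   关于陆星辰的信息:知名导演
#   [陆星辰]你好
#   [苏梦远]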
class CharacterGLMAPI:
@staticmethod
def build_api_arguments(session_meta: SessionMeta, history: HistoryType) -> dict:
return {
"model": "characterglm",
"meta": session_meta,
"prompt": history
}
@classmethod
def async_invoke(cls, session_meta: SessionMeta, history: HistoryType):
"""
注意
1. 先设置zhipuai.api_key
2. 建议传入`return_type='text'`否则返回结果是json字符串
参考
https://open.bigmodel.cn/dev/api#characterglm
"""
import zhipuai
kwargs = cls.build_api_arguments(session_meta, history)
return zhipuai.model_api.async_invoke(**kwargs, return_type='text')
@classmethod
def invoke(cls, session_meta: SessionMeta, history: HistoryType):
"""
注意
1. 先设置zhipuai.api_key
2. 建议传入`return_type='text'`否则返回结果是json字符串
3. 需要再次调用`zhipuai.model_api.query_async_invoke_result`才能获取生成结果
参考
https://open.bigmodel.cn/dev/api#characterglm
"""
import zhipuai
kwargs = cls.build_api_arguments(session_meta, history)
return zhipuai.model_api.invoke(**kwargs, return_type='text')
@classmethod
def generate(cls, session_meta: SessionMeta, history: HistoryType) -> str:
result = cls.invoke(session_meta, history)
if not result['success']:
raise RuntimeError(result)
return result['data']['choices'][0]['content']
@classmethod
def stream_generate(cls, session_meta: SessionMeta, history: HistoryType) -> Iterator[str]:
        # pseudo-streaming: generate the full response, then yield it character by character
return iter(cls.generate(session_meta, history))
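# End-to-end sketch of the API path (assumes a valid zhipuai key; all values are placeholders):
#
#   import zhipuai
#   zhipuai.api_key = "..."  # set your own key first
#   meta: SessionMeta = {"user_name": "陆星辰", "bot_name": "苏梦远",
#                        "bot_info": "当红女歌手", "user_info": "知名导演"}
#   reply = CharacterGLMAPI.generate(meta, [{"role": "user", "content": "你好"}])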

42
config.json Normal file

@@ -0,0 +1,42 @@
{
"_name_or_path": "thu-coai/CharacterGLM-6B",
"model_type": "chatglm",
"architectures": [
"ChatGLMModel"
],
"auto_map": {
"AutoConfig": "configuration_chatglm.ChatGLMConfig",
"AutoModel": "modeling_characterglm.CharacterGLMForConditionalGeneration",
"AutoModelForCausalLM": "modeling_characterglm.CharacterGLMForConditionalGeneration",
"AutoModelForSeq2SeqLM": "modeling_characterglm.CharacterGLMForConditionalGeneration",
"AutoModelForSequenceClassification": "modeling_chatglm.ChatGLMForSequenceClassification"
},
"add_bias_linear": false,
"add_qkv_bias": true,
"apply_query_key_layer_scaling": true,
"apply_residual_connection_post_layernorm": false,
"attention_dropout": 0.0,
"attention_softmax_in_fp32": true,
"bias_dropout_fusion": true,
"ffn_hidden_size": 13696,
"fp32_residual_connection": false,
"hidden_dropout": 0.0,
"hidden_size": 4096,
"kv_channels": 128,
"layernorm_epsilon": 1e-05,
"multi_query_attention": true,
"multi_query_group_num": 2,
"num_attention_heads": 32,
"num_layers": 28,
"original_rope": true,
"padded_vocab_size": 65024,
"post_layer_norm": true,
"rmsnorm": true,
"seq_length": 32768,
"use_cache": true,
"torch_dtype": "float16",
"transformers_version": "4.27.1",
"tie_word_embeddings": false,
"eos_token_id": 2,
"pad_token_id": 0
}
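The `auto_map` entries above route the generic `Auto*` classes to the custom modules shipped with this repo, which is why loading requires `trust_remote_code=True`. A minimal loading sketch (the local path is an assumption):
```python
from transformers import AutoConfig, AutoModel

# auto_map dispatches to configuration_chatglm.ChatGLMConfig and
# modeling_characterglm.CharacterGLMForConditionalGeneration from this repo
config = AutoConfig.from_pretrained("./CharacterGLM-6B", trust_remote_code=True)
model = AutoModel.from_pretrained("./CharacterGLM-6B", trust_remote_code=True)
```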

1
configuration.json Normal file

@@ -0,0 +1 @@
{"framework":"Pytorch","task":"text-generation"}

61
configuration_chatglm.py Normal file

@@ -0,0 +1,61 @@
from transformers import PretrainedConfig
class ChatGLMConfig(PretrainedConfig):
model_type = "chatglm"
def __init__(
self,
num_layers=28,
padded_vocab_size=65024,
hidden_size=4096,
ffn_hidden_size=13696,
kv_channels=128,
num_attention_heads=32,
seq_length=2048,
hidden_dropout=0.0,
classifier_dropout=None,
attention_dropout=0.0,
layernorm_epsilon=1e-5,
rmsnorm=True,
apply_residual_connection_post_layernorm=False,
post_layer_norm=True,
add_bias_linear=False,
add_qkv_bias=False,
bias_dropout_fusion=True,
multi_query_attention=False,
multi_query_group_num=1,
apply_query_key_layer_scaling=True,
attention_softmax_in_fp32=True,
fp32_residual_connection=False,
quantization_bit=0,
pre_seq_len=None,
prefix_projection=False,
**kwargs
):
self.num_layers = num_layers
self.vocab_size = padded_vocab_size
self.padded_vocab_size = padded_vocab_size
self.hidden_size = hidden_size
self.ffn_hidden_size = ffn_hidden_size
self.kv_channels = kv_channels
self.num_attention_heads = num_attention_heads
self.seq_length = seq_length
self.hidden_dropout = hidden_dropout
self.classifier_dropout = classifier_dropout
self.attention_dropout = attention_dropout
self.layernorm_epsilon = layernorm_epsilon
self.rmsnorm = rmsnorm
self.apply_residual_connection_post_layernorm = apply_residual_connection_post_layernorm
self.post_layer_norm = post_layer_norm
self.add_bias_linear = add_bias_linear
self.add_qkv_bias = add_qkv_bias
self.bias_dropout_fusion = bias_dropout_fusion
self.multi_query_attention = multi_query_attention
self.multi_query_group_num = multi_query_group_num
self.apply_query_key_layer_scaling = apply_query_key_layer_scaling
self.attention_softmax_in_fp32 = attention_softmax_in_fp32
self.fp32_residual_connection = fp32_residual_connection
self.quantization_bit = quantization_bit
self.pre_seq_len = pre_seq_len
self.prefix_projection = prefix_projection
super().__init__(**kwargs)

6
generation_config.json Normal file

@@ -0,0 +1,6 @@
{
"_from_model_config": true,
"eos_token_id": 2,
"pad_token_id": 0,
"transformers_version": "4.31.0"
}

218
modeling_characterglm.py Normal file

@@ -0,0 +1,218 @@
import copy
import warnings
from typing import List, Tuple, Optional, Callable
import torch
from torch import nn
from transformers.utils import logging
from transformers.generation.utils import LogitsProcessorList, StoppingCriteriaList, GenerationConfig
from .modeling_chatglm import ChatGLMForConditionalGeneration, InvalidScoreLogitsProcessor
from .characterglm_generation_utils import CharacterGLMGenerationUtils, SessionMeta
logger = logging.get_logger(__name__)
default_generation_config = {
"do_sample": True,
"top_k": 100,
"top_p": 0.9,
"no_repeat_ngram_size": 0,
"temperature": 0.9,
"num_beams": 1,
"length_penalty": 1.6,
"repetition_penalty": 1.3,
"eos_token_id": 13
}
class CharacterGLMForConditionalGeneration(ChatGLMForConditionalGeneration):
"""
CharacterGLM的prompt格式与chatglm有差异
CharacterGLMForConditionalGeneration复用了ChatGLMForConditionalGeneration的forward方法
重新实现了`build_inputs``build_stream_inputs`,
调整了`chat``stream_chat`方法的函数签名增加session_meta参数并修改解码参数的默认值
"""
def build_inputs(self, tokenizer, session_meta: SessionMeta, query: str, history: Optional[List[Tuple[str, str]]] = None):
character_glm_history = CharacterGLMGenerationUtils.convert_chatglm_history_to_characterglm_history(query, history or [])
prompt = CharacterGLMGenerationUtils.build_inputs(session_meta, character_glm_history)
inputs = tokenizer([prompt], return_tensors="pt")
inputs = inputs.to(self.device)
return inputs
def build_stream_inputs(self, tokenizer, session_meta: SessionMeta, query: str, history: Optional[List[Tuple[str, str]]] = None):
prompt = "\n[{}]{}\n[{}]".format(
session_meta['user_name'],
query.replace('\n', ' '),
session_meta['bot_name']
)
input_ids = tokenizer.encode(prompt, add_special_tokens=False)
input_ids = input_ids[1:]
inputs = tokenizer.batch_encode_plus([(input_ids, None)], return_tensors="pt", add_special_tokens=False)
inputs = inputs.to(self.device)
return inputs
@torch.inference_mode()
def chat(self, tokenizer, session_meta: SessionMeta, query: str, history: List[Tuple[str, str]] = None, max_length: int = 8192, num_beams=1,
do_sample=True, top_p=0.9, temperature=0.9, repetition_penalty=1.6, logits_processor=None, **kwargs):
if history is None:
history = []
if logits_processor is None:
logits_processor = LogitsProcessorList()
logits_processor.append(InvalidScoreLogitsProcessor())
gen_kwargs = {"max_length": max_length, "num_beams": num_beams, "do_sample": do_sample, "top_p": top_p,
"temperature": temperature, "logits_processor": logits_processor, "repetition_penalty": repetition_penalty, **kwargs}
gen_kwargs.update({k: v for k, v in default_generation_config.items() if k not in gen_kwargs})
inputs = self.build_inputs(tokenizer, session_meta, query, history=history)
outputs = self.generate(**inputs, **gen_kwargs)
outputs = outputs.tolist()[0][len(inputs["input_ids"][0]):]
response = tokenizer.decode(outputs)
response = self.process_response(response)
history = history + [(query, response)]
return response, history
@torch.inference_mode()
def stream_chat(self, tokenizer, session_meta: SessionMeta, query: str, history: List[Tuple[str, str]] = None, past_key_values=None,
max_length: int = 8192, do_sample=True, top_p=0.9, temperature=0.9, repetition_penalty=1.0, logits_processor=None,
return_past_key_values=False, **kwargs):
if history is None:
history = []
if logits_processor is None:
logits_processor = LogitsProcessorList()
logits_processor.append(InvalidScoreLogitsProcessor())
gen_kwargs = {"max_length": max_length, "do_sample": do_sample, "top_p": top_p,
"temperature": temperature, "logits_processor": logits_processor, "repetition_penalty": repetition_penalty, **kwargs}
gen_kwargs.update({k: v for k, v in default_generation_config.items() if k not in gen_kwargs})
gen_kwargs.pop('repetition_penalty', None)
if past_key_values is None:
inputs = self.build_inputs(tokenizer, session_meta, query, history=history)
else:
inputs = self.build_stream_inputs(tokenizer, session_meta, query, history=history)
if past_key_values is not None:
past_length = past_key_values[0][0].shape[0]
if self.transformer.pre_seq_len is not None:
past_length -= self.transformer.pre_seq_len
inputs.position_ids += past_length
attention_mask = inputs.attention_mask
attention_mask = torch.cat((attention_mask.new_ones(1, past_length), attention_mask), dim=1)
inputs['attention_mask'] = attention_mask
for outputs in self.stream_generate(**inputs, past_key_values=past_key_values,
return_past_key_values=return_past_key_values, **gen_kwargs):
if return_past_key_values:
outputs, past_key_values = outputs
outputs = outputs.tolist()[0][len(inputs["input_ids"][0]):]
response = tokenizer.decode(outputs)
            if response and response[-1] != "�":
response = self.process_response(response)
new_history = history + [(query, response)]
if return_past_key_values:
yield response, new_history, past_key_values
else:
yield response, new_history
@torch.inference_mode()
def stream_generate(
self,
input_ids,
generation_config: Optional[GenerationConfig] = None,
logits_processor: Optional[LogitsProcessorList] = None,
stopping_criteria: Optional[StoppingCriteriaList] = None,
prefix_allowed_tokens_fn: Optional[Callable[[int, torch.Tensor], List[int]]] = None,
return_past_key_values=False,
**kwargs,
):
batch_size, input_ids_seq_length = input_ids.shape[0], input_ids.shape[-1]
if generation_config is None:
generation_config = self.generation_config
generation_config = copy.deepcopy(generation_config)
model_kwargs = generation_config.update(**kwargs)
model_kwargs["use_cache"] = generation_config.use_cache
bos_token_id, eos_token_id = generation_config.bos_token_id, generation_config.eos_token_id
if isinstance(eos_token_id, int):
eos_token_id = [eos_token_id]
has_default_max_length = kwargs.get("max_length") is None and generation_config.max_length is not None
if has_default_max_length and generation_config.max_new_tokens is None:
warnings.warn(
f"Using `max_length`'s default ({generation_config.max_length}) to control the generation length. "
"This behaviour is deprecated and will be removed from the config in v5 of Transformers -- we"
" recommend using `max_new_tokens` to control the maximum length of the generation.",
UserWarning,
)
elif generation_config.max_new_tokens is not None:
generation_config.max_length = generation_config.max_new_tokens + input_ids_seq_length
if not has_default_max_length:
logger.warn(
f"Both `max_new_tokens` (={generation_config.max_new_tokens}) and `max_length`(="
f"{generation_config.max_length}) seem to have been set. `max_new_tokens` will take precedence. "
"Please refer to the documentation for more information. "
"(https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)",
UserWarning,
)
if input_ids_seq_length >= generation_config.max_length:
input_ids_string = "decoder_input_ids" if self.config.is_encoder_decoder else "input_ids"
logger.warning(
f"Input length of {input_ids_string} is {input_ids_seq_length}, but `max_length` is set to"
f" {generation_config.max_length}. This can lead to unexpected behavior. You should consider"
" increasing `max_new_tokens`."
)
# 2. Set generation parameters if not already defined
logits_processor = logits_processor if logits_processor is not None else LogitsProcessorList()
stopping_criteria = stopping_criteria if stopping_criteria is not None else StoppingCriteriaList()
logits_processor = self._get_logits_processor(
generation_config=generation_config,
input_ids_seq_length=input_ids_seq_length,
encoder_input_ids=input_ids,
prefix_allowed_tokens_fn=prefix_allowed_tokens_fn,
logits_processor=logits_processor,
)
stopping_criteria = self._get_stopping_criteria(
generation_config=generation_config, stopping_criteria=stopping_criteria
)
logits_warper = self._get_logits_warper(generation_config)
unfinished_sequences = input_ids.new(input_ids.shape[0]).fill_(1)
scores = None
while True:
model_inputs = self.prepare_inputs_for_generation(input_ids, **model_kwargs)
# forward pass to get next token
outputs = self(
**model_inputs,
return_dict=True,
output_attentions=False,
output_hidden_states=False,
)
next_token_logits = outputs.logits[:, -1, :]
# pre-process distribution
next_token_scores = logits_processor(input_ids, next_token_logits)
next_token_scores = logits_warper(input_ids, next_token_scores)
# sample
probs = nn.functional.softmax(next_token_scores, dim=-1)
if generation_config.do_sample:
next_tokens = torch.multinomial(probs, num_samples=1).squeeze(1)
else:
next_tokens = torch.argmax(probs, dim=-1)
# update generated ids, model inputs, and length for next step
input_ids = torch.cat([input_ids, next_tokens[:, None]], dim=-1)
model_kwargs = self._update_model_kwargs_for_generation(
outputs, model_kwargs, is_encoder_decoder=self.config.is_encoder_decoder
)
unfinished_sequences = unfinished_sequences.mul((sum(next_tokens != i for i in eos_token_id)).long())
if return_past_key_values:
yield input_ids, outputs.past_key_values
else:
yield input_ids
# stop when each sentence is finished, or if we exceed the maximum length
if unfinished_sequences.max() == 0 or stopping_criteria(input_ids, scores):
break
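# Multi-turn sketch that reuses the KV cache across calls (hypothetical caller code):
#
#   past = None
#   for response, history, past in model.stream_chat(
#           tokenizer, session_meta, query, history=history,
#           past_key_values=past, return_past_key_values=True):
#       pass  # `past` lets the next turn skip re-encoding the shared prefix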

1285
modeling_chatglm.py Normal file

File diff suppressed because it is too large.

BIN
pytorch_model-00001-of-00002.bin (Stored with Git LFS) Normal file

Binary file not shown.

BIN
pytorch_model-00002-of-00002.bin (Stored with Git LFS) Normal file

Binary file not shown.

207
pytorch_model.bin.index.json Normal file

@@ -0,0 +1,207 @@
{
"metadata": {
"total_size": 12487168064
},
"weight_map": {
"transformer.embedding.word_embeddings.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.final_layernorm.weight": "pytorch_model-00002-of-00002.bin",
"transformer.encoder.layers.0.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.0.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.0.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.0.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.0.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.0.self_attention.query_key_value.bias": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.0.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.1.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.1.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.1.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.1.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.1.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.1.self_attention.query_key_value.bias": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.1.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.10.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.10.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.10.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.10.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.10.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.10.self_attention.query_key_value.bias": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.10.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.11.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.11.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.11.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.11.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.11.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.11.self_attention.query_key_value.bias": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.11.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.12.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.12.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.12.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.12.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.12.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.12.self_attention.query_key_value.bias": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.12.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.13.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.13.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.13.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.13.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.13.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.13.self_attention.query_key_value.bias": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.13.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.14.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.14.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.14.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.14.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.14.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.14.self_attention.query_key_value.bias": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.14.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.15.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.15.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.15.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.15.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.15.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.15.self_attention.query_key_value.bias": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.15.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.16.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.16.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.16.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.16.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.16.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.16.self_attention.query_key_value.bias": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.16.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.17.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.17.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.17.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.17.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.17.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.17.self_attention.query_key_value.bias": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.17.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.18.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.18.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.18.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.18.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.18.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.18.self_attention.query_key_value.bias": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.18.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.19.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.19.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.19.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.19.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.19.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.19.self_attention.query_key_value.bias": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.19.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.2.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.2.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.2.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.2.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.2.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.2.self_attention.query_key_value.bias": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.2.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.20.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.20.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.20.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.20.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.20.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.20.self_attention.query_key_value.bias": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.20.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.21.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.21.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.21.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.21.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.21.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.21.self_attention.query_key_value.bias": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.21.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.22.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.22.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.22.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.22.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.22.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.22.self_attention.query_key_value.bias": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.22.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.23.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.23.mlp.dense_4h_to_h.weight": "pytorch_model-00002-of-00002.bin",
"transformer.encoder.layers.23.mlp.dense_h_to_4h.weight": "pytorch_model-00002-of-00002.bin",
"transformer.encoder.layers.23.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.23.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.23.self_attention.query_key_value.bias": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.23.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.24.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
"transformer.encoder.layers.24.mlp.dense_4h_to_h.weight": "pytorch_model-00002-of-00002.bin",
"transformer.encoder.layers.24.mlp.dense_h_to_4h.weight": "pytorch_model-00002-of-00002.bin",
"transformer.encoder.layers.24.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin",
"transformer.encoder.layers.24.self_attention.dense.weight": "pytorch_model-00002-of-00002.bin",
"transformer.encoder.layers.24.self_attention.query_key_value.bias": "pytorch_model-00002-of-00002.bin",
"transformer.encoder.layers.24.self_attention.query_key_value.weight": "pytorch_model-00002-of-00002.bin",
"transformer.encoder.layers.25.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
"transformer.encoder.layers.25.mlp.dense_4h_to_h.weight": "pytorch_model-00002-of-00002.bin",
"transformer.encoder.layers.25.mlp.dense_h_to_4h.weight": "pytorch_model-00002-of-00002.bin",
"transformer.encoder.layers.25.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin",
"transformer.encoder.layers.25.self_attention.dense.weight": "pytorch_model-00002-of-00002.bin",
"transformer.encoder.layers.25.self_attention.query_key_value.bias": "pytorch_model-00002-of-00002.bin",
"transformer.encoder.layers.25.self_attention.query_key_value.weight": "pytorch_model-00002-of-00002.bin",
"transformer.encoder.layers.26.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
"transformer.encoder.layers.26.mlp.dense_4h_to_h.weight": "pytorch_model-00002-of-00002.bin",
"transformer.encoder.layers.26.mlp.dense_h_to_4h.weight": "pytorch_model-00002-of-00002.bin",
"transformer.encoder.layers.26.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin",
"transformer.encoder.layers.26.self_attention.dense.weight": "pytorch_model-00002-of-00002.bin",
"transformer.encoder.layers.26.self_attention.query_key_value.bias": "pytorch_model-00002-of-00002.bin",
"transformer.encoder.layers.26.self_attention.query_key_value.weight": "pytorch_model-00002-of-00002.bin",
"transformer.encoder.layers.27.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
"transformer.encoder.layers.27.mlp.dense_4h_to_h.weight": "pytorch_model-00002-of-00002.bin",
"transformer.encoder.layers.27.mlp.dense_h_to_4h.weight": "pytorch_model-00002-of-00002.bin",
"transformer.encoder.layers.27.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin",
"transformer.encoder.layers.27.self_attention.dense.weight": "pytorch_model-00002-of-00002.bin",
"transformer.encoder.layers.27.self_attention.query_key_value.bias": "pytorch_model-00002-of-00002.bin",
"transformer.encoder.layers.27.self_attention.query_key_value.weight": "pytorch_model-00002-of-00002.bin",
"transformer.encoder.layers.3.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.3.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.3.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.3.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.3.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.3.self_attention.query_key_value.bias": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.3.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.4.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.4.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.4.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.4.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.4.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.4.self_attention.query_key_value.bias": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.4.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.5.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.5.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.5.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.5.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.5.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.5.self_attention.query_key_value.bias": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.5.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.6.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.6.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.6.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.6.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.6.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.6.self_attention.query_key_value.bias": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.6.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.7.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.7.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.7.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.7.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.7.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.7.self_attention.query_key_value.bias": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.7.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.8.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.8.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.8.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.8.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.8.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.8.self_attention.query_key_value.bias": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.8.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.9.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.9.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.9.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.9.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.9.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.9.self_attention.query_key_value.bias": "pytorch_model-00001-of-00002.bin",
"transformer.encoder.layers.9.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
"transformer.output_layer.weight": "pytorch_model-00002-of-00002.bin",
"transformer.rotary_pos_emb.inv_freq": "pytorch_model-00001-of-00002.bin"
}
}

188
quantization.py Normal file

File diff suppressed because one or more lines are too long

1
special_tokens_map.json Normal file

@@ -0,0 +1 @@
{}

278
tokenization_chatglm.py Normal file

@@ -0,0 +1,278 @@
import os
import re
from typing import List, Optional, Union, Dict
from sentencepiece import SentencePieceProcessor
from transformers import PreTrainedTokenizer
from transformers.utils import logging, PaddingStrategy
from transformers.tokenization_utils_base import EncodedInput, BatchEncoding
class SPTokenizer:
def __init__(self, model_path: str):
# reload tokenizer
assert os.path.isfile(model_path), model_path
self.sp_model = SentencePieceProcessor(model_file=model_path)
# BOS / EOS token IDs
self.n_words: int = self.sp_model.vocab_size()
self.bos_id: int = self.sp_model.bos_id()
self.eos_id: int = self.sp_model.eos_id()
self.pad_id: int = self.sp_model.unk_id()
assert self.sp_model.vocab_size() == self.sp_model.get_piece_size()
special_tokens = ["[MASK]", "[gMASK]", "[sMASK]", "sop", "eop"]
self.special_tokens = {}
self.index_special_tokens = {}
for token in special_tokens:
self.special_tokens[token] = self.n_words
self.index_special_tokens[self.n_words] = token
self.n_words += 1
self.role_special_token_expression = "|".join([re.escape(token) for token in special_tokens]) # for apply_chat_template
def tokenize(self, s: str, encode_special_tokens=False):
if encode_special_tokens:
last_index = 0
t = []
for match in re.finditer(self.role_special_token_expression, s):
if last_index < match.start():
t.extend(self.sp_model.EncodeAsPieces(s[last_index:match.start()]))
t.append(s[match.start():match.end()])
last_index = match.end()
if last_index < len(s):
t.extend(self.sp_model.EncodeAsPieces(s[last_index:]))
return t
else:
return self.sp_model.EncodeAsPieces(s)
def encode(self, s: str, bos: bool = False, eos: bool = False) -> List[int]:
assert type(s) is str
t = self.sp_model.encode(s)
if bos:
t = [self.bos_id] + t
if eos:
t = t + [self.eos_id]
return t
def decode(self, t: List[int]) -> str:
text, buffer = "", []
for token in t:
if token in self.index_special_tokens:
if buffer:
text += self.sp_model.decode(buffer)
buffer = []
text += self.index_special_tokens[token]
else:
buffer.append(token)
if buffer:
text += self.sp_model.decode(buffer)
return text
def decode_tokens(self, tokens: List[str]) -> str:
text = self.sp_model.DecodePieces(tokens)
return text
def convert_token_to_id(self, token):
""" Converts a token (str) in an id using the vocab. """
if token in self.special_tokens:
return self.special_tokens[token]
return self.sp_model.PieceToId(token)
def convert_id_to_token(self, index):
"""Converts an index (integer) in a token (str) using the vocab."""
if index in self.index_special_tokens or index in [self.eos_id, self.bos_id, self.pad_id] or index < 0:
return ""
return self.sp_model.IdToPiece(index)
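# Round-trip sketch for SPTokenizer (hypothetical local path):
#
#   sp = SPTokenizer("./CharacterGLM-6B/tokenizer.model")
#   ids = sp.encode("你好")   # sentencepiece ids; no BOS/EOS unless requested
#   print(sp.decode(ids))     # -> 你好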
class ChatGLMTokenizer(PreTrainedTokenizer):
vocab_files_names = {"vocab_file": "tokenizer.model"}
model_input_names = ["input_ids", "attention_mask", "position_ids"]
def __init__(self, vocab_file, padding_side="left", clean_up_tokenization_spaces=False, encode_special_tokens=False, **kwargs):
self.name = "GLMTokenizer"
self.vocab_file = vocab_file
self.tokenizer = SPTokenizer(vocab_file)
self.special_tokens = {
"<bos>": self.tokenizer.bos_id,
"<eos>": self.tokenizer.eos_id,
"<pad>": self.tokenizer.pad_id
}
self.encode_special_tokens = encode_special_tokens
super().__init__(padding_side=padding_side, clean_up_tokenization_spaces=clean_up_tokenization_spaces, **kwargs)
def get_command(self, token):
if token in self.special_tokens:
return self.special_tokens[token]
assert token in self.tokenizer.special_tokens, f"{token} is not a special token for {self.name}"
return self.tokenizer.special_tokens[token]
@property
def pad_token(self) -> str:
return "<unk>"
@property
def pad_token_id(self):
return self.get_command("<pad>")
@property
def eos_token(self) -> str:
return "</s>"
@property
def eos_token_id(self):
return self.get_command("<eos>")
@property
def vocab_size(self):
return self.tokenizer.n_words
def get_vocab(self):
""" Returns vocab as a dict """
vocab = {self._convert_id_to_token(i): i for i in range(self.vocab_size)}
vocab.update(self.added_tokens_encoder)
return vocab
def _tokenize(self, text, **kwargs):
return self.tokenizer.tokenize(text, encode_special_tokens=self.encode_special_tokens)
def _convert_token_to_id(self, token):
""" Converts a token (str) in an id using the vocab. """
return self.tokenizer.convert_token_to_id(token)
def _convert_id_to_token(self, index):
"""Converts an index (integer) in a token (str) using the vocab."""
return self.tokenizer.convert_id_to_token(index)
def convert_tokens_to_string(self, tokens: List[str]) -> str:
return self.tokenizer.decode_tokens(tokens)
def save_vocabulary(self, save_directory, filename_prefix=None):
"""
Save the vocabulary and special tokens file to a directory.
Args:
save_directory (`str`):
The directory in which to save the vocabulary.
filename_prefix (`str`, *optional*):
                An optional prefix to add to the names of the saved files.
Returns:
`Tuple(str)`: Paths to the files saved.
"""
if os.path.isdir(save_directory):
vocab_file = os.path.join(
save_directory, self.vocab_files_names["vocab_file"]
)
else:
vocab_file = save_directory
with open(self.vocab_file, 'rb') as fin:
proto_str = fin.read()
with open(vocab_file, "wb") as writer:
writer.write(proto_str)
return (vocab_file,)
def get_prefix_tokens(self):
prefix_tokens = [self.get_command("[gMASK]"), self.get_command("sop")]
return prefix_tokens
def build_prompt(self, query, history=None):
if history is None:
history = []
prompt = ""
for i, (old_query, response) in enumerate(history):
prompt += "[Round {}]\n\n问:{}\n\n答:{}\n\n".format(i + 1, old_query, response)
prompt += "[Round {}]\n\n问:{}\n\n答:".format(len(history) + 1, query)
return prompt
def build_inputs_with_special_tokens(
self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
) -> List[int]:
"""
        Build model inputs from a sequence or a pair of sequences for sequence classification tasks by concatenating
        and adding special tokens. A ChatGLM sequence has the following format:
        - single sequence: `[gMASK] sop X`
        - pair of sequences: `[gMASK] sop A B </s>`
Args:
token_ids_0 (`List[int]`):
List of IDs to which the special tokens will be added.
token_ids_1 (`List[int]`, *optional*):
Optional second list of IDs for sequence pairs.
Returns:
`List[int]`: List of [input IDs](../glossary#input-ids) with the appropriate special tokens.
"""
prefix_tokens = self.get_prefix_tokens()
token_ids_0 = prefix_tokens + token_ids_0
if token_ids_1 is not None:
token_ids_0 = token_ids_0 + token_ids_1 + [self.get_command("<eos>")]
return token_ids_0
def _pad(
self,
encoded_inputs: Union[Dict[str, EncodedInput], BatchEncoding],
max_length: Optional[int] = None,
padding_strategy: PaddingStrategy = PaddingStrategy.DO_NOT_PAD,
pad_to_multiple_of: Optional[int] = None,
return_attention_mask: Optional[bool] = None,
) -> dict:
"""
Pad encoded inputs (on left/right and up to predefined length or max length in the batch)
Args:
encoded_inputs:
Dictionary of tokenized inputs (`List[int]`) or batch of tokenized inputs (`List[List[int]]`).
max_length: maximum length of the returned list and optionally padding length (see below).
Will truncate by taking into account the special tokens.
padding_strategy: PaddingStrategy to use for padding.
- PaddingStrategy.LONGEST Pad to the longest sequence in the batch
- PaddingStrategy.MAX_LENGTH: Pad to the max length (default)
- PaddingStrategy.DO_NOT_PAD: Do not pad
The tokenizer padding sides are defined in self.padding_side:
- 'left': pads on the left of the sequences
- 'right': pads on the right of the sequences
pad_to_multiple_of: (optional) Integer if set will pad the sequence to a multiple of the provided value.
This is especially useful to enable the use of Tensor Core on NVIDIA hardware with compute capability
`>= 7.5` (Volta).
return_attention_mask:
(optional) Set to False to avoid returning attention mask (default: set to model specifics)
"""
# Load from model defaults
assert self.padding_side == "left"
required_input = encoded_inputs[self.model_input_names[0]]
seq_length = len(required_input)
if padding_strategy == PaddingStrategy.LONGEST:
max_length = len(required_input)
if max_length is not None and pad_to_multiple_of is not None and (max_length % pad_to_multiple_of != 0):
max_length = ((max_length // pad_to_multiple_of) + 1) * pad_to_multiple_of
needs_to_be_padded = padding_strategy != PaddingStrategy.DO_NOT_PAD and len(required_input) != max_length
# Initialize attention mask if not present.
if "attention_mask" not in encoded_inputs:
encoded_inputs["attention_mask"] = [1] * seq_length
if "position_ids" not in encoded_inputs:
encoded_inputs["position_ids"] = list(range(seq_length))
if needs_to_be_padded:
difference = max_length - len(required_input)
if "attention_mask" in encoded_inputs:
encoded_inputs["attention_mask"] = [0] * difference + encoded_inputs["attention_mask"]
if "position_ids" in encoded_inputs:
encoded_inputs["position_ids"] = [0] * difference + encoded_inputs["position_ids"]
encoded_inputs[self.model_input_names[0]] = [self.pad_token_id] * difference + required_input
return encoded_inputs

BIN
tokenizer.model (Stored with Git LFS) Normal file

Binary file not shown.

33
tokenizer_config.json Normal file

@@ -0,0 +1,33 @@
{
"added_tokens_decoder": {
"64790": {
"content": "[gMASK]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"64792": {
"content": "sop",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
}
},
"auto_map": {
"AutoTokenizer": [
"tokenization_chatglm.ChatGLMTokenizer",
null
]
},
"chat_template": "{% set ns = namespace() %}[gMASK]sop{% for message in messages %}{% if loop.first %}{% set ns.bot_name = message['bot_name'] %}{% set ns.user_name = message['user_name'] %}以下是一段{{ message['bot_name'] }}和{{ message['user_name'] }}之间的对话。{%+ if message['bot_profile'] is defined and message['bot_profile']|length +%}\n关于{{ message['bot_name'] }}的信息:{{ message['bot_profile']|replace('\n', ' ') }}{% endif %}{%+ if message['user_profile'] is defined and message['user_profile']|length +%}\n关于{{ message['user_name'] }}的信息:{{ message['user_profile']|replace('\n', ' ') }}{% endif %}{%+ else +%}\n[{% if message['role'] == 'user' %}{{ ns.user_name }}{% else %}{{ ns.bot_name }}{% endif %}]{{ message['content']|replace('\n', ' ') }}{% endif %}{% endfor %}{%+ if add_generation_prompt +%}\n[{{ ns.bot_name }}]{% endif %}",
"clean_up_tokenization_spaces": true,
"do_lower_case": false,
"model_max_length": 1000000000000000019884624838656,
"padding_side": "left",
"remove_space": false,
"tokenizer_class": "ChatGLMTokenizer"
}
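The `chat_template` above reproduces the prompt layout of `build_inputs` in characterglm_generation_utils.py: the first message supplies the persona fields, later messages carry `role`/`content`. A hedged sketch of driving it (the message values are illustrative):
```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("./CharacterGLM-6B", trust_remote_code=True)
messages = [
    # the first message carries only the persona metadata consumed by the template
    {"bot_name": "苏梦远", "user_name": "陆星辰",
     "bot_profile": "当红女歌手", "user_profile": "知名导演"},
    {"role": "user", "content": "你好"},
]
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
print(prompt)
```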