first commit

parent e16850f08a
commit 86c97b6759

README.md | 30

@@ -1,3 +1,29 @@
# CharacterGLM-6B_a14114209229434880147986

---
frameworks:
- Pytorch
license: other
tasks:
- text-generation
---

###### For the model files and weights, please visit the "Model Files" page.

###### The contributor of this model has not yet provided a more detailed introduction, but you can download the model with the git clone command below, or via the ModelScope SDK.

###### Clone with HTTP

```
git clone https://www.modelscope.cn/THUCoAI/CharacterGLM-6B.git
```

###### If you are a contributor to this model, we invite you to complete the model card promptly, following the [model card writing guide](https://www.modelscope.cn/docs/%E5%A6%82%E4%BD%95%E6%92%B0%E5%86%99%E5%A5%BD%E7%94%A8%E7%9A%84%E6%A8%A1%E5%9E%8B%E5%8D%A1%E7%89%87).

###### [Hugging Face link](https://huggingface.co/thu-coai/CharacterGLM-6B)

CharacterGLM-6B

Code example

```python
>>> from transformers import AutoTokenizer, AutoModel
>>> # assuming the model was downloaded via git into the ./CharacterGLM-6B directory
>>> tokenizer = AutoTokenizer.from_pretrained("./CharacterGLM-6B", trust_remote_code=True)
>>> model = AutoModel.from_pretrained("./CharacterGLM-6B", trust_remote_code=True, device='cuda')
>>> model = model.eval()
>>> session_meta = {'user_info': '我是陆星辰,是一个男性,是一位知名导演,也是苏梦远的合作导演。我擅长拍摄音乐题材的电影。苏梦远对我的态度是尊敬的,并视我为良师益友。', 'bot_info': '苏梦远,本名苏远心,是一位当红的国内女歌手及演员。在参加选秀节目后,凭借独特的嗓音及出众的舞台魅力迅速成名,进入娱乐圈。她外表美丽动人,但真正的魅力在于她的才华和勤奋。苏梦远是音乐学院毕业的优秀生,善于创作,拥有多首热门原创歌曲。除了音乐方面的成就,她还热衷于慈善事业,积极参加公益活动,用实际行动传递正能量。在工作中,她对待工作非常敬业,拍戏时总是全身心投入角色,赢得了业内人士的赞誉和粉丝的喜爱。虽然在娱乐圈,但她始终保持低调、谦逊的态度,深得同行尊重。在表达时,苏梦远喜欢使用"我们"和"一起",强调团队精神。', 'bot_name': '苏梦远', 'user_name': '陆星辰'}
>>> response, history = model.chat(tokenizer, session_meta, "你好", history=[])
```
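The model card above mentions downloading via the ModelScope SDK without showing it; a minimal sketch, assuming the `modelscope` package is installed:

```python
from modelscope import snapshot_download

# downloads the repo into the local ModelScope cache and returns its path
model_dir = snapshot_download('THUCoAI/CharacterGLM-6B')
print(model_dir)
```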

characterglm_generation_utils.py

@@ -0,0 +1,121 @@
import torch
from typing import TypedDict, Literal, List, Optional, Tuple, Iterator


#### data types #########
# The data type definitions below are consistent with the CharacterGLM API,
# but not with the chat method of modeling_chatglm.py.
# See https://open.bigmodel.cn/dev/api#characterglm
RoleType = Literal["user", "assistant"]


class Msg(TypedDict):
    role: RoleType
    content: str


class SessionMeta(TypedDict):
    user_name: str
    bot_name: str
    bot_info: str
    user_info: Optional[str]


HistoryType = List[Msg]


class CharacterGLMGenerationUtils:
    @staticmethod
    def convert_chatglm_history_to_characterglm_history(user_query: str, history: List[Tuple[str, str]]) -> HistoryType:
        characterglm_history: HistoryType = []
        for i, (query, response) in enumerate(history):
            if i == 0 and query == '':
                # the first empty query is a placeholder
                pass
            else:
                characterglm_history.append({
                    "role": "user",
                    "content": query
                })
            characterglm_history.append({
                "role": "assistant",
                "content": response
            })

        characterglm_history.append({
            "role": "user",
            "content": user_query
        })
        return characterglm_history

    @staticmethod
    def build_inputs(session_meta: SessionMeta, history: HistoryType) -> str:
        """
        Note: this assumes that the last message in history is the user query.
        """
        texts = []
        texts.append(
            f"以下是一段{session_meta['bot_name']}和{session_meta['user_name']}之间的对话。")
        if session_meta.get("bot_info"):
            texts.append(f"关于{session_meta['bot_name']}的信息:{session_meta['bot_info']}")
        if session_meta.get("user_info"):
            texts.append(
                f"关于{session_meta['user_name']}的信息:{session_meta['user_info']}")

        assert history and history[-1]['role'] == 'user'
        for msg in history:
            name = session_meta['user_name'] if msg['role'] == 'user' else session_meta['bot_name']
            texts.append(f"[{name}]" + msg['content'].strip())

        texts = [text.replace('\n', ' ') for text in texts]
        texts.append(f"[{session_meta['bot_name']}]")
        return '\n'.join(texts)
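To make the prompt format concrete, here is a minimal illustration of the two helpers above; the persona and messages are invented for the example:

```python
from characterglm_generation_utils import CharacterGLMGenerationUtils

# hypothetical persona, only for illustration
session_meta = {"user_name": "用户", "bot_name": "小明", "bot_info": "小明是一名学生。", "user_info": ""}

# ChatGLM-style history: the leading ('', response) pair is the placeholder convention
history = CharacterGLMGenerationUtils.convert_chatglm_history_to_characterglm_history(
    "你好", [("", "你好呀")])
print(CharacterGLMGenerationUtils.build_inputs(session_meta, history))
# 以下是一段小明和用户之间的对话。
# 关于小明的信息:小明是一名学生。
# [小明]你好呀
# [用户]你好
# [小明]
```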
class CharacterGLMAPI:
    @staticmethod
    def build_api_arguments(session_meta: SessionMeta, history: HistoryType) -> dict:
        return {
            "model": "characterglm",
            "meta": session_meta,
            "prompt": history
        }

    @classmethod
    def async_invoke(cls, session_meta: SessionMeta, history: HistoryType):
        """
        Notes:
        1. Set zhipuai.api_key first.
        2. Passing `return_type='text'` is recommended; otherwise the result is a JSON string.
        3. You must then call `zhipuai.model_api.query_async_invoke_result` to fetch the generated result.

        See:
        https://open.bigmodel.cn/dev/api#characterglm
        """
        import zhipuai
        kwargs = cls.build_api_arguments(session_meta, history)
        return zhipuai.model_api.async_invoke(**kwargs, return_type='text')

    @classmethod
    def invoke(cls, session_meta: SessionMeta, history: HistoryType):
        """
        Notes:
        1. Set zhipuai.api_key first.
        2. Passing `return_type='text'` is recommended; otherwise the result is a JSON string.

        See:
        https://open.bigmodel.cn/dev/api#characterglm
        """
        import zhipuai
        kwargs = cls.build_api_arguments(session_meta, history)
        return zhipuai.model_api.invoke(**kwargs, return_type='text')

    @classmethod
    def generate(cls, session_meta: SessionMeta, history: HistoryType) -> str:
        result = cls.invoke(session_meta, history)
        if not result['success']:
            raise RuntimeError(result)
        return result['data']['choices'][0]['content']

    @classmethod
    def stream_generate(cls, session_meta: SessionMeta, history: HistoryType) -> Iterator[str]:
        # pseudo-streaming generation: produce the full reply, then iterate over it character by character
        return iter(cls.generate(session_meta, history))
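The wrapper above targets the legacy v1 `zhipuai` SDK, which exposes `zhipuai.model_api`. A usage sketch, with a placeholder API key and an invented persona:

```python
import zhipuai

from characterglm_generation_utils import CharacterGLMAPI

zhipuai.api_key = "your-api-key"  # placeholder: set your real key first

session_meta = {"user_name": "用户", "bot_name": "小明", "bot_info": "小明是一名学生。", "user_info": ""}
history = [{"role": "user", "content": "你好"}]

# blocking call; raises RuntimeError if the API reports failure
print(CharacterGLMAPI.generate(session_meta, history))
```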
config.json

@@ -0,0 +1,42 @@
{
  "_name_or_path": "thu-coai/CharacterGLM-6B",
  "model_type": "chatglm",
  "architectures": [
    "ChatGLMModel"
  ],
  "auto_map": {
    "AutoConfig": "configuration_chatglm.ChatGLMConfig",
    "AutoModel": "modeling_characterglm.CharacterGLMForConditionalGeneration",
    "AutoModelForCausalLM": "modeling_characterglm.CharacterGLMForConditionalGeneration",
    "AutoModelForSeq2SeqLM": "modeling_characterglm.CharacterGLMForConditionalGeneration",
    "AutoModelForSequenceClassification": "modeling_chatglm.ChatGLMForSequenceClassification"
  },
  "add_bias_linear": false,
  "add_qkv_bias": true,
  "apply_query_key_layer_scaling": true,
  "apply_residual_connection_post_layernorm": false,
  "attention_dropout": 0.0,
  "attention_softmax_in_fp32": true,
  "bias_dropout_fusion": true,
  "ffn_hidden_size": 13696,
  "fp32_residual_connection": false,
  "hidden_dropout": 0.0,
  "hidden_size": 4096,
  "kv_channels": 128,
  "layernorm_epsilon": 1e-05,
  "multi_query_attention": true,
  "multi_query_group_num": 2,
  "num_attention_heads": 32,
  "num_layers": 28,
  "original_rope": true,
  "padded_vocab_size": 65024,
  "post_layer_norm": true,
  "rmsnorm": true,
  "seq_length": 32768,
  "use_cache": true,
  "torch_dtype": "float16",
  "transformers_version": "4.27.1",
  "tie_word_embeddings": false,
  "eos_token_id": 2,
  "pad_token_id": 0
}
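Because of the `auto_map` entries above, loading with `trust_remote_code=True` routes the Auto classes to the custom modules shipped in this repo. A minimal sketch, assuming the files were cloned to ./CharacterGLM-6B:

```python
from transformers import AutoConfig

config = AutoConfig.from_pretrained("./CharacterGLM-6B", trust_remote_code=True)
print(type(config).__name__)                 # ChatGLMConfig, resolved via auto_map
print(config.num_layers, config.seq_length)  # 28 32768
```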
configuration.json

@@ -0,0 +1 @@
{"framework":"Pytorch","task":"text-generation"}
configuration_chatglm.py

@@ -0,0 +1,61 @@
from transformers import PretrainedConfig


class ChatGLMConfig(PretrainedConfig):
    model_type = "chatglm"

    def __init__(
        self,
        num_layers=28,
        padded_vocab_size=65024,
        hidden_size=4096,
        ffn_hidden_size=13696,
        kv_channels=128,
        num_attention_heads=32,
        seq_length=2048,
        hidden_dropout=0.0,
        classifier_dropout=None,
        attention_dropout=0.0,
        layernorm_epsilon=1e-5,
        rmsnorm=True,
        apply_residual_connection_post_layernorm=False,
        post_layer_norm=True,
        add_bias_linear=False,
        add_qkv_bias=False,
        bias_dropout_fusion=True,
        multi_query_attention=False,
        multi_query_group_num=1,
        apply_query_key_layer_scaling=True,
        attention_softmax_in_fp32=True,
        fp32_residual_connection=False,
        quantization_bit=0,
        pre_seq_len=None,
        prefix_projection=False,
        **kwargs
    ):
        self.num_layers = num_layers
        self.vocab_size = padded_vocab_size
        self.padded_vocab_size = padded_vocab_size
        self.hidden_size = hidden_size
        self.ffn_hidden_size = ffn_hidden_size
        self.kv_channels = kv_channels
        self.num_attention_heads = num_attention_heads
        self.seq_length = seq_length
        self.hidden_dropout = hidden_dropout
        self.classifier_dropout = classifier_dropout
        self.attention_dropout = attention_dropout
        self.layernorm_epsilon = layernorm_epsilon
        self.rmsnorm = rmsnorm
        self.apply_residual_connection_post_layernorm = apply_residual_connection_post_layernorm
        self.post_layer_norm = post_layer_norm
        self.add_bias_linear = add_bias_linear
        self.add_qkv_bias = add_qkv_bias
        self.bias_dropout_fusion = bias_dropout_fusion
        self.multi_query_attention = multi_query_attention
        self.multi_query_group_num = multi_query_group_num
        self.apply_query_key_layer_scaling = apply_query_key_layer_scaling
        self.attention_softmax_in_fp32 = attention_softmax_in_fp32
        self.fp32_residual_connection = fp32_residual_connection
        self.quantization_bit = quantization_bit
        self.pre_seq_len = pre_seq_len
        self.prefix_projection = prefix_projection
        super().__init__(**kwargs)
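For reference, the config class can also be constructed directly; a small sketch that mirrors the multi-query values config.json overrides:

```python
from configuration_chatglm import ChatGLMConfig

config = ChatGLMConfig(multi_query_attention=True, multi_query_group_num=2, seq_length=32768)
print(config.vocab_size)  # 65024, aliased from padded_vocab_size in __init__
```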
generation_config.json

@@ -0,0 +1,6 @@
{
  "_from_model_config": true,
  "eos_token_id": 2,
  "pad_token_id": 0,
  "transformers_version": "4.31.0"
}
modeling_characterglm.py

@@ -0,0 +1,218 @@
import copy
import warnings
from typing import List, Tuple, Optional, Callable

import torch
from torch import nn
from transformers.utils import logging
from transformers.generation.utils import LogitsProcessorList, StoppingCriteriaList, GenerationConfig

from .modeling_chatglm import ChatGLMForConditionalGeneration, InvalidScoreLogitsProcessor
from .characterglm_generation_utils import CharacterGLMGenerationUtils, SessionMeta


logger = logging.get_logger(__name__)
default_generation_config = {
    "do_sample": True,
    "top_k": 100,
    "top_p": 0.9,
    "no_repeat_ngram_size": 0,
    "temperature": 0.9,
    "num_beams": 1,
    "length_penalty": 1.6,
    "repetition_penalty": 1.3,
    "eos_token_id": 13
}


class CharacterGLMForConditionalGeneration(ChatGLMForConditionalGeneration):
    """
    CharacterGLM uses a prompt format that differs from ChatGLM's.
    CharacterGLMForConditionalGeneration reuses the forward method of ChatGLMForConditionalGeneration,
    reimplements `build_inputs` and `build_stream_inputs`,
    and adjusts the signatures of the `chat` and `stream_chat` methods: a session_meta parameter is
    added, and the default decoding parameters are changed.
    """

    def build_inputs(self, tokenizer, session_meta: SessionMeta, query: str, history: Optional[List[Tuple[str, str]]] = None):
        character_glm_history = CharacterGLMGenerationUtils.convert_chatglm_history_to_characterglm_history(query, history or [])
        prompt = CharacterGLMGenerationUtils.build_inputs(session_meta, character_glm_history)
        inputs = tokenizer([prompt], return_tensors="pt")
        inputs = inputs.to(self.device)
        return inputs

    def build_stream_inputs(self, tokenizer, session_meta: SessionMeta, query: str, history: Optional[List[Tuple[str, str]]] = None):
        prompt = "\n[{}]{}\n[{}]".format(
            session_meta['user_name'],
            query.replace('\n', ' '),
            session_meta['bot_name']
        )
        input_ids = tokenizer.encode(prompt, add_special_tokens=False)
        input_ids = input_ids[1:]
        inputs = tokenizer.batch_encode_plus([(input_ids, None)], return_tensors="pt", add_special_tokens=False)
        inputs = inputs.to(self.device)
        return inputs

    @torch.inference_mode()
    def chat(self, tokenizer, session_meta: SessionMeta, query: str, history: List[Tuple[str, str]] = None, max_length: int = 8192, num_beams=1,
             do_sample=True, top_p=0.9, temperature=0.9, repetition_penalty=1.6, logits_processor=None, **kwargs):
        if history is None:
            history = []
        if logits_processor is None:
            logits_processor = LogitsProcessorList()
        logits_processor.append(InvalidScoreLogitsProcessor())
        gen_kwargs = {"max_length": max_length, "num_beams": num_beams, "do_sample": do_sample, "top_p": top_p,
                      "temperature": temperature, "logits_processor": logits_processor, "repetition_penalty": repetition_penalty, **kwargs}
        gen_kwargs.update({k: v for k, v in default_generation_config.items() if k not in gen_kwargs})
        inputs = self.build_inputs(tokenizer, session_meta, query, history=history)
        outputs = self.generate(**inputs, **gen_kwargs)
        outputs = outputs.tolist()[0][len(inputs["input_ids"][0]):]
        response = tokenizer.decode(outputs)
        response = self.process_response(response)
        history = history + [(query, response)]
        return response, history

    @torch.inference_mode()
    def stream_chat(self, tokenizer, session_meta: SessionMeta, query: str, history: List[Tuple[str, str]] = None, past_key_values=None,
                    max_length: int = 8192, do_sample=True, top_p=0.9, temperature=0.9, repetition_penalty=1.0, logits_processor=None,
                    return_past_key_values=False, **kwargs):
        if history is None:
            history = []
        if logits_processor is None:
            logits_processor = LogitsProcessorList()
        logits_processor.append(InvalidScoreLogitsProcessor())
        gen_kwargs = {"max_length": max_length, "do_sample": do_sample, "top_p": top_p,
                      "temperature": temperature, "logits_processor": logits_processor, "repetition_penalty": repetition_penalty, **kwargs}
        gen_kwargs.update({k: v for k, v in default_generation_config.items() if k not in gen_kwargs})
        gen_kwargs.pop('repetition_penalty', None)
        if past_key_values is None:
            inputs = self.build_inputs(tokenizer, session_meta, query, history=history)
        else:
            inputs = self.build_stream_inputs(tokenizer, session_meta, query, history=history)
        if past_key_values is not None:
            past_length = past_key_values[0][0].shape[0]
            if self.transformer.pre_seq_len is not None:
                past_length -= self.transformer.pre_seq_len
            inputs.position_ids += past_length
            attention_mask = inputs.attention_mask
            attention_mask = torch.cat((attention_mask.new_ones(1, past_length), attention_mask), dim=1)
            inputs['attention_mask'] = attention_mask
        for outputs in self.stream_generate(**inputs, past_key_values=past_key_values,
                                            return_past_key_values=return_past_key_values, **gen_kwargs):
            if return_past_key_values:
                outputs, past_key_values = outputs
            outputs = outputs.tolist()[0][len(inputs["input_ids"][0]):]
            response = tokenizer.decode(outputs)
            if response and response[-1] != "�":
                response = self.process_response(response)
                new_history = history + [(query, response)]
                if return_past_key_values:
                    yield response, new_history, past_key_values
                else:
                    yield response, new_history

    @torch.inference_mode()
    def stream_generate(
            self,
            input_ids,
            generation_config: Optional[GenerationConfig] = None,
            logits_processor: Optional[LogitsProcessorList] = None,
            stopping_criteria: Optional[StoppingCriteriaList] = None,
            prefix_allowed_tokens_fn: Optional[Callable[[int, torch.Tensor], List[int]]] = None,
            return_past_key_values=False,
            **kwargs,
    ):
        batch_size, input_ids_seq_length = input_ids.shape[0], input_ids.shape[-1]

        if generation_config is None:
            generation_config = self.generation_config
        generation_config = copy.deepcopy(generation_config)
        model_kwargs = generation_config.update(**kwargs)
        model_kwargs["use_cache"] = generation_config.use_cache
        bos_token_id, eos_token_id = generation_config.bos_token_id, generation_config.eos_token_id

        if isinstance(eos_token_id, int):
            eos_token_id = [eos_token_id]

        has_default_max_length = kwargs.get("max_length") is None and generation_config.max_length is not None
        if has_default_max_length and generation_config.max_new_tokens is None:
            warnings.warn(
                f"Using `max_length`'s default ({generation_config.max_length}) to control the generation length. "
                "This behaviour is deprecated and will be removed from the config in v5 of Transformers -- we"
                " recommend using `max_new_tokens` to control the maximum length of the generation.",
                UserWarning,
            )
        elif generation_config.max_new_tokens is not None:
            generation_config.max_length = generation_config.max_new_tokens + input_ids_seq_length
            if not has_default_max_length:
                logger.warning(
                    f"Both `max_new_tokens` (={generation_config.max_new_tokens}) and `max_length`(="
                    f"{generation_config.max_length}) seem to have been set. `max_new_tokens` will take precedence. "
                    "Please refer to the documentation for more information. "
                    "(https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)"
                )

        if input_ids_seq_length >= generation_config.max_length:
            input_ids_string = "decoder_input_ids" if self.config.is_encoder_decoder else "input_ids"
            logger.warning(
                f"Input length of {input_ids_string} is {input_ids_seq_length}, but `max_length` is set to"
                f" {generation_config.max_length}. This can lead to unexpected behavior. You should consider"
                " increasing `max_new_tokens`."
            )

        # 2. Set generation parameters if not already defined
        logits_processor = logits_processor if logits_processor is not None else LogitsProcessorList()
        stopping_criteria = stopping_criteria if stopping_criteria is not None else StoppingCriteriaList()

        logits_processor = self._get_logits_processor(
            generation_config=generation_config,
            input_ids_seq_length=input_ids_seq_length,
            encoder_input_ids=input_ids,
            prefix_allowed_tokens_fn=prefix_allowed_tokens_fn,
            logits_processor=logits_processor,
        )

        stopping_criteria = self._get_stopping_criteria(
            generation_config=generation_config, stopping_criteria=stopping_criteria
        )
        logits_warper = self._get_logits_warper(generation_config)

        unfinished_sequences = input_ids.new(input_ids.shape[0]).fill_(1)
        scores = None
        while True:
            model_inputs = self.prepare_inputs_for_generation(input_ids, **model_kwargs)
            # forward pass to get next token
            outputs = self(
                **model_inputs,
                return_dict=True,
                output_attentions=False,
                output_hidden_states=False,
            )

            next_token_logits = outputs.logits[:, -1, :]

            # pre-process distribution
            next_token_scores = logits_processor(input_ids, next_token_logits)
            next_token_scores = logits_warper(input_ids, next_token_scores)

            # sample
            probs = nn.functional.softmax(next_token_scores, dim=-1)
            if generation_config.do_sample:
                next_tokens = torch.multinomial(probs, num_samples=1).squeeze(1)
            else:
                next_tokens = torch.argmax(probs, dim=-1)

            # update generated ids, model inputs, and length for next step
            input_ids = torch.cat([input_ids, next_tokens[:, None]], dim=-1)
            model_kwargs = self._update_model_kwargs_for_generation(
                outputs, model_kwargs, is_encoder_decoder=self.config.is_encoder_decoder
            )
            unfinished_sequences = unfinished_sequences.mul((sum(next_tokens != i for i in eos_token_id)).long())
            if return_past_key_values:
                yield input_ids, outputs.past_key_values
            else:
                yield input_ids
            # stop when each sentence is finished, or if we exceed the maximum length
            if unfinished_sequences.max() == 0 or stopping_criteria(input_ids, scores):
                break
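A sketch of consuming the streaming interface above; it assumes `model`, `tokenizer`, and `session_meta` were created as in the README example. Each yielded `response` is the full reply so far, so only the new suffix is printed:

```python
printed = 0
for response, history in model.stream_chat(tokenizer, session_meta, "你好", history=[]):
    print(response[printed:], end="", flush=True)
    printed = len(response)
print()
```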
File diff suppressed because it is too large

Binary file not shown.
Binary file not shown.
pytorch_model.bin.index.json

@@ -0,0 +1,207 @@
{
  "metadata": {
    "total_size": 12487168064
  },
  "weight_map": {
    "transformer.embedding.word_embeddings.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.final_layernorm.weight": "pytorch_model-00002-of-00002.bin",
    "transformer.encoder.layers.0.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.0.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.0.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.0.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.0.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.0.self_attention.query_key_value.bias": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.0.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.1.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.1.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.1.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.1.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.1.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.1.self_attention.query_key_value.bias": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.1.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.10.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.10.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.10.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.10.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.10.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.10.self_attention.query_key_value.bias": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.10.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.11.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.11.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.11.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.11.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.11.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.11.self_attention.query_key_value.bias": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.11.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.12.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.12.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.12.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.12.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.12.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.12.self_attention.query_key_value.bias": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.12.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.13.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.13.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.13.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.13.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.13.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.13.self_attention.query_key_value.bias": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.13.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.14.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.14.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.14.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.14.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.14.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.14.self_attention.query_key_value.bias": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.14.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.15.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.15.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.15.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.15.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.15.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.15.self_attention.query_key_value.bias": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.15.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.16.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.16.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.16.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.16.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.16.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.16.self_attention.query_key_value.bias": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.16.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.17.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.17.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.17.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.17.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.17.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.17.self_attention.query_key_value.bias": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.17.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.18.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.18.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.18.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.18.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.18.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.18.self_attention.query_key_value.bias": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.18.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.19.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.19.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.19.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.19.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.19.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.19.self_attention.query_key_value.bias": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.19.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.2.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.2.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.2.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.2.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.2.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.2.self_attention.query_key_value.bias": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.2.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.20.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.20.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.20.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.20.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.20.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.20.self_attention.query_key_value.bias": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.20.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.21.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.21.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.21.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.21.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.21.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.21.self_attention.query_key_value.bias": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.21.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.22.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.22.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.22.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.22.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.22.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.22.self_attention.query_key_value.bias": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.22.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.23.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.23.mlp.dense_4h_to_h.weight": "pytorch_model-00002-of-00002.bin",
    "transformer.encoder.layers.23.mlp.dense_h_to_4h.weight": "pytorch_model-00002-of-00002.bin",
    "transformer.encoder.layers.23.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.23.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.23.self_attention.query_key_value.bias": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.23.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.24.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
    "transformer.encoder.layers.24.mlp.dense_4h_to_h.weight": "pytorch_model-00002-of-00002.bin",
    "transformer.encoder.layers.24.mlp.dense_h_to_4h.weight": "pytorch_model-00002-of-00002.bin",
    "transformer.encoder.layers.24.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin",
    "transformer.encoder.layers.24.self_attention.dense.weight": "pytorch_model-00002-of-00002.bin",
    "transformer.encoder.layers.24.self_attention.query_key_value.bias": "pytorch_model-00002-of-00002.bin",
    "transformer.encoder.layers.24.self_attention.query_key_value.weight": "pytorch_model-00002-of-00002.bin",
    "transformer.encoder.layers.25.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
    "transformer.encoder.layers.25.mlp.dense_4h_to_h.weight": "pytorch_model-00002-of-00002.bin",
    "transformer.encoder.layers.25.mlp.dense_h_to_4h.weight": "pytorch_model-00002-of-00002.bin",
    "transformer.encoder.layers.25.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin",
    "transformer.encoder.layers.25.self_attention.dense.weight": "pytorch_model-00002-of-00002.bin",
    "transformer.encoder.layers.25.self_attention.query_key_value.bias": "pytorch_model-00002-of-00002.bin",
    "transformer.encoder.layers.25.self_attention.query_key_value.weight": "pytorch_model-00002-of-00002.bin",
    "transformer.encoder.layers.26.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
    "transformer.encoder.layers.26.mlp.dense_4h_to_h.weight": "pytorch_model-00002-of-00002.bin",
    "transformer.encoder.layers.26.mlp.dense_h_to_4h.weight": "pytorch_model-00002-of-00002.bin",
    "transformer.encoder.layers.26.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin",
    "transformer.encoder.layers.26.self_attention.dense.weight": "pytorch_model-00002-of-00002.bin",
    "transformer.encoder.layers.26.self_attention.query_key_value.bias": "pytorch_model-00002-of-00002.bin",
    "transformer.encoder.layers.26.self_attention.query_key_value.weight": "pytorch_model-00002-of-00002.bin",
    "transformer.encoder.layers.27.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
    "transformer.encoder.layers.27.mlp.dense_4h_to_h.weight": "pytorch_model-00002-of-00002.bin",
    "transformer.encoder.layers.27.mlp.dense_h_to_4h.weight": "pytorch_model-00002-of-00002.bin",
    "transformer.encoder.layers.27.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin",
    "transformer.encoder.layers.27.self_attention.dense.weight": "pytorch_model-00002-of-00002.bin",
    "transformer.encoder.layers.27.self_attention.query_key_value.bias": "pytorch_model-00002-of-00002.bin",
    "transformer.encoder.layers.27.self_attention.query_key_value.weight": "pytorch_model-00002-of-00002.bin",
    "transformer.encoder.layers.3.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.3.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.3.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.3.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.3.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.3.self_attention.query_key_value.bias": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.3.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.4.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.4.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.4.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.4.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.4.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.4.self_attention.query_key_value.bias": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.4.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.5.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.5.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.5.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.5.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.5.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.5.self_attention.query_key_value.bias": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.5.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.6.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.6.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.6.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.6.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.6.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.6.self_attention.query_key_value.bias": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.6.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.7.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.7.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.7.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.7.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.7.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.7.self_attention.query_key_value.bias": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.7.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.8.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.8.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.8.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.8.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.8.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.8.self_attention.query_key_value.bias": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.8.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.9.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.9.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.9.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.9.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.9.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.9.self_attention.query_key_value.bias": "pytorch_model-00001-of-00002.bin",
    "transformer.encoder.layers.9.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.output_layer.weight": "pytorch_model-00002-of-00002.bin",
    "transformer.rotary_pos_emb.inv_freq": "pytorch_model-00001-of-00002.bin"
  }
}
File diff suppressed because one or more lines are too long

@@ -0,0 +1 @@
{}
tokenization_chatglm.py

@@ -0,0 +1,278 @@
import os
import re
from typing import List, Optional, Union, Dict
from sentencepiece import SentencePieceProcessor
from transformers import PreTrainedTokenizer
from transformers.utils import logging, PaddingStrategy
from transformers.tokenization_utils_base import EncodedInput, BatchEncoding


class SPTokenizer:
    def __init__(self, model_path: str):
        # reload tokenizer
        assert os.path.isfile(model_path), model_path
        self.sp_model = SentencePieceProcessor(model_file=model_path)

        # BOS / EOS token IDs
        self.n_words: int = self.sp_model.vocab_size()
        self.bos_id: int = self.sp_model.bos_id()
        self.eos_id: int = self.sp_model.eos_id()
        self.pad_id: int = self.sp_model.unk_id()
        assert self.sp_model.vocab_size() == self.sp_model.get_piece_size()

        special_tokens = ["[MASK]", "[gMASK]", "[sMASK]", "sop", "eop"]
        self.special_tokens = {}
        self.index_special_tokens = {}
        for token in special_tokens:
            self.special_tokens[token] = self.n_words
            self.index_special_tokens[self.n_words] = token
            self.n_words += 1
        self.role_special_token_expression = "|".join([re.escape(token) for token in special_tokens])  # for apply_chat_template

    def tokenize(self, s: str, encode_special_tokens=False):
        if encode_special_tokens:
            last_index = 0
            t = []
            for match in re.finditer(self.role_special_token_expression, s):
                if last_index < match.start():
                    t.extend(self.sp_model.EncodeAsPieces(s[last_index:match.start()]))
                t.append(s[match.start():match.end()])
                last_index = match.end()
            if last_index < len(s):
                t.extend(self.sp_model.EncodeAsPieces(s[last_index:]))
            return t
        else:
            return self.sp_model.EncodeAsPieces(s)

    def encode(self, s: str, bos: bool = False, eos: bool = False) -> List[int]:
        assert type(s) is str
        t = self.sp_model.encode(s)
        if bos:
            t = [self.bos_id] + t
        if eos:
            t = t + [self.eos_id]
        return t

    def decode(self, t: List[int]) -> str:
        text, buffer = "", []
        for token in t:
            if token in self.index_special_tokens:
                if buffer:
                    text += self.sp_model.decode(buffer)
                    buffer = []
                text += self.index_special_tokens[token]
            else:
                buffer.append(token)
        if buffer:
            text += self.sp_model.decode(buffer)
        return text

    def decode_tokens(self, tokens: List[str]) -> str:
        text = self.sp_model.DecodePieces(tokens)
        return text

    def convert_token_to_id(self, token):
        """ Converts a token (str) into an id using the vocab. """
        if token in self.special_tokens:
            return self.special_tokens[token]
        return self.sp_model.PieceToId(token)

    def convert_id_to_token(self, index):
        """Converts an index (integer) into a token (str) using the vocab."""
        if index in self.index_special_tokens or index in [self.eos_id, self.bos_id, self.pad_id] or index < 0:
            return ""
        return self.sp_model.IdToPiece(index)


class ChatGLMTokenizer(PreTrainedTokenizer):
    vocab_files_names = {"vocab_file": "tokenizer.model"}

    model_input_names = ["input_ids", "attention_mask", "position_ids"]

    def __init__(self, vocab_file, padding_side="left", clean_up_tokenization_spaces=False, encode_special_tokens=False, **kwargs):
        self.name = "GLMTokenizer"

        self.vocab_file = vocab_file
        self.tokenizer = SPTokenizer(vocab_file)
        self.special_tokens = {
            "<bos>": self.tokenizer.bos_id,
            "<eos>": self.tokenizer.eos_id,
            "<pad>": self.tokenizer.pad_id
        }
        self.encode_special_tokens = encode_special_tokens
        super().__init__(padding_side=padding_side, clean_up_tokenization_spaces=clean_up_tokenization_spaces, **kwargs)

    def get_command(self, token):
        if token in self.special_tokens:
            return self.special_tokens[token]
        assert token in self.tokenizer.special_tokens, f"{token} is not a special token for {self.name}"
        return self.tokenizer.special_tokens[token]

    @property
    def pad_token(self) -> str:
        return "<unk>"

    @property
    def pad_token_id(self):
        return self.get_command("<pad>")

    @property
    def eos_token(self) -> str:
        return "</s>"

    @property
    def eos_token_id(self):
        return self.get_command("<eos>")

    @property
    def vocab_size(self):
        return self.tokenizer.n_words

    def get_vocab(self):
        """ Returns vocab as a dict """
        vocab = {self._convert_id_to_token(i): i for i in range(self.vocab_size)}
        vocab.update(self.added_tokens_encoder)
        return vocab

    def _tokenize(self, text, **kwargs):
        return self.tokenizer.tokenize(text, encode_special_tokens=self.encode_special_tokens)

    def _convert_token_to_id(self, token):
        """ Converts a token (str) into an id using the vocab. """
        return self.tokenizer.convert_token_to_id(token)

    def _convert_id_to_token(self, index):
        """Converts an index (integer) into a token (str) using the vocab."""
        return self.tokenizer.convert_id_to_token(index)

    def convert_tokens_to_string(self, tokens: List[str]) -> str:
        return self.tokenizer.decode_tokens(tokens)

    def save_vocabulary(self, save_directory, filename_prefix=None):
        """
        Save the vocabulary and special tokens file to a directory.

        Args:
            save_directory (`str`):
                The directory in which to save the vocabulary.
            filename_prefix (`str`, *optional*):
                An optional prefix to add to the names of the saved files.

        Returns:
            `Tuple(str)`: Paths to the files saved.
        """
        if os.path.isdir(save_directory):
            vocab_file = os.path.join(
                save_directory, self.vocab_files_names["vocab_file"]
            )
        else:
            vocab_file = save_directory

        with open(self.vocab_file, 'rb') as fin:
            proto_str = fin.read()

        with open(vocab_file, "wb") as writer:
            writer.write(proto_str)

        return (vocab_file,)

    def get_prefix_tokens(self):
        prefix_tokens = [self.get_command("[gMASK]"), self.get_command("sop")]
        return prefix_tokens

    def build_prompt(self, query, history=None):
        if history is None:
            history = []
        prompt = ""
        for i, (old_query, response) in enumerate(history):
            prompt += "[Round {}]\n\n问:{}\n\n答:{}\n\n".format(i + 1, old_query, response)
        prompt += "[Round {}]\n\n问:{}\n\n答:".format(len(history) + 1, query)
        return prompt

    def build_inputs_with_special_tokens(
            self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
    ) -> List[int]:
        """
        Build model inputs from a sequence or a pair of sequences for sequence classification tasks by concatenating
        and adding special tokens. A BERT sequence has the following format:

        - single sequence: `[CLS] X [SEP]`
        - pair of sequences: `[CLS] A [SEP] B [SEP]`

        Args:
            token_ids_0 (`List[int]`):
                List of IDs to which the special tokens will be added.
            token_ids_1 (`List[int]`, *optional*):
                Optional second list of IDs for sequence pairs.

        Returns:
            `List[int]`: List of [input IDs](../glossary#input-ids) with the appropriate special tokens.
        """
        prefix_tokens = self.get_prefix_tokens()
        token_ids_0 = prefix_tokens + token_ids_0
        if token_ids_1 is not None:
            token_ids_0 = token_ids_0 + token_ids_1 + [self.get_command("<eos>")]
        return token_ids_0

    def _pad(
            self,
            encoded_inputs: Union[Dict[str, EncodedInput], BatchEncoding],
            max_length: Optional[int] = None,
            padding_strategy: PaddingStrategy = PaddingStrategy.DO_NOT_PAD,
            pad_to_multiple_of: Optional[int] = None,
            return_attention_mask: Optional[bool] = None,
    ) -> dict:
        """
        Pad encoded inputs (on left/right and up to predefined length or max length in the batch)

        Args:
            encoded_inputs:
                Dictionary of tokenized inputs (`List[int]`) or batch of tokenized inputs (`List[List[int]]`).
            max_length: maximum length of the returned list and optionally padding length (see below).
                Will truncate by taking into account the special tokens.
            padding_strategy: PaddingStrategy to use for padding.

                - PaddingStrategy.LONGEST: Pad to the longest sequence in the batch
                - PaddingStrategy.MAX_LENGTH: Pad to the max length (default)
                - PaddingStrategy.DO_NOT_PAD: Do not pad
                The tokenizer padding side is defined by self.padding_side:

                - 'left': pads on the left of the sequences
                - 'right': pads on the right of the sequences
            pad_to_multiple_of: (optional) Integer if set will pad the sequence to a multiple of the provided value.
                This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability
                `>= 7.5` (Volta).
            return_attention_mask:
                (optional) Set to False to avoid returning attention mask (default: set to model specifics)
        """
        # Load from model defaults
        assert self.padding_side == "left"

        required_input = encoded_inputs[self.model_input_names[0]]
        seq_length = len(required_input)

        if padding_strategy == PaddingStrategy.LONGEST:
            max_length = len(required_input)

        if max_length is not None and pad_to_multiple_of is not None and (max_length % pad_to_multiple_of != 0):
            max_length = ((max_length // pad_to_multiple_of) + 1) * pad_to_multiple_of

        needs_to_be_padded = padding_strategy != PaddingStrategy.DO_NOT_PAD and len(required_input) != max_length

        # Initialize attention mask if not present.
        if "attention_mask" not in encoded_inputs:
            encoded_inputs["attention_mask"] = [1] * seq_length

        if "position_ids" not in encoded_inputs:
            encoded_inputs["position_ids"] = list(range(seq_length))

        if needs_to_be_padded:
            difference = max_length - len(required_input)

            if "attention_mask" in encoded_inputs:
                encoded_inputs["attention_mask"] = [0] * difference + encoded_inputs["attention_mask"]
            if "position_ids" in encoded_inputs:
                encoded_inputs["position_ids"] = [0] * difference + encoded_inputs["position_ids"]
            encoded_inputs[self.model_input_names[0]] = [self.pad_token_id] * difference + required_input

        return encoded_inputs
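A quick check of the prefix-token plumbing above, assuming `tokenizer` was loaded with `trust_remote_code=True` as in the README:

```python
print(tokenizer.get_prefix_tokens())  # [64790, 64792], i.e. [gMASK] and sop
enc = tokenizer("你好")                # add_special_tokens=True by default
print(enc["input_ids"][:2])           # the same two prefix ids
```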
Binary file not shown.
tokenizer_config.json

@@ -0,0 +1,33 @@
{
  "added_tokens_decoder": {
    "64790": {
      "content": "[gMASK]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "64792": {
      "content": "sop",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    }
  },
  "auto_map": {
    "AutoTokenizer": [
      "tokenization_chatglm.ChatGLMTokenizer",
      null
    ]
  },
  "chat_template": "{% set ns = namespace() %}[gMASK]sop{% for message in messages %}{% if loop.first %}{% set ns.bot_name = message['bot_name'] %}{% set ns.user_name = message['user_name'] %}以下是一段{{ message['bot_name'] }}和{{ message['user_name'] }}之间的对话。{%+ if message['bot_profile'] is defined and message['bot_profile']|length +%}\n关于{{ message['bot_name'] }}的信息:{{ message['bot_profile']|replace('\n', ' ') }}{% endif %}{%+ if message['user_profile'] is defined and message['user_profile']|length +%}\n关于{{ message['user_name'] }}的信息:{{ message['user_profile']|replace('\n', ' ') }}{% endif %}{%+ else +%}\n[{% if message['role'] == 'user' %}{{ ns.user_name }}{% else %}{{ ns.bot_name }}{% endif %}]{{ message['content']|replace('\n', ' ') }}{% endif %}{% endfor %}{%+ if add_generation_prompt +%}\n[{{ ns.bot_name }}]{% endif %}",
  "clean_up_tokenization_spaces": true,
  "do_lower_case": false,
  "model_max_length": 1000000000000000019884624838656,
  "padding_side": "left",
  "remove_space": false,
  "tokenizer_class": "ChatGLMTokenizer"
}
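The `chat_template` above expects the first message to carry the persona fields (`bot_name`, `user_name`, and optional `bot_profile`/`user_profile`) and later messages to carry `role` and `content`. A rendering sketch with invented values; whether your `transformers` version accepts this non-standard message schema without complaint is an assumption:

```python
messages = [
    {"role": "system", "bot_name": "苏梦远", "user_name": "陆星辰",
     "bot_profile": "当红女歌手。", "user_profile": "知名导演。"},
    {"role": "user", "content": "你好"},
]
# assumes `tokenizer` was loaded with trust_remote_code=True as in the README
prompt = tokenizer.apply_chat_template(messages, add_generation_prompt=True, tokenize=False)
print(prompt)  # [gMASK]sop以下是一段苏梦远和陆星辰之间的对话。...
```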