AquilaChat2-7B_a13670421562.../predict.py

"""
Copied from https://github.com/lm-sys/FastChat.
Later we will contribute our changes into it.
"""
import dataclasses
from enum import auto, IntEnum
from typing import List, Any, Dict
import math
from typing import List, Optional, Tuple, Union
import random
import numpy as np

import torch
import torch.utils.checkpoint
from torch import nn
from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss

from transformers.activations import ACT2FN
from transformers.modeling_outputs import BaseModelOutputWithPast, CausalLMOutputWithPast, SequenceClassifierOutputWithPast
from transformers.modeling_utils import PreTrainedModel
from transformers.utils import add_start_docstrings, add_start_docstrings_to_model_forward, logging, replace_return_docstrings
from transformers import (
    LogitsProcessorList,
    MinLengthLogitsProcessor,
    TopKLogitsWarper,
    TemperatureLogitsWarper,
    TopPLogitsWarper,
    StoppingCriteriaList,
    MaxLengthCriteria,
    BitsAndBytesConfig,
)


class SeparatorStyle(IntEnum):
    """Separator styles."""

    ADD_COLON_SINGLE = auto()
    ADD_COLON_TWO = auto()
    ADD_COLON_SPACE_SINGLE = auto()
    NO_COLON_SINGLE = auto()
    NO_COLON_TWO = auto()
    ADD_NEW_LINE_SINGLE = auto()


@dataclasses.dataclass
class Conversation:
    """A class that manages prompt templates and keeps all conversation history."""

    # The name of this template
    name: str
    # The template of the system prompt
    system_template: str = "{system_message}"
    # The system message
    system_message: str = ""
    # The names of two roles
    roles: List[str] = (("USER", "ASSISTANT"),)
    # All messages. Each item is (role, message).
    messages: List[List[str]] = ()
    # The number of few shot examples
    offset: int = 0
    # The separator style and configurations
    sep_style: SeparatorStyle = SeparatorStyle.ADD_COLON_SINGLE
    sep: str = "\n"
    sep2: str = None
    # Stop criteria (the default one is EOS token)
    stop_str: str = None
    # Stops generation if meeting any token in this list
    stop_token_ids: List[int] = None

    def get_prompt(self) -> str:
        """Get the prompt for generation."""
        system_prompt = self.system_template.format(system_message=self.system_message)
        if self.sep_style == SeparatorStyle.ADD_COLON_SINGLE:
            ret = system_prompt + self.sep
            for role, message in self.messages:
                if message:
                    ret += role + ": " + message + self.sep
                else:
                    ret += role + ":"
            return ret
        elif self.sep_style == SeparatorStyle.ADD_COLON_TWO:
            seps = [self.sep, self.sep2]
            ret = system_prompt + seps[0]
            for i, (role, message) in enumerate(self.messages):
                if message:
                    ret += role + ": " + message + seps[i % 2]
                else:
                    ret += role + ":"
            return ret
        elif self.sep_style == SeparatorStyle.ADD_COLON_SPACE_SINGLE:
            ret = system_prompt + self.sep
            for role, message in self.messages:
                if message:
                    ret += role + ": " + message + self.sep
                else:
                    ret += role + ": "  # must be end with a space
            return ret
        elif self.sep_style == SeparatorStyle.ADD_NEW_LINE_SINGLE:
            ret = "" if system_prompt == "" else system_prompt + self.sep
            for role, message in self.messages:
                if message:
                    ret += role + "\n" + message + self.sep
                else:
                    ret += role + "\n"
            return ret
        elif self.sep_style == SeparatorStyle.NO_COLON_SINGLE:
            ret = system_prompt
            for role, message in self.messages:
                if message:
                    ret += role + message + self.sep
                else:
                    ret += role
            return ret
        elif self.sep_style == SeparatorStyle.NO_COLON_TWO:
            seps = [self.sep, self.sep2]
            ret = system_prompt
            for i, (role, message) in enumerate(self.messages):
                if message:
                    ret += role + message + seps[i % 2]
                else:
                    ret += role
            return ret

    def set_system_message(self, system_message: str):
        """Set the system message."""
        self.system_message = system_message

    def append_message(self, role: str, message: str):
        """Append a new message."""
        self.messages.append([role, message])

    def update_last_message(self, message: str):
        """Update the last output.

        The last message is typically set to be None when constructing the prompt,
        so we need to update it in-place after getting the response from a model.
        """
        self.messages[-1][1] = message

    def copy(self):
        return Conversation(
            name=self.name,
            system_template=self.system_template,
            system_message=self.system_message,
            roles=self.roles,
            messages=[[x, y] for x, y in self.messages],
            offset=self.offset,
            sep_style=self.sep_style,
            sep=self.sep,
            sep2=self.sep2,
            stop_str=self.stop_str,
            stop_token_ids=self.stop_token_ids,
        )

    def dict(self):
        return {
            "template_name": self.name,
            "system_message": self.system_message,
            "roles": self.roles,
            "messages": self.messages,
            "offset": self.offset,
        }


# A global registry for all conversation templates
conv_templates: Dict[str, Conversation] = {}


def register_conv_template(template: Conversation, override: bool = False):
    """Register a new conversation template."""
    if not override:
        assert (
            template.name not in conv_templates
        ), f"{template.name} has been registered."

    conv_templates[template.name] = template


def get_conv_template(name: str) -> Conversation:
    """Get a conversation template."""
    return conv_templates[name].copy()

def get_conversation_template(model_path: str) -> Conversation:
    """Get the default conversation template."""
    if "aquila-v1" in model_path:
        return get_conv_template("aquila-v1")
    elif "aquila-chat" in model_path:
        return get_conv_template("aquila-chat")
    elif "aquila-legacy" in model_path:
        return get_conv_template("aquila-legacy")
    else:
        return get_conv_template("aquila")

# AquilaChat default template
# source: https://github.com/FlagAI-Open/FlagAI/blob/master/examples/Aquila/Aquila-chat/cyg_conversation.py
register_conv_template(
    Conversation(
        name="aquila-chat",
        system_message="A chat between a curious human and an artificial intelligence assistant. "
        "The assistant gives helpful, detailed, and polite answers to the human's questions.",
        roles=("Human", "Assistant", "System"),
        messages=(),
        offset=0,
        sep_style=SeparatorStyle.ADD_COLON_SINGLE,
        sep="###",
        sep2="",
        stop_str=["###", "</s>", "[UNK]"],
    )
)

register_conv_template(
    Conversation(
        name="aquila-legacy",
        system_message="A chat between a curious human and an artificial intelligence assistant. "
        "The assistant gives helpful, detailed, and polite answers to the human's questions.\n\n",
        roles=("### Human: ", "### Assistant: ", "System"),
        messages=(),
        offset=0,
        sep_style=SeparatorStyle.NO_COLON_TWO,
        sep="\n",
        sep2="</s>",
        stop_str=["</s>", "[UNK]"],
    )
)

register_conv_template(
    Conversation(
        name="aquila",
        system_message="A chat between a curious human and an artificial intelligence assistant. "
        "The assistant gives helpful, detailed, and polite answers to the human's questions.",
        roles=("Human", "Assistant", "System"),
        messages=(),
        offset=0,
        sep_style=SeparatorStyle.ADD_COLON_TWO,
        sep="###",
        sep2="</s>",
        stop_str=["</s>", "[UNK]"],
    )
)

register_conv_template(
    Conversation(
        name="aquila-v1",
        roles=("<|startofpiece|>", "<|endofpiece|>", ""),
        messages=(),
        offset=0,
        sep_style=SeparatorStyle.NO_COLON_TWO,
        sep="",
        sep2="</s>",
        stop_str=["</s>", "<|endoftext|>"],
    )
)


if __name__ == "__main__":
    print("aquila template:")
    conv = get_conv_template("aquila")
    conv.append_message(conv.roles[0], "Hello!")
    conv.append_message(conv.roles[1], "Hi!")
    conv.append_message(conv.roles[0], "How are you?")
    conv.append_message(conv.roles[1], None)
    print(conv.get_prompt())

    print("\n")

    print("aquila-chat template:")
    conv = get_conv_template("aquila-chat")
    conv.append_message(conv.roles[0], "Hello!")
    conv.append_message(conv.roles[1], "Hi!")
    conv.append_message(conv.roles[0], "How are you?")
    conv.append_message(conv.roles[1], None)
    print(conv.get_prompt())

    print("\n")

    print("aquila-v1 template:")
    conv = get_conv_template("aquila-v1")
    conv.append_message(conv.roles[0], "Hello!")
    conv.append_message(conv.roles[1], "Hi!")
    conv.append_message(conv.roles[0], "How are you?")
    conv.append_message(conv.roles[1], None)
    print(conv.get_prompt())

    print("\n")

    print("aquila-legacy template:")
    conv = get_conv_template("aquila-legacy")
    conv.append_message(conv.roles[0], "Hello!")
    conv.append_message(conv.roles[1], "Hi!")
    conv.append_message(conv.roles[0], "How are you?")
    conv.append_message(conv.roles[1], None)
    print(conv.get_prompt())

    print("\n")

def set_random_seed(seed):
    """Set random seed for reproducability."""
    if seed is not None and seed > 0:
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)

def covert_prompt_to_input_ids_with_history(text, history, tokenizer, max_token, convo_template="aquila-chat"):
    # aquila-chat as default
    conv = get_conv_template(convo_template)

    conv.append_message(conv.roles[1], None)
    conv.append_message(conv.roles[0], text)

    example = tokenizer.encode_plus(f"{conv.get_prompt()} ", None, max_length=None)['input_ids']

    if history is None or not isinstance(history, list):
      history = []

    while(len(history) > 0 and (len(example) < max_token)):
        tmp = history.pop()
        if tmp[0] == 'ASSISTANT':
            conv.append_message(conv.roles[1], tmp[1])
        else:
            conv.append_message(conv.roles[0], tmp[1])
        example = tokenizer.encode_plus(f"{conv.get_prompt()} ", None, max_length=None)['input_ids']

    if len(example) >= max_token:
        conv.messages.pop()
    conv.messages = conv.messages[::-1]
    print('model in:', conv.get_prompt())
    example = tokenizer.encode_plus(f"{conv.get_prompt()} ", None, max_length=None)['input_ids']

    return example

def predict(model, text, tokenizer=None,
            max_gen_len=200, top_p=0.95,
            seed=1234, topk=100,
            temperature=0.9, 
            sft=True, convo_template = "",
            device = "cuda",
            model_name="AquilaChat2-7B",
            history=None,
            **kwargs):

    vocab = tokenizer.get_vocab()

    id2word = {v:k for k, v in vocab.items()}

    
    template_map = {"AquilaChat2-7B": "aquila-v1",
                    "AquilaChat2-34B": "aquila-legacy",
                    "AquilaChat2-7B-16K": "aquila",
                    "AquilaChat2-34B-16K": "aquila"}
    if not convo_template:
        convo_template=template_map.get(model_name, "aquila-chat")

    set_random_seed(seed)
    if temperature == 0:
        topk = 1
        temperature = 1.0
    if sft:
        tokens = covert_prompt_to_input_ids_with_history(text, history=history, tokenizer=tokenizer, max_token=2048, convo_template=convo_template)
        tokens = torch.tensor(tokens)[None,].to(device)
    else :
        tokens = tokenizer.encode_plus(text)["input_ids"]
        print(tokenizer.decode(tokens))
        tokens = torch.tensor(tokens)[None,].to(device)
    input_length = len(tokens[0])
    with torch.no_grad():

        # instantiate logits processors
        logits_processor = LogitsProcessorList(
            [
                MinLengthLogitsProcessor(1, eos_token_id=100007),
            ]
        )
        # instantiate logits processors
        logits_warper = LogitsProcessorList(
            [
                TopPLogitsWarper(top_p),
                TopKLogitsWarper(topk),
                TemperatureLogitsWarper(temperature),
                
            ]
        )

        stopping_criteria = StoppingCriteriaList([MaxLengthCriteria(max_length=input_length + max_gen_len)])
        out = model.sample(
                            tokens,
                            logits_processor=logits_processor,
                            logits_warper=logits_warper,
                            stopping_criteria=stopping_criteria,
                            return_dict_in_generate=True, 
                            output_scores=True,
                        )

        
        # print(out)
        out_ids = out["sequences"][0][input_length:].cpu().numpy()

        out_scores = out["scores"]

        out_scores = torch.cat(out_scores, dim=0)
        out_scores = torch.nn.functional.softmax(out_scores, dim=-1).cpu().numpy()

        probs = []
        for i in range(len(out_ids)):
            probs.append(float(out_scores[i][out_ids[i]]))

        # print(f"probs is {probs}")

        convert_tokens = []
        for t in out_ids:
            if t == 100006:
                convert_tokens.append("[CLS]")
            else :
                convert_tokens.append(id2word.get(t, "[unkonwn_token]"))

        out_text = tokenizer.decode(out_ids.tolist())
        

        out = out_text

    if "[UNK]" in out:
        special_index = out.index("[UNK]")
        out = out[:special_index]
        token_length = len(tokenizer.encode_plus(out)["input_ids"])
        convert_tokens = convert_tokens[:token_length]
        probs = probs[:token_length]

    if "</s>" in out:
        special_index = out.index("</s>")
        out = out[: special_index]
        token_length = len(tokenizer.encode_plus(out)["input_ids"])
        convert_tokens = convert_tokens[:token_length]
        probs = probs[:token_length]

    if len(out) > 0 and out[0] == " ":
        out = out[1:]

        convert_tokens = convert_tokens[1:]
        probs = probs[1:]

    if isinstance(history, list):
        # Update history
        history.insert(0, ('ASSISTANT', out))
        history.insert(0, ('USER', text))

    return out
first commit 2024-11-21 15:49:20 +08:00			`"""`
			`Copied from https://github.com/lm-sys/FastChat.`
			`Later we will contribute our changes into it.`
			`"""`
			`import dataclasses`
			`from enum import auto, IntEnum`
			`from typing import List, Any, Dict`
			`import math`
			`from typing import List, Optional, Tuple, Union`
			`import random`
			`import numpy as np`

			`import torch`
			`import torch.utils.checkpoint`
			`from torch import nn`
			`from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss`

			`from transformers.activations import ACT2FN`
			`from transformers.modeling_outputs import BaseModelOutputWithPast, CausalLMOutputWithPast, SequenceClassifierOutputWithPast`
			`from transformers.modeling_utils import PreTrainedModel`
			`from transformers.utils import add_start_docstrings, add_start_docstrings_to_model_forward, logging, replace_return_docstrings`
			`from transformers import (`
			`LogitsProcessorList,`
			`MinLengthLogitsProcessor,`
			`TopKLogitsWarper,`
			`TemperatureLogitsWarper,`
			`TopPLogitsWarper,`
			`StoppingCriteriaList,`
			`MaxLengthCriteria,`
			`BitsAndBytesConfig,`
			`)`



			`class SeparatorStyle(IntEnum):`
			`"""Separator styles."""`

			`ADD_COLON_SINGLE = auto()`
			`ADD_COLON_TWO = auto()`
			`ADD_COLON_SPACE_SINGLE = auto()`
			`NO_COLON_SINGLE = auto()`
			`NO_COLON_TWO = auto()`
			`ADD_NEW_LINE_SINGLE = auto()`


			`@dataclasses.dataclass`
			`class Conversation:`
			`"""A class that manages prompt templates and keeps all conversation history."""`

			`# The name of this template`
			`name: str`
			`# The template of the system prompt`
			`system_template: str = "{system_message}"`
			`# The system message`
			`system_message: str = ""`
			`# The names of two roles`
			`roles: List[str] = (("USER", "ASSISTANT"),)`
			`# All messages. Each item is (role, message).`
			`messages: List[List[str]] = ()`
			`# The number of few shot examples`
			`offset: int = 0`
			`# The separator style and configurations`
			`sep_style: SeparatorStyle = SeparatorStyle.ADD_COLON_SINGLE`
			`sep: str = "\n"`
			`sep2: str = None`
			`# Stop criteria (the default one is EOS token)`
			`stop_str: str = None`
			`# Stops generation if meeting any token in this list`
			`stop_token_ids: List[int] = None`

			`def get_prompt(self) -> str:`
			`"""Get the prompt for generation."""`
			`system_prompt = self.system_template.format(system_message=self.system_message)`
			`if self.sep_style == SeparatorStyle.ADD_COLON_SINGLE:`
			`ret = system_prompt + self.sep`
			`for role, message in self.messages:`
			`if message:`
			`ret += role + ": " + message + self.sep`
			`else:`
			`ret += role + ":"`
			`return ret`
			`elif self.sep_style == SeparatorStyle.ADD_COLON_TWO:`
			`seps = [self.sep, self.sep2]`
			`ret = system_prompt + seps[0]`
			`for i, (role, message) in enumerate(self.messages):`
			`if message:`
			`ret += role + ": " + message + seps[i % 2]`
			`else:`
			`ret += role + ":"`
			`return ret`
			`elif self.sep_style == SeparatorStyle.ADD_COLON_SPACE_SINGLE:`
			`ret = system_prompt + self.sep`
			`for role, message in self.messages:`
			`if message:`
			`ret += role + ": " + message + self.sep`
			`else:`
			`ret += role + ": " # must be end with a space`
			`return ret`
			`elif self.sep_style == SeparatorStyle.ADD_NEW_LINE_SINGLE:`
			`ret = "" if system_prompt == "" else system_prompt + self.sep`
			`for role, message in self.messages:`
			`if message:`
			`ret += role + "\n" + message + self.sep`
			`else:`
			`ret += role + "\n"`
			`return ret`
			`elif self.sep_style == SeparatorStyle.NO_COLON_SINGLE:`
			`ret = system_prompt`
			`for role, message in self.messages:`
			`if message:`
			`ret += role + message + self.sep`
			`else:`
			`ret += role`
			`return ret`
			`elif self.sep_style == SeparatorStyle.NO_COLON_TWO:`
			`seps = [self.sep, self.sep2]`
			`ret = system_prompt`
			`for i, (role, message) in enumerate(self.messages):`
			`if message:`
			`ret += role + message + seps[i % 2]`
			`else:`
			`ret += role`
			`return ret`

			`def set_system_message(self, system_message: str):`
			`"""Set the system message."""`
			`self.system_message = system_message`

			`def append_message(self, role: str, message: str):`
			`"""Append a new message."""`
			`self.messages.append([role, message])`

			`def update_last_message(self, message: str):`
			`"""Update the last output.`

			`The last message is typically set to be None when constructing the prompt,`
			`so we need to update it in-place after getting the response from a model.`
			`"""`
			`self.messages[-1][1] = message`

			`def copy(self):`
			`return Conversation(`
			`name=self.name,`
			`system_template=self.system_template,`
			`system_message=self.system_message,`
			`roles=self.roles,`
			`messages=[[x, y] for x, y in self.messages],`
			`offset=self.offset,`
			`sep_style=self.sep_style,`
			`sep=self.sep,`
			`sep2=self.sep2,`
			`stop_str=self.stop_str,`
			`stop_token_ids=self.stop_token_ids,`
			`)`

			`def dict(self):`
			`return {`
			`"template_name": self.name,`
			`"system_message": self.system_message,`
			`"roles": self.roles,`
			`"messages": self.messages,`
			`"offset": self.offset,`
			`}`


			`# A global registry for all conversation templates`
			`conv_templates: Dict[str, Conversation] = {}`


			`def register_conv_template(template: Conversation, override: bool = False):`
			`"""Register a new conversation template."""`
			`if not override:`
			`assert (`
			`template.name not in conv_templates`
			`), f"{template.name} has been registered."`

			`conv_templates[template.name] = template`


			`def get_conv_template(name: str) -> Conversation:`
			`"""Get a conversation template."""`
			`return conv_templates[name].copy()`

			`def get_conversation_template(model_path: str) -> Conversation:`
			`"""Get the default conversation template."""`
			`if "aquila-v1" in model_path:`
			`return get_conv_template("aquila-v1")`
			`elif "aquila-chat" in model_path:`
			`return get_conv_template("aquila-chat")`
			`elif "aquila-legacy" in model_path:`
			`return get_conv_template("aquila-legacy")`
			`else:`
			`return get_conv_template("aquila")`

			`# AquilaChat default template`
			`# source: https://github.com/FlagAI-Open/FlagAI/blob/master/examples/Aquila/Aquila-chat/cyg_conversation.py`
			`register_conv_template(`
			`Conversation(`
			`name="aquila-chat",`
			`system_message="A chat between a curious human and an artificial intelligence assistant. "`
			`"The assistant gives helpful, detailed, and polite answers to the human's questions.",`
			`roles=("Human", "Assistant", "System"),`
			`messages=(),`
			`offset=0,`
			`sep_style=SeparatorStyle.ADD_COLON_SINGLE,`
			`sep="###",`
			`sep2="",`
			`stop_str=["###", "</s>", "[UNK]"],`
			`)`
			`)`

			`register_conv_template(`
			`Conversation(`
			`name="aquila-legacy",`
			`system_message="A chat between a curious human and an artificial intelligence assistant. "`
			`"The assistant gives helpful, detailed, and polite answers to the human's questions.\n\n",`
			`roles=("### Human: ", "### Assistant: ", "System"),`
			`messages=(),`
			`offset=0,`
			`sep_style=SeparatorStyle.NO_COLON_TWO,`
			`sep="\n",`
			`sep2="</s>",`
			`stop_str=["</s>", "[UNK]"],`
			`)`
			`)`

			`register_conv_template(`
			`Conversation(`
			`name="aquila",`
			`system_message="A chat between a curious human and an artificial intelligence assistant. "`
			`"The assistant gives helpful, detailed, and polite answers to the human's questions.",`
			`roles=("Human", "Assistant", "System"),`
			`messages=(),`
			`offset=0,`
			`sep_style=SeparatorStyle.ADD_COLON_TWO,`
			`sep="###",`
			`sep2="</s>",`
			`stop_str=["</s>", "[UNK]"],`
			`)`
			`)`

			`register_conv_template(`
			`Conversation(`
			`name="aquila-v1",`
			`roles=("<\|startofpiece\|>", "<\|endofpiece\|>", ""),`
			`messages=(),`
			`offset=0,`
			`sep_style=SeparatorStyle.NO_COLON_TWO,`
			`sep="",`
			`sep2="</s>",`
			`stop_str=["</s>", "<\|endoftext\|>"],`
			`)`
			`)`


			`if __name__ == "__main__":`
			`print("aquila template:")`
			`conv = get_conv_template("aquila")`
			`conv.append_message(conv.roles[0], "Hello!")`
			`conv.append_message(conv.roles[1], "Hi!")`
			`conv.append_message(conv.roles[0], "How are you?")`
			`conv.append_message(conv.roles[1], None)`
			`print(conv.get_prompt())`

			`print("\n")`

			`print("aquila-chat template:")`
			`conv = get_conv_template("aquila-chat")`
			`conv.append_message(conv.roles[0], "Hello!")`
			`conv.append_message(conv.roles[1], "Hi!")`
			`conv.append_message(conv.roles[0], "How are you?")`
			`conv.append_message(conv.roles[1], None)`
			`print(conv.get_prompt())`

			`print("\n")`

			`print("aquila-v1 template:")`
			`conv = get_conv_template("aquila-v1")`
			`conv.append_message(conv.roles[0], "Hello!")`
			`conv.append_message(conv.roles[1], "Hi!")`
			`conv.append_message(conv.roles[0], "How are you?")`
			`conv.append_message(conv.roles[1], None)`
			`print(conv.get_prompt())`

			`print("\n")`

			`print("aquila-legacy template:")`
			`conv = get_conv_template("aquila-legacy")`
			`conv.append_message(conv.roles[0], "Hello!")`
			`conv.append_message(conv.roles[1], "Hi!")`
			`conv.append_message(conv.roles[0], "How are you?")`
			`conv.append_message(conv.roles[1], None)`
			`print(conv.get_prompt())`

			`print("\n")`

			`def set_random_seed(seed):`
			`"""Set random seed for reproducability."""`
			`if seed is not None and seed > 0:`
			`random.seed(seed)`
			`np.random.seed(seed)`
			`torch.manual_seed(seed)`

			`def covert_prompt_to_input_ids_with_history(text, history, tokenizer, max_token, convo_template="aquila-chat"):`
			`# aquila-chat as default`
			`conv = get_conv_template(convo_template)`

			`conv.append_message(conv.roles[1], None)`
			`conv.append_message(conv.roles[0], text)`

			`example = tokenizer.encode_plus(f"{conv.get_prompt()} ", None, max_length=None)['input_ids']`

			`if history is None or not isinstance(history, list):`
			`history = []`

			`while(len(history) > 0 and (len(example) < max_token)):`
			`tmp = history.pop()`
			`if tmp[0] == 'ASSISTANT':`
			`conv.append_message(conv.roles[1], tmp[1])`
			`else:`
			`conv.append_message(conv.roles[0], tmp[1])`
			`example = tokenizer.encode_plus(f"{conv.get_prompt()} ", None, max_length=None)['input_ids']`

			`if len(example) >= max_token:`
			`conv.messages.pop()`
			`conv.messages = conv.messages[::-1]`
			`print('model in:', conv.get_prompt())`
			`example = tokenizer.encode_plus(f"{conv.get_prompt()} ", None, max_length=None)['input_ids']`

			`return example`

			`def predict(model, text, tokenizer=None,`
			`max_gen_len=200, top_p=0.95,`
			`seed=1234, topk=100,`
			`temperature=0.9,`
			`sft=True, convo_template = "",`
			`device = "cuda",`
			`model_name="AquilaChat2-7B",`
			`history=None,`
			`**kwargs):`

			`vocab = tokenizer.get_vocab()`

			`id2word = {v:k for k, v in vocab.items()}`


			`template_map = {"AquilaChat2-7B": "aquila-v1",`
			`"AquilaChat2-34B": "aquila-legacy",`
			`"AquilaChat2-7B-16K": "aquila",`
			`"AquilaChat2-34B-16K": "aquila"}`
			`if not convo_template:`
			`convo_template=template_map.get(model_name, "aquila-chat")`

			`set_random_seed(seed)`
			`if temperature == 0:`
			`topk = 1`
			`temperature = 1.0`
			`if sft:`
			`tokens = covert_prompt_to_input_ids_with_history(text, history=history, tokenizer=tokenizer, max_token=2048, convo_template=convo_template)`
			`tokens = torch.tensor(tokens)[None,].to(device)`
			`else :`
			`tokens = tokenizer.encode_plus(text)["input_ids"]`
			`print(tokenizer.decode(tokens))`
			`tokens = torch.tensor(tokens)[None,].to(device)`
			`input_length = len(tokens[0])`
			`with torch.no_grad():`

			`# instantiate logits processors`
			`logits_processor = LogitsProcessorList(`
			`[`
			`MinLengthLogitsProcessor(1, eos_token_id=100007),`
			`]`
			`)`
			`# instantiate logits processors`
			`logits_warper = LogitsProcessorList(`
			`[`
			`TopPLogitsWarper(top_p),`
			`TopKLogitsWarper(topk),`
			`TemperatureLogitsWarper(temperature),`

			`]`
			`)`

			`stopping_criteria = StoppingCriteriaList([MaxLengthCriteria(max_length=input_length + max_gen_len)])`
			`out = model.sample(`
			`tokens,`
			`logits_processor=logits_processor,`
			`logits_warper=logits_warper,`
			`stopping_criteria=stopping_criteria,`
			`return_dict_in_generate=True,`
			`output_scores=True,`
			`)`


			`# print(out)`
			`out_ids = out["sequences"][0][input_length:].cpu().numpy()`

			`out_scores = out["scores"]`

			`out_scores = torch.cat(out_scores, dim=0)`
			`out_scores = torch.nn.functional.softmax(out_scores, dim=-1).cpu().numpy()`

			`probs = []`
			`for i in range(len(out_ids)):`
			`probs.append(float(out_scores[i][out_ids[i]]))`

			`# print(f"probs is {probs}")`

			`convert_tokens = []`
			`for t in out_ids:`
			`if t == 100006:`
			`convert_tokens.append("[CLS]")`
			`else :`
			`convert_tokens.append(id2word.get(t, "[unkonwn_token]"))`

			`out_text = tokenizer.decode(out_ids.tolist())`


			`out = out_text`

			`if "[UNK]" in out:`
			`special_index = out.index("[UNK]")`
			`out = out[:special_index]`
			`token_length = len(tokenizer.encode_plus(out)["input_ids"])`
			`convert_tokens = convert_tokens[:token_length]`
			`probs = probs[:token_length]`

			`if "</s>" in out:`
			`special_index = out.index("</s>")`
			`out = out[: special_index]`
			`token_length = len(tokenizer.encode_plus(out)["input_ids"])`
			`convert_tokens = convert_tokens[:token_length]`
			`probs = probs[:token_length]`

			`if len(out) > 0 and out[0] == " ":`
			`out = out[1:]`

			`convert_tokens = convert_tokens[1:]`
			`probs = probs[1:]`

			`if isinstance(history, list):`
			`# Update history`
			`history.insert(0, ('ASSISTANT', out))`
			`history.insert(0, ('USER', text))`

			`return out`