from typing import List, Optional, Union

from transformers.models.llama import LlamaTokenizerFast


class DeepseekTokenizerFast(LlamaTokenizerFast):
    def convert_ids_to_tokens(
        self, ids: Union[int, List[int]], skip_special_tokens: bool = False
    ) -> Union[str, List[str]]:
        """
        Converts a single index or a sequence of indices to a token or a sequence of tokens, using the vocabulary and
        added tokens.

        Args:
            ids (`int` or `List[int]`):
                The token id (or token ids) to convert to tokens.
            skip_special_tokens (`bool`, *optional*, defaults to `False`):
                Whether or not to remove special tokens in the decoding.

        Returns:
            `str` or `List[str]`: The decoded token(s).
        """
        if isinstance(ids, int):
            return self._convert_id_to_token(ids)
        tokens = []
        for index in ids:
            index = int(index)
            # Drop special tokens (e.g. bos/eos/pad) when requested.
            if skip_special_tokens and index in self.all_special_ids:
                continue
            token = self._tokenizer.id_to_token(index)
            # Map ids with no vocabulary entry to an empty string rather than None.
            tokens.append(token if token is not None else "")
        return tokens

    def _convert_id_to_token(self, index: int) -> Optional[str]:
        token = self._tokenizer.id_to_token(int(index))
        return token if token is not None else ""
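
# A minimal usage sketch (not part of the original file): load the tokenizer
# from a checkpoint directory and round-trip ids through the override above.
# The path "path/to/deepseek-checkpoint" is a placeholder, not a real location.
if __name__ == "__main__":
    tokenizer = DeepseekTokenizerFast.from_pretrained("path/to/deepseek-checkpoint")
    ids = tokenizer.encode("Hello, world!")
    # With skip_special_tokens=True, special ids such as bos/eos are dropped
    # entirely; with the default False, they are converted like any other id.
    print(tokenizer.convert_ids_to_tokens(ids))
    print(tokenizer.convert_ids_to_tokens(ids, skip_special_tokens=True))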