mindnlp.transformers.models.llama.modeling_llama

mindnlp.transformers.models.llama.modeling_llama.LlamaAttention

Bases: Module

Multi-headed attention from 'Attention Is All You Need' paper

Source code in mindnlp\transformers\models\llama\modeling_llama.py (lines 327-457)
class LlamaAttention(nn.Module):
    """Multi-headed attention from 'Attention Is All You Need' paper"""

    def __init__(self, config: LlamaConfig, layer_idx: Optional[int] = None):
        super().__init__()
        self.config = config
        self.layer_idx = layer_idx
        if layer_idx is None:
            logger.warning_once(
                f"Instantiating {self.__class__.__name__} without passing a `layer_idx` is not recommended and will "
                "lead to errors during the forward call if caching is used. Please make sure to provide a `layer_idx` "
                "when creating this class."
            )

        self.attention_dropout = config.attention_dropout
        self.hidden_size = config.hidden_size
        self.num_heads = config.num_attention_heads
        self.head_dim = self.hidden_size // self.num_heads
        self.num_key_value_heads = config.num_key_value_heads
        self.num_key_value_groups = self.num_heads // self.num_key_value_heads
        self.max_position_embeddings = config.max_position_embeddings
        self.rope_theta = config.rope_theta
        self.is_causal = True

        if (self.head_dim * self.num_heads) != self.hidden_size:
            raise ValueError(
                f"hidden_size must be divisible by num_heads (got `hidden_size`: {self.hidden_size}"
                f" and `num_heads`: {self.num_heads})."
            )

        self.q_proj = nn.Linear(self.hidden_size, self.num_heads * self.head_dim, bias=config.attention_bias)
        self.k_proj = nn.Linear(self.hidden_size, self.num_key_value_heads * self.head_dim, bias=config.attention_bias)
        self.v_proj = nn.Linear(self.hidden_size, self.num_key_value_heads * self.head_dim, bias=config.attention_bias)
        self.o_proj = nn.Linear(self.hidden_size, self.hidden_size, bias=config.attention_bias)

        # TODO (joao): remove in v4.45 (RoPE is computed in the model, not in the decoder layers)
        self.rotary_emb = LlamaRotaryEmbedding(config=self.config)

    def forward(
        self,
        hidden_states: mindspore.Tensor,
        attention_mask: Optional[mindspore.Tensor] = None,
        position_ids: Optional[mindspore.Tensor] = None,
        past_key_value: Optional[Cache] = None,
        output_attentions: bool = False,
        use_cache: bool = False,
        cache_position: Optional[mindspore.Tensor] = None,
        position_embeddings: Optional[Tuple[mindspore.Tensor, mindspore.Tensor]] = None,  # will become mandatory in v4.45
        **kwargs,
    ) -> Tuple[mindspore.Tensor, Optional[mindspore.Tensor], Optional[Tuple[mindspore.Tensor]]]:
        bsz, q_len, _ = hidden_states.shape

        if self.config.pretraining_tp > 1:
            key_value_slicing = (self.num_key_value_heads * self.head_dim) // self.config.pretraining_tp
            query_slices = self.q_proj.weight.split(
                (self.num_heads * self.head_dim) // self.config.pretraining_tp, dim=0
            )
            key_slices = self.k_proj.weight.split(key_value_slicing, dim=0)
            value_slices = self.v_proj.weight.split(key_value_slicing, dim=0)

            query_states = [F.linear(hidden_states, query_slices[i]) for i in range(self.config.pretraining_tp)]
            query_states = ops.cat(query_states, dim=-1)

            key_states = [F.linear(hidden_states, key_slices[i]) for i in range(self.config.pretraining_tp)]
            key_states = ops.cat(key_states, dim=-1)

            value_states = [F.linear(hidden_states, value_slices[i]) for i in range(self.config.pretraining_tp)]
            value_states = ops.cat(value_states, dim=-1)

        else:
            query_states = self.q_proj(hidden_states)
            key_states = self.k_proj(hidden_states)
            value_states = self.v_proj(hidden_states)

        query_states = ops.transpose(query_states.view(bsz, q_len, self.num_heads, self.head_dim), 1, 2)
        key_states = ops.transpose(key_states.view(bsz, q_len, self.num_key_value_heads, self.head_dim), 1, 2)
        value_states = ops.transpose(value_states.view(bsz, q_len, self.num_key_value_heads, self.head_dim), 1, 2)

        if position_embeddings is None:
            logger.warning_once(
                "The attention layers in this model are transitioning from computing the RoPE embeddings internally "
                "through `position_ids` (2D tensor with the indexes of the tokens), to using externally computed "
                "`position_embeddings` (Tuple of tensors, containing cos and sin). In v4.45 `position_ids` will be "
                "removed and `position_embeddings` will be mandatory."
            )
            cos, sin = self.rotary_emb(value_states, position_ids)
        else:
            cos, sin = position_embeddings
        query_states, key_states = apply_rotary_pos_emb(query_states, key_states, cos, sin)

        if past_key_value is not None:
            # sin and cos are specific to RoPE models; cache_position needed for the static cache
            cache_kwargs = {"sin": sin, "cos": cos, "cache_position": cache_position}
            key_states, value_states = past_key_value.update(key_states, value_states, self.layer_idx, cache_kwargs)

        key_states = repeat_kv(key_states, self.num_key_value_groups)
        value_states = repeat_kv(value_states, self.num_key_value_groups)

        attn_weights = ops.matmul(query_states, ops.transpose(key_states, 2, 3)) / math.sqrt(self.head_dim)

        if attention_mask is not None:  # no matter the length, we just slice it
            # causal_mask = attention_mask[:, :, :, : key_states.shape[-2]]
            causal_mask = ops.narrow(attention_mask, 3, 0, key_states.shape[-2])
            attn_weights = attn_weights + causal_mask

        # upcast attention to fp32
        attn_weights = nn.functional.softmax(attn_weights, dim=-1, dtype=mindspore.float32).to(query_states.dtype)
        attn_weights = nn.functional.dropout(attn_weights, p=self.attention_dropout, training=self.training)
        attn_output = ops.matmul(attn_weights, value_states)

        if attn_output.shape != (bsz, self.num_heads, q_len, self.head_dim):
            raise ValueError(
                f"`attn_output` should be of size {(bsz, self.num_heads, q_len, self.head_dim)}, but is"
                f" {attn_output.shape}"
            )

        attn_output = ops.transpose(attn_output, 1, 2)

        attn_output = attn_output.reshape(bsz, q_len, -1)

        if self.config.pretraining_tp > 1:
            attn_output = attn_output.split(self.hidden_size // self.config.pretraining_tp, dim=2)
            o_proj_slices = self.o_proj.weight.split(self.hidden_size // self.config.pretraining_tp, dim=1)
            attn_output = sum(F.linear(attn_output[i], o_proj_slices[i]) for i in range(self.config.pretraining_tp))
        else:
            attn_output = self.o_proj(attn_output)

        if not output_attentions:
            attn_weights = None

        return attn_output, attn_weights, past_key_value
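
The forward pass above is standard scaled dot-product attention with one Llama-specific wrinkle: when `num_key_value_heads` is smaller than `num_attention_heads`, each key/value head serves `num_key_value_groups` query heads and is tiled by `repeat_kv` before the matmul. The following NumPy sketch (illustrative only, with made-up toy sizes) walks through that shape bookkeeping:

import numpy as np

# Toy sizes (assumed for illustration): 32 query heads sharing 8 KV heads.
bsz, q_len, num_heads, num_kv_heads, head_dim = 1, 4, 32, 8, 16
n_groups = num_heads // num_kv_heads                  # num_key_value_groups = 4

q = np.random.randn(bsz, num_heads, q_len, head_dim)
k = np.random.randn(bsz, num_kv_heads, q_len, head_dim)
v = np.random.randn(bsz, num_kv_heads, q_len, head_dim)

# repeat_kv: tile each KV head n_groups times along the head axis
k = np.repeat(k, n_groups, axis=1)                    # (1, 32, 4, 16)
v = np.repeat(v, n_groups, axis=1)

scores = q @ k.transpose(0, 1, 3, 2) / np.sqrt(head_dim)
causal = np.triu(np.full((q_len, q_len), -np.inf), k=1)   # additive causal mask
scores = scores + causal
weights = np.exp(scores - scores.max(-1, keepdims=True))
weights = weights / weights.sum(-1, keepdims=True)    # softmax over key positions
out = weights @ v                                     # (1, 32, 4, 16)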

mindnlp.transformers.models.llama.modeling_llama.LlamaDecoderLayer

Bases: Module

Source code in mindnlp\transformers\models\llama\modeling_llama.py (lines 465-542)
class LlamaDecoderLayer(nn.Module):
    def __init__(self, config: LlamaConfig, layer_idx: int):
        super().__init__()
        self.hidden_size = config.hidden_size

        self.self_attn = LLAMA_ATTENTION_CLASSES[config._attn_implementation](config=config, layer_idx=layer_idx)

        self.mlp = LlamaMLP(config)
        self.input_layernorm = LlamaRMSNorm(config.hidden_size, eps=config.rms_norm_eps)
        self.post_attention_layernorm = LlamaRMSNorm(config.hidden_size, eps=config.rms_norm_eps)

    def forward(
        self,
        hidden_states: mindspore.Tensor,
        attention_mask: Optional[mindspore.Tensor] = None,
        position_ids: Optional[mindspore.Tensor] = None,
        past_key_value: Optional[Cache] = None,
        output_attentions: Optional[bool] = False,
        use_cache: Optional[bool] = False,
        cache_position: Optional[mindspore.Tensor] = None,
        position_embeddings: Optional[Tuple[mindspore.Tensor, mindspore.Tensor]] = None,  # will become mandatory in v4.45
        **kwargs,
    ) -> Tuple[mindspore.Tensor, Optional[Tuple[mindspore.Tensor, mindspore.Tensor]]]:
        """
        Args:
            hidden_states (`mindspore.Tensor`): input to the layer of shape `(batch, seq_len, embed_dim)`
            attention_mask (`mindspore.Tensor`, *optional*):
                attention mask of size `(batch_size, sequence_length)` if flash attention is used or `(batch_size, 1,
                query_sequence_length, key_sequence_length)` if default attention is used.
            output_attentions (`bool`, *optional*):
                Whether or not to return the attentions tensors of all attention layers. See `attentions` under
                returned tensors for more detail.
            use_cache (`bool`, *optional*):
                If set to `True`, `past_key_values` key value states are returned and can be used to speed up decoding
                (see `past_key_values`).
            past_key_value (`Tuple(mindspore.Tensor)`, *optional*): cached past key and value projection states
            cache_position (`mindspore.Tensor` of shape `(sequence_length)`, *optional*):
                Indices depicting the position of the input sequence tokens in the sequence
            position_embeddings (`Tuple[mindspore.Tensor, mindspore.Tensor]`, *optional*):
                Tuple containing the cosine and sine positional embeddings of shape `(batch_size, seq_len, head_dim)`,
                with `head_dim` being the embedding dimension of each attention head.
            kwargs (`dict`, *optional*):
                Arbitrary kwargs to be ignored, used for FSDP and other methods that injects code
                into the model
        """
        residual = hidden_states

        hidden_states = self.input_layernorm(hidden_states)

        # Self Attention
        hidden_states, self_attn_weights, present_key_value = self.self_attn(
            hidden_states=hidden_states,
            attention_mask=attention_mask,
            position_ids=position_ids,
            past_key_value=past_key_value,
            output_attentions=output_attentions,
            use_cache=use_cache,
            cache_position=cache_position,
            position_embeddings=position_embeddings,
            **kwargs,
        )
        hidden_states = residual + hidden_states

        # Fully Connected
        residual = hidden_states
        hidden_states = self.post_attention_layernorm(hidden_states)
        hidden_states = self.mlp(hidden_states)
        hidden_states = residual + hidden_states

        outputs = (hidden_states,)

        if output_attentions:
            outputs += (self_attn_weights,)

        if use_cache:
            outputs += (present_key_value,)

        return outputs
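
The layer follows the pre-norm residual pattern: each sublayer sees RMS-normalized input, and the un-normalized input is added back afterwards. A minimal sketch of that control flow, using stand-in callables rather than the real modules:

# Minimal sketch of the pre-norm residual pattern used above.
# `norm1`, `attn`, `norm2`, `mlp` are stand-ins for input_layernorm,
# self_attn, post_attention_layernorm and mlp (the stand-in attn returns
# only the hidden states, not the (hidden, weights, cache) tuple).
def decoder_layer(x, norm1, attn, norm2, mlp):
    residual = x
    x = norm1(x)          # normalize first (pre-norm) ...
    x = attn(x)           # ... then attend ...
    x = residual + x      # ... then add the residual back

    residual = x
    x = norm2(x)
    x = mlp(x)
    x = residual + x
    return x

# Quick smoke test with identity stand-ins:
out = decoder_layer(1.0, norm1=lambda t: t, attn=lambda t: t,
                    norm2=lambda t: t, mlp=lambda t: t)
# out == 4.0: each of the two residual additions doubles the value.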

mindnlp.transformers.models.llama.modeling_llama.LlamaDecoderLayer.forward(hidden_states, attention_mask=None, position_ids=None, past_key_value=None, output_attentions=False, use_cache=False, cache_position=None, position_embeddings=None, **kwargs)

PARAMETER DESCRIPTION
hidden_states

input to the layer of shape (batch, seq_len, embed_dim)

TYPE: `mindspore.Tensor`

attention_mask

attention mask of size (batch_size, sequence_length) if flash attention is used or (batch_size, 1, query_sequence_length, key_sequence_length) if default attention is used.

TYPE: `mindspore.Tensor`, *optional* DEFAULT: None

output_attentions

Whether or not to return the attentions tensors of all attention layers. See attentions under returned tensors for more detail.

TYPE: `bool`, *optional* DEFAULT: False

use_cache

If set to True, past_key_values key value states are returned and can be used to speed up decoding (see past_key_values).

TYPE: `bool`, *optional* DEFAULT: False

past_key_value

cached past key and value projection states

TYPE: `Tuple(mindspore.Tensor)`, *optional* DEFAULT: None

cache_position

Indices depicting the position of the input sequence tokens in the sequence

TYPE: `mindspore.Tensor` of shape `(sequence_length)`, *optional* DEFAULT: None

position_embeddings

Tuple containing the cosine and sine positional embeddings of shape (batch_size, seq_len, head_dim), with head_dim being the embedding dimension of each attention head.

TYPE: `Tuple[mindspore.Tensor, mindspore.Tensor]`, *optional* DEFAULT: None

kwargs

Arbitrary kwargs to be ignored, used for FSDP and other methods that inject code into the model

TYPE: `dict`, *optional* DEFAULT: {}

Source code in mindnlp\transformers\models\llama\modeling_llama.py (lines 476-542)
def forward(
    self,
    hidden_states: mindspore.Tensor,
    attention_mask: Optional[mindspore.Tensor] = None,
    position_ids: Optional[mindspore.Tensor] = None,
    past_key_value: Optional[Cache] = None,
    output_attentions: Optional[bool] = False,
    use_cache: Optional[bool] = False,
    cache_position: Optional[mindspore.Tensor] = None,
    position_embeddings: Optional[Tuple[mindspore.Tensor, mindspore.Tensor]] = None,  # will become mandatory in v4.45
    **kwargs,
) -> Tuple[mindspore.Tensor, Optional[Tuple[mindspore.Tensor, mindspore.Tensor]]]:
    """
    Args:
        hidden_states (`mindspore.Tensor`): input to the layer of shape `(batch, seq_len, embed_dim)`
        attention_mask (`mindspore.Tensor`, *optional*):
            attention mask of size `(batch_size, sequence_length)` if flash attention is used or `(batch_size, 1,
            query_sequence_length, key_sequence_length)` if default attention is used.
        output_attentions (`bool`, *optional*):
            Whether or not to return the attentions tensors of all attention layers. See `attentions` under
            returned tensors for more detail.
        use_cache (`bool`, *optional*):
            If set to `True`, `past_key_values` key value states are returned and can be used to speed up decoding
            (see `past_key_values`).
        past_key_value (`Tuple(mindspore.Tensor)`, *optional*): cached past key and value projection states
        cache_position (`mindspore.Tensor` of shape `(sequence_length)`, *optional*):
            Indices depicting the position of the input sequence tokens in the sequence
        position_embeddings (`Tuple[mindspore.Tensor, mindspore.Tensor]`, *optional*):
            Tuple containing the cosine and sine positional embeddings of shape `(batch_size, seq_len, head_dim)`,
            with `head_dim` being the embedding dimension of each attention head.
        kwargs (`dict`, *optional*):
            Arbitrary kwargs to be ignored, used for FSDP and other methods that injects code
            into the model
    """
    residual = hidden_states

    hidden_states = self.input_layernorm(hidden_states)

    # Self Attention
    hidden_states, self_attn_weights, present_key_value = self.self_attn(
        hidden_states=hidden_states,
        attention_mask=attention_mask,
        position_ids=position_ids,
        past_key_value=past_key_value,
        output_attentions=output_attentions,
        use_cache=use_cache,
        cache_position=cache_position,
        position_embeddings=position_embeddings,
        **kwargs,
    )
    hidden_states = residual + hidden_states

    # Fully Connected
    residual = hidden_states
    hidden_states = self.post_attention_layernorm(hidden_states)
    hidden_states = self.mlp(hidden_states)
    hidden_states = residual + hidden_states

    outputs = (hidden_states,)

    if output_attentions:
        outputs += (self_attn_weights,)

    if use_cache:
        outputs += (present_key_value,)

    return outputs

mindnlp.transformers.models.llama.modeling_llama.LlamaDynamicNTKScalingRotaryEmbedding

Bases: LlamaRotaryEmbedding

LlamaRotaryEmbedding extended with Dynamic NTK scaling. Credits to the Reddit users /u/bloc97 and /u/emozilla

Source code in mindnlp\transformers\models\llama\modeling_llama.py (lines 232-242)
class LlamaDynamicNTKScalingRotaryEmbedding(LlamaRotaryEmbedding):
    """LlamaRotaryEmbedding extended with Dynamic NTK scaling. Credits to the Reddit users /u/bloc97 and /u/emozilla"""

    def __init__(self, *args, **kwargs):
        logger.warning_once(
            "`LlamaDynamicNTKScalingRotaryEmbedding` is deprecated an will be removed in v4.45. Please use "
            "`LlamaRotaryEmbedding`, which now also does dynamic ntk scaling (simply pass the model config to "
            "__init__)."
        )
        kwargs["rope_type"] = "dynamic"
        super().__init__(*args, **kwargs)
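
The idea behind dynamic NTK scaling is to enlarge the RoPE base once the running sequence length exceeds the training context, so the rotary frequencies stretch instead of repeating. A rough NumPy sketch of that rescaling; the exact formula follows the widely used upstream transformers implementation and is an assumption here, not taken from this file:

import numpy as np

# Illustrative only: rescale the RoPE base when seq_len exceeds the trained
# context length, then derive the per-pair inverse frequencies from it.
def dynamic_ntk_base(base, head_dim, seq_len, max_pos, factor):
    if seq_len <= max_pos:
        return base
    scale = (factor * seq_len / max_pos) - (factor - 1)
    return base * scale ** (head_dim / (head_dim - 2))

base = dynamic_ntk_base(10000.0, head_dim=128, seq_len=8192, max_pos=4096, factor=2.0)
inv_freq = 1.0 / base ** (np.arange(0, 128, 2) / 128)   # rotation frequency per dim pair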

mindnlp.transformers.models.llama.modeling_llama.LlamaForCausalLM

Bases: LlamaPreTrainedModel

Source code in mindnlp\transformers\models\llama\modeling_llama.py (lines 760-951)
class LlamaForCausalLM(LlamaPreTrainedModel):
    _tied_weights_keys = ["lm_head.weight"]

    def __init__(self, config):
        super().__init__(config)
        self.model = LlamaModel(config)
        self.vocab_size = config.vocab_size
        self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False)

        # Initialize weights and apply final processing
        self.post_init()

    def get_input_embeddings(self):
        return self.model.embed_tokens

    def set_input_embeddings(self, value):
        self.model.embed_tokens = value

    def get_output_embeddings(self):
        return self.lm_head

    def set_output_embeddings(self, new_embeddings):
        self.lm_head = new_embeddings

    def set_decoder(self, decoder):
        self.model = decoder

    def get_decoder(self):
        return self.model

    def forward(
        self,
        input_ids: mindspore.Tensor = None,
        attention_mask: Optional[mindspore.Tensor] = None,
        position_ids: Optional[mindspore.Tensor] = None,
        past_key_values: Optional[Union[Cache, List[mindspore.Tensor]]] = None,
        inputs_embeds: Optional[mindspore.Tensor] = None,
        labels: Optional[mindspore.Tensor] = None,
        use_cache: Optional[bool] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
        cache_position: Optional[mindspore.Tensor] = None,
        num_logits_to_keep: int = 0,
    ) -> Union[Tuple, CausalLMOutputWithPast]:
        r"""
        Args:
            labels (`mindspore.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
                Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
                config.vocab_size]` or -100 (see `input_ids` docstring). Tokens with indices set to `-100` are ignored
                (masked), the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`.

        Returns:

        Example:

        ```python
        >>> from transformers import AutoTokenizer, LlamaForCausalLM

        >>> model = LlamaForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf")
        >>> tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")

        >>> prompt = "Hey, are you conscious? Can you talk to me?"
        >>> inputs = tokenizer(prompt, return_tensors="ms")

        >>> # Generate
        >>> generate_ids = model.generate(inputs.input_ids, max_length=30)
        >>> tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
        "Hey, are you conscious? Can you talk to me?\nI'm not conscious, but I can talk to you."
        ```"""
        output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
        output_hidden_states = (
            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
        )
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        # decoder outputs consists of (dec_features, layer_state, dec_hidden, dec_attn)
        outputs = self.model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            position_ids=position_ids,
            past_key_values=past_key_values,
            inputs_embeds=inputs_embeds,
            use_cache=use_cache,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
            cache_position=cache_position,
        )

        hidden_states = outputs[0]
        if self.config.pretraining_tp > 1:
            lm_head_slices = self.lm_head.weight.split(self.vocab_size // self.config.pretraining_tp, dim=0)
            logits = [F.linear(hidden_states, lm_head_slices[i]) for i in range(self.config.pretraining_tp)]
            logits = ops.cat(logits, dim=-1)
        else:
            logits = self.lm_head(hidden_states[:, -num_logits_to_keep:, :]).float()
        logits = logits.float()

        loss = None
        if labels is not None:
            # Shift so that tokens < n predict n
            shift_logits = logits[..., :-1, :]
            shift_labels = labels[..., 1:]
            # Flatten the tokens
            loss_fct = CrossEntropyLoss()
            shift_logits = shift_logits.view(-1, self.config.vocab_size)
            shift_labels = shift_labels.view(-1)
            # Enable model parallelism
            loss = loss_fct(shift_logits, shift_labels)

        if not return_dict:
            output = (logits,) + outputs[1:]
            return (loss,) + output if loss is not None else output

        return CausalLMOutputWithPast(
            loss=loss,
            logits=logits,
            past_key_values=outputs.past_key_values,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )

    def prepare_inputs_for_generation(
        self,
        input_ids,
        past_key_values=None,
        attention_mask=None,
        inputs_embeds=None,
        cache_position=None,
        position_ids=None,
        use_cache=True,
        num_logits_to_keep=None,
        **kwargs,
    ):
        # If we have cache: let's slice `input_ids` through `cache_position`, to keep only the unprocessed tokens
        # Exception 1: when passing input_embeds, input_ids may be missing entries
        # Exception 2: some generation methods do special slicing of input_ids, so we don't need to do it here
        if past_key_values is not None:
            if inputs_embeds is not None:  # Exception 1
                if 0 not in input_ids.shape:
                    # input_ids = input_ids[:, -cache_position.shape[0] :]
                    input_ids = ops.narrow(input_ids, 1, input_ids.shape[1] - cache_position.shape[0], cache_position.shape[0])
            elif input_ids.shape[1] != cache_position.shape[0]:  # Default case (the "else", a no op, is Exception 2)
                # input_ids = input_ids[:, cache_position]
                input_ids = ops.index_select(input_ids, -1, cache_position)

        if attention_mask is not None and position_ids is None:
            # create position_ids on the fly for batch generation
            position_ids = ops.cumsum(attention_mask.int(), -1) - 1
            position_ids = ops.masked_fill(position_ids, attention_mask == 0, 1)
            if past_key_values:
                # position_ids = position_ids[:, -input_ids.shape[1] :]
                position_ids = ops.narrow(position_ids, 1, position_ids.shape[1] - input_ids.shape[1], input_ids.shape[1])
        # if `inputs_embeds` are passed, we only want to use them in the 1st generation step
        if inputs_embeds is not None and cache_position[0] == 0:
            model_inputs = {"inputs_embeds": inputs_embeds}
        else:
            model_inputs = {"input_ids": input_ids}

        if isinstance(past_key_values, StaticCache) and attention_mask.ndim == 2:
            if inputs_embeds is not None:
                batch_size, sequence_length = inputs_embeds.shape
            else:
                batch_size, sequence_length = input_ids.shape

            dtype = self.lm_head.weight.dtype
            min_dtype = float(ops.finfo(dtype).min)

            attention_mask = _prepare_4d_causal_attention_mask_with_cache_position(
                attention_mask,
                sequence_length=sequence_length,
                target_length=past_key_values.get_max_length(),
                dtype=dtype,
                min_dtype=min_dtype,
                cache_position=cache_position,
                batch_size=batch_size,
            )

        if num_logits_to_keep is not None:
            model_inputs["num_logits_to_keep"] = num_logits_to_keep

        model_inputs.update(
            {
                "position_ids": position_ids,
                "cache_position": cache_position,
                "past_key_values": past_key_values,
                "use_cache": use_cache,
                "attention_mask": attention_mask,
            }
        )
        return model_inputs
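
`prepare_inputs_for_generation` derives `position_ids` from the attention mask when none are given: a cumulative sum over the mask yields 0-based positions for real tokens, and padded slots are filled with a dummy value of 1. A small worked example of that trick (plain NumPy, illustrative only):

import numpy as np

# Left-padded batch: 0 marks padding, 1 marks real tokens.
attention_mask = np.array([[0, 0, 1, 1, 1],
                           [1, 1, 1, 1, 1]])

position_ids = attention_mask.cumsum(axis=-1) - 1      # [[-1,-1,0,1,2], [0,1,2,3,4]]
position_ids[attention_mask == 0] = 1                  # padded slots get a dummy 1
print(position_ids)
# [[1 1 0 1 2]
#  [0 1 2 3 4]]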

mindnlp.transformers.models.llama.modeling_llama.LlamaForCausalLM.forward(input_ids=None, attention_mask=None, position_ids=None, past_key_values=None, inputs_embeds=None, labels=None, use_cache=None, output_attentions=None, output_hidden_states=None, return_dict=None, cache_position=None, num_logits_to_keep=0)

PARAMETER DESCRIPTION
labels

Labels for computing the masked language modeling loss. Indices should either be in [0, ..., config.vocab_size] or -100 (see input_ids docstring). Tokens with indices set to -100 are ignored (masked), the loss is only computed for the tokens with labels in [0, ..., config.vocab_size].

TYPE: `mindspore.Tensor` of shape `(batch_size, sequence_length)`, *optional* DEFAULT: None

Example:

>>> from transformers import AutoTokenizer, LlamaForCausalLM

>>> model = LlamaForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf")
>>> tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")

>>> prompt = "Hey, are you conscious? Can you talk to me?"
>>> inputs = tokenizer(prompt, return_tensors="ms")

>>> # Generate
>>> generate_ids = model.generate(inputs.input_ids, max_length=30)
>>> tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
"Hey, are you conscious? Can you talk to me?\nI'm not conscious, but I can talk to you."
Source code in mindnlp\transformers\models\llama\modeling_llama.py (lines 790-881)
def forward(
    self,
    input_ids: mindspore.Tensor = None,
    attention_mask: Optional[mindspore.Tensor] = None,
    position_ids: Optional[mindspore.Tensor] = None,
    past_key_values: Optional[Union[Cache, List[mindspore.Tensor]]] = None,
    inputs_embeds: Optional[mindspore.Tensor] = None,
    labels: Optional[mindspore.Tensor] = None,
    use_cache: Optional[bool] = None,
    output_attentions: Optional[bool] = None,
    output_hidden_states: Optional[bool] = None,
    return_dict: Optional[bool] = None,
    cache_position: Optional[mindspore.Tensor] = None,
    num_logits_to_keep: int = 0,
) -> Union[Tuple, CausalLMOutputWithPast]:
    r"""
    Args:
        labels (`mindspore.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
            config.vocab_size]` or -100 (see `input_ids` docstring). Tokens with indices set to `-100` are ignored
            (masked), the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`.

    Returns:

    Example:

    ```python
    >>> from transformers import AutoTokenizer, LlamaForCausalLM

    >>> model = LlamaForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf")
    >>> tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")

    >>> prompt = "Hey, are you conscious? Can you talk to me?"
    >>> inputs = tokenizer(prompt, return_tensors="ms")

    >>> # Generate
    >>> generate_ids = model.generate(inputs.input_ids, max_length=30)
    >>> tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
    "Hey, are you conscious? Can you talk to me?\nI'm not conscious, but I can talk to you."
    ```"""
    output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
    output_hidden_states = (
        output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
    )
    return_dict = return_dict if return_dict is not None else self.config.use_return_dict

    # decoder outputs consists of (dec_features, layer_state, dec_hidden, dec_attn)
    outputs = self.model(
        input_ids=input_ids,
        attention_mask=attention_mask,
        position_ids=position_ids,
        past_key_values=past_key_values,
        inputs_embeds=inputs_embeds,
        use_cache=use_cache,
        output_attentions=output_attentions,
        output_hidden_states=output_hidden_states,
        return_dict=return_dict,
        cache_position=cache_position,
    )

    hidden_states = outputs[0]
    if self.config.pretraining_tp > 1:
        lm_head_slices = self.lm_head.weight.split(self.vocab_size // self.config.pretraining_tp, dim=0)
        logits = [F.linear(hidden_states, lm_head_slices[i]) for i in range(self.config.pretraining_tp)]
        logits = ops.cat(logits, dim=-1)
    else:
        logits = self.lm_head(hidden_states[:, -num_logits_to_keep:, :]).float()
    logits = logits.float()

    loss = None
    if labels is not None:
        # Shift so that tokens < n predict n
        shift_logits = logits[..., :-1, :]
        shift_labels = labels[..., 1:]
        # Flatten the tokens
        loss_fct = CrossEntropyLoss()
        shift_logits = shift_logits.view(-1, self.config.vocab_size)
        shift_labels = shift_labels.view(-1)
        # Enable model parallelism
        loss = loss_fct(shift_logits, shift_labels)

    if not return_dict:
        output = (logits,) + outputs[1:]
        return (loss,) + output if loss is not None else output

    return CausalLMOutputWithPast(
        loss=loss,
        logits=logits,
        past_key_values=outputs.past_key_values,
        hidden_states=outputs.hidden_states,
        attentions=outputs.attentions,
    )
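
When `labels` are passed, the loss aligns each position's logits with the next token: the last logit row is dropped and the labels are shifted left by one before the flattened cross-entropy. A tiny NumPy illustration of that alignment (labels equal to `-100` would additionally be ignored in the real loss):

import numpy as np

vocab_size, seq_len = 7, 5
logits = np.random.randn(1, seq_len, vocab_size)
labels = np.array([[3, 1, 4, 1, 5]])

# Position i predicts token i + 1, so drop the last logit row and the first label.
shift_logits = logits[:, :-1, :].reshape(-1, vocab_size)   # (4, 7)
shift_labels = labels[:, 1:].reshape(-1)                   # [1, 4, 1, 5]

# Flattened cross-entropy over the four (logit, next-token) pairs.
log_probs = shift_logits - np.log(np.exp(shift_logits).sum(-1, keepdims=True))
loss = -log_probs[np.arange(shift_labels.size), shift_labels].mean()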

mindnlp.transformers.models.llama.modeling_llama.LlamaForQuestionAnswering

Bases: LlamaPreTrainedModel

Source code in mindnlp\transformers\models\llama\modeling_llama.py (lines 1064-1152)
class LlamaForQuestionAnswering(LlamaPreTrainedModel):
    base_model_prefix = "transformer"

    # Copied from transformers.models.bloom.modeling_bloom.BloomForQuestionAnswering.__init__ with Bloom->Llama
    def __init__(self, config):
        super().__init__(config)
        self.transformer = LlamaModel(config)
        self.qa_outputs = nn.Linear(config.hidden_size, 2)

        # Initialize weights and apply final processing
        self.post_init()

    def get_input_embeddings(self):
        return self.transformer.embed_tokens

    def set_input_embeddings(self, value):
        self.transformer.embed_tokens = value

    def forward(
        self,
        input_ids: Optional[mindspore.Tensor] = None,
        attention_mask: Optional[mindspore.Tensor] = None,
        position_ids: Optional[mindspore.Tensor] = None,
        past_key_values: Optional[Union[Cache, List[mindspore.Tensor]]] = None,
        inputs_embeds: Optional[mindspore.Tensor] = None,
        start_positions: Optional[mindspore.Tensor] = None,
        end_positions: Optional[mindspore.Tensor] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[Tuple, QuestionAnsweringModelOutput]:
        r"""
        start_positions (`mindspore.Tensor` of shape `(batch_size,)`, *optional*):
            Labels for position (index) of the start of the labelled span for computing the token classification loss.
            Positions are clamped to the length of the sequence (`sequence_length`). Position outside of the sequence
            are not taken into account for computing the loss.
        end_positions (`mindspore.Tensor` of shape `(batch_size,)`, *optional*):
            Labels for position (index) of the end of the labelled span for computing the token classification loss.
            Positions are clamped to the length of the sequence (`sequence_length`). Position outside of the sequence
            are not taken into account for computing the loss.
        """
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        outputs = self.transformer(
            input_ids,
            attention_mask=attention_mask,
            position_ids=position_ids,
            past_key_values=past_key_values,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )

        sequence_output = outputs[0]

        logits = self.qa_outputs(sequence_output)
        start_logits, end_logits = ops.split(logits, 1, dim=-1)
        start_logits = start_logits.squeeze(-1)
        end_logits = end_logits.squeeze(-1)

        total_loss = None
        if start_positions is not None and end_positions is not None:
            # If we are on multi-GPU, split add a dimension
            if len(start_positions.shape) > 1:
                start_positions = start_positions.squeeze(-1)
            if len(end_positions.shape) > 1:
                end_positions = end_positions.squeeze(-1)
            # sometimes the start/end positions are outside our model inputs, we ignore these terms
            ignored_index = start_logits.shape[1]
            start_positions = start_positions.clamp(0, ignored_index)
            end_positions = end_positions.clamp(0, ignored_index)

            loss_fct = CrossEntropyLoss(ignore_index=ignored_index)
            start_loss = loss_fct(start_logits, start_positions)
            end_loss = loss_fct(end_logits, end_positions)
            total_loss = (start_loss + end_loss) / 2

        if not return_dict:
            output = (start_logits, end_logits) + outputs[2:]
            return ((total_loss,) + output) if total_loss is not None else output

        return QuestionAnsweringModelOutput(
            loss=total_loss,
            start_logits=start_logits,
            end_logits=end_logits,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )
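
`qa_outputs` produces two scores per token, which are split into start and end logits; a simple greedy decoder then takes the argmax of each (real QA pipelines additionally enforce `start <= end` and a maximum span length). A minimal NumPy sketch of the split:

import numpy as np

batch, seq_len = 1, 6
logits = np.random.randn(batch, seq_len, 2)       # output of the qa_outputs head

start_logits = logits[..., 0]                     # (1, 6)
end_logits = logits[..., 1]                       # (1, 6)

# Greedy span prediction for the first example in the batch.
start = int(start_logits.argmax(-1)[0])
end = int(end_logits.argmax(-1)[0])
print(start, end)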

mindnlp.transformers.models.llama.modeling_llama.LlamaForQuestionAnswering.forward(input_ids=None, attention_mask=None, position_ids=None, past_key_values=None, inputs_embeds=None, start_positions=None, end_positions=None, output_attentions=None, output_hidden_states=None, return_dict=None)

start_positions (`mindspore.Tensor` of shape `(batch_size,)`, *optional*): Labels for position (index) of the start of the labelled span for computing the token classification loss. Positions are clamped to the length of the sequence (`sequence_length`). Positions outside of the sequence are not taken into account for computing the loss.

end_positions (`mindspore.Tensor` of shape `(batch_size,)`, *optional*): Labels for position (index) of the end of the labelled span for computing the token classification loss. Positions are clamped to the length of the sequence (`sequence_length`). Positions outside of the sequence are not taken into account for computing the loss.

Source code in mindnlp\transformers\models\llama\modeling_llama.py (lines 1082-1152)
def forward(
    self,
    input_ids: Optional[mindspore.Tensor] = None,
    attention_mask: Optional[mindspore.Tensor] = None,
    position_ids: Optional[mindspore.Tensor] = None,
    past_key_values: Optional[Union[Cache, List[mindspore.Tensor]]] = None,
    inputs_embeds: Optional[mindspore.Tensor] = None,
    start_positions: Optional[mindspore.Tensor] = None,
    end_positions: Optional[mindspore.Tensor] = None,
    output_attentions: Optional[bool] = None,
    output_hidden_states: Optional[bool] = None,
    return_dict: Optional[bool] = None,
) -> Union[Tuple, QuestionAnsweringModelOutput]:
    r"""
    start_positions (`mindspore.Tensor` of shape `(batch_size,)`, *optional*):
        Labels for position (index) of the start of the labelled span for computing the token classification loss.
        Positions are clamped to the length of the sequence (`sequence_length`). Position outside of the sequence
        are not taken into account for computing the loss.
    end_positions (`mindspore.Tensor` of shape `(batch_size,)`, *optional*):
        Labels for position (index) of the end of the labelled span for computing the token classification loss.
        Positions are clamped to the length of the sequence (`sequence_length`). Position outside of the sequence
        are not taken into account for computing the loss.
    """
    return_dict = return_dict if return_dict is not None else self.config.use_return_dict

    outputs = self.transformer(
        input_ids,
        attention_mask=attention_mask,
        position_ids=position_ids,
        past_key_values=past_key_values,
        inputs_embeds=inputs_embeds,
        output_attentions=output_attentions,
        output_hidden_states=output_hidden_states,
        return_dict=return_dict,
    )

    sequence_output = outputs[0]

    logits = self.qa_outputs(sequence_output)
    start_logits, end_logits = ops.split(logits, 1, dim=-1)
    start_logits = start_logits.squeeze(-1)
    end_logits = end_logits.squeeze(-1)

    total_loss = None
    if start_positions is not None and end_positions is not None:
        # If we are on multi-GPU, split add a dimension
        if len(start_positions.shape) > 1:
            start_positions = start_positions.squeeze(-1)
        if len(end_positions.shape) > 1:
            end_positions = end_positions.squeeze(-1)
        # sometimes the start/end positions are outside our model inputs, we ignore these terms
        ignored_index = start_logits.shape[1]
        start_positions = start_positions.clamp(0, ignored_index)
        end_positions = end_positions.clamp(0, ignored_index)

        loss_fct = CrossEntropyLoss(ignore_index=ignored_index)
        start_loss = loss_fct(start_logits, start_positions)
        end_loss = loss_fct(end_logits, end_positions)
        total_loss = (start_loss + end_loss) / 2

    if not return_dict:
        output = (start_logits, end_logits) + outputs[2:]
        return ((total_loss,) + output) if total_loss is not None else output

    return QuestionAnsweringModelOutput(
        loss=total_loss,
        start_logits=start_logits,
        end_logits=end_logits,
        hidden_states=outputs.hidden_states,
        attentions=outputs.attentions,
    )

mindnlp.transformers.models.llama.modeling_llama.LlamaForSequenceClassification

Bases: LlamaPreTrainedModel

Source code in mindnlp\transformers\models\llama\modeling_llama.py (lines 954-1061)
class LlamaForSequenceClassification(LlamaPreTrainedModel):
    def __init__(self, config):
        super().__init__(config)
        self.num_labels = config.num_labels
        self.model = LlamaModel(config)
        self.score = nn.Linear(config.hidden_size, self.num_labels, bias=False)

        # Initialize weights and apply final processing
        self.post_init()

    def get_input_embeddings(self):
        return self.model.embed_tokens

    def set_input_embeddings(self, value):
        self.model.embed_tokens = value

    def forward(
        self,
        input_ids: Optional[mindspore.Tensor] = None,
        attention_mask: Optional[mindspore.Tensor] = None,
        position_ids: Optional[mindspore.Tensor] = None,
        past_key_values: Optional[Union[Cache, List[mindspore.Tensor]]] = None,
        inputs_embeds: Optional[mindspore.Tensor] = None,
        labels: Optional[mindspore.Tensor] = None,
        use_cache: Optional[bool] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[Tuple, SequenceClassifierOutputWithPast]:
        r"""
        labels (`mindspore.Tensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        """
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        transformer_outputs = self.model(
            input_ids,
            attention_mask=attention_mask,
            position_ids=position_ids,
            past_key_values=past_key_values,
            inputs_embeds=inputs_embeds,
            use_cache=use_cache,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )
        hidden_states = transformer_outputs[0]
        logits = self.score(hidden_states)

        if input_ids is not None:
            batch_size = input_ids.shape[0]
        else:
            batch_size = inputs_embeds.shape[0]

        if self.config.pad_token_id is None and batch_size != 1:
            raise ValueError("Cannot handle batch sizes > 1 if no padding token is defined.")
        if self.config.pad_token_id is None:
            sequence_lengths = -1
        else:
            if input_ids is not None:
                # if no pad token found, use modulo instead of reverse indexing for ONNX compatibility
                sequence_lengths = ops.eq(input_ids, self.config.pad_token_id).int().argmax(-1) - 1
                sequence_lengths = sequence_lengths % input_ids.shape[-1]
            else:
                sequence_lengths = -1

        if ON_ORANGE_PI:
            if isinstance(sequence_lengths, mindspore.Tensor):
                sequence_lengths = sequence_lengths.to(mindspore.int32)
            pooled_logits = ops.getitem(logits, (ops.arange(batch_size), sequence_lengths))
        else:
            pooled_logits = logits[ops.arange(batch_size), sequence_lengths]

        loss = None
        if labels is not None:
            if self.config.problem_type is None:
                if self.num_labels == 1:
                    self.config.problem_type = "regression"
                elif self.num_labels > 1 and labels.dtype in (mindspore.int64, mindspore.int32):
                    self.config.problem_type = "single_label_classification"
                else:
                    self.config.problem_type = "multi_label_classification"

            if self.config.problem_type == "regression":
                loss_fct = MSELoss()
                if self.num_labels == 1:
                    loss = loss_fct(pooled_logits.squeeze(), labels.squeeze())
                else:
                    loss = loss_fct(pooled_logits, labels)
            elif self.config.problem_type == "single_label_classification":
                loss_fct = CrossEntropyLoss()
                loss = loss_fct(pooled_logits.view(-1, self.num_labels), labels.view(-1))
            elif self.config.problem_type == "multi_label_classification":
                loss_fct = BCEWithLogitsLoss()
                loss = loss_fct(pooled_logits, labels)
        if not return_dict:
            output = (pooled_logits,) + transformer_outputs[1:]
            return ((loss,) + output) if loss is not None else output

        return SequenceClassifierOutputWithPast(
            loss=loss,
            logits=pooled_logits,
            past_key_values=transformer_outputs.past_key_values,
            hidden_states=transformer_outputs.hidden_states,
            attentions=transformer_outputs.attentions,
        )
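
Since there is no `[CLS]` token, classification pools the hidden state of each sequence's last non-padding token. The index arithmetic above finds the first pad token, steps back one position, and uses a modulo so that a row with no padding wraps around to the final position (this also keeps the expression ONNX-friendly). A worked NumPy example:

import numpy as np

pad_token_id = 0
input_ids = np.array([[5, 6, 7, 0, 0],    # 3 real tokens, right-padded
                      [5, 6, 7, 8, 9]])   # no padding at all

first_pad = (input_ids == pad_token_id).astype(int).argmax(-1)   # [3, 0]
sequence_lengths = (first_pad - 1) % input_ids.shape[-1]         # [2, 4]

# Row 0: last real token is at index 2.
# Row 1: argmax over an all-zero row is 0, and (0 - 1) % 5 = 4 -> last position.
print(sequence_lengths)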

mindnlp.transformers.models.llama.modeling_llama.LlamaForSequenceClassification.forward(input_ids=None, attention_mask=None, position_ids=None, past_key_values=None, inputs_embeds=None, labels=None, use_cache=None, output_attentions=None, output_hidden_states=None, return_dict=None)

labels (`mindspore.Tensor` of shape `(batch_size,)`, *optional*): Labels for computing the sequence classification/regression loss. Indices should be in `[0, ..., config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss); if `config.num_labels > 1` a classification loss is computed (Cross-Entropy).

Source code in mindnlp\transformers\models\llama\modeling_llama.py (lines 970-1061)
def forward(
    self,
    input_ids: Optional[mindspore.Tensor] = None,
    attention_mask: Optional[mindspore.Tensor] = None,
    position_ids: Optional[mindspore.Tensor] = None,
    past_key_values: Optional[Union[Cache, List[mindspore.Tensor]]] = None,
    inputs_embeds: Optional[mindspore.Tensor] = None,
    labels: Optional[mindspore.Tensor] = None,
    use_cache: Optional[bool] = None,
    output_attentions: Optional[bool] = None,
    output_hidden_states: Optional[bool] = None,
    return_dict: Optional[bool] = None,
) -> Union[Tuple, SequenceClassifierOutputWithPast]:
    r"""
    labels (`mindspore.Tensor` of shape `(batch_size,)`, *optional*):
        Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
        config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
        `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
    """
    return_dict = return_dict if return_dict is not None else self.config.use_return_dict

    transformer_outputs = self.model(
        input_ids,
        attention_mask=attention_mask,
        position_ids=position_ids,
        past_key_values=past_key_values,
        inputs_embeds=inputs_embeds,
        use_cache=use_cache,
        output_attentions=output_attentions,
        output_hidden_states=output_hidden_states,
        return_dict=return_dict,
    )
    hidden_states = transformer_outputs[0]
    logits = self.score(hidden_states)

    if input_ids is not None:
        batch_size = input_ids.shape[0]
    else:
        batch_size = inputs_embeds.shape[0]

    if self.config.pad_token_id is None and batch_size != 1:
        raise ValueError("Cannot handle batch sizes > 1 if no padding token is defined.")
    if self.config.pad_token_id is None:
        sequence_lengths = -1
    else:
        if input_ids is not None:
            # if no pad token found, use modulo instead of reverse indexing for ONNX compatibility
            sequence_lengths = ops.eq(input_ids, self.config.pad_token_id).int().argmax(-1) - 1
            sequence_lengths = sequence_lengths % input_ids.shape[-1]
        else:
            sequence_lengths = -1

    if ON_ORANGE_PI:
        if isinstance(sequence_lengths, mindspore.Tensor):
            sequence_lengths = sequence_lengths.to(mindspore.int32)
        pooled_logits = ops.getitem(logits, (ops.arange(batch_size), sequence_lengths))
    else:
        pooled_logits = logits[ops.arange(batch_size), sequence_lengths]

    loss = None
    if labels is not None:
        if self.config.problem_type is None:
            if self.num_labels == 1:
                self.config.problem_type = "regression"
            elif self.num_labels > 1 and labels.dtype in (mindspore.int64, mindspore.int32):
                self.config.problem_type = "single_label_classification"
            else:
                self.config.problem_type = "multi_label_classification"

        if self.config.problem_type == "regression":
            loss_fct = MSELoss()
            if self.num_labels == 1:
                loss = loss_fct(pooled_logits.squeeze(), labels.squeeze())
            else:
                loss = loss_fct(pooled_logits, labels)
        elif self.config.problem_type == "single_label_classification":
            loss_fct = CrossEntropyLoss()
            loss = loss_fct(pooled_logits.view(-1, self.num_labels), labels.view(-1))
        elif self.config.problem_type == "multi_label_classification":
            loss_fct = BCEWithLogitsLoss()
            loss = loss_fct(pooled_logits, labels)
    if not return_dict:
        output = (pooled_logits,) + transformer_outputs[1:]
        return ((loss,) + output) if loss is not None else output

    return SequenceClassifierOutputWithPast(
        loss=loss,
        logits=pooled_logits,
        past_key_values=transformer_outputs.past_key_values,
        hidden_states=transformer_outputs.hidden_states,
        attentions=transformer_outputs.attentions,
    )

mindnlp.transformers.models.llama.modeling_llama.LlamaForTokenClassification

Bases: LlamaPreTrainedModel

Source code in mindnlp\transformers\models\llama\modeling_llama.py (lines 1155-1228)
class LlamaForTokenClassification(LlamaPreTrainedModel):
    def __init__(self, config):
        super().__init__(config)
        self.num_labels = config.num_labels
        self.model = LlamaModel(config)
        if getattr(config, "classifier_dropout", None) is not None:
            classifier_dropout = config.classifier_dropout
        elif getattr(config, "hidden_dropout", None) is not None:
            classifier_dropout = config.hidden_dropout
        else:
            classifier_dropout = 0.1
        self.dropout = nn.Dropout(classifier_dropout)
        self.score = nn.Linear(config.hidden_size, config.num_labels)

        # Initialize weights and apply final processing
        self.post_init()

    def get_input_embeddings(self):
        return self.model.embed_tokens

    def set_input_embeddings(self, value):
        self.model.embed_tokens = value

    def forward(
        self,
        input_ids: Optional[mindspore.Tensor] = None,
        attention_mask: Optional[mindspore.Tensor] = None,
        position_ids: Optional[mindspore.Tensor] = None,
        past_key_values: Optional[List[mindspore.Tensor]] = None,
        inputs_embeds: Optional[mindspore.Tensor] = None,
        labels: Optional[mindspore.Tensor] = None,
        use_cache: Optional[bool] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[Tuple, TokenClassifierOutput]:
        r"""
        labels (`mindspore.Tensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        """
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        outputs = self.model(
            input_ids,
            attention_mask=attention_mask,
            position_ids=position_ids,
            past_key_values=past_key_values,
            inputs_embeds=inputs_embeds,
            use_cache=use_cache,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )
        sequence_output = outputs[0]
        sequence_output = self.dropout(sequence_output)
        logits = self.score(sequence_output)

        loss = None
        if labels is not None:
            loss_fct = CrossEntropyLoss()
            loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))

        if not return_dict:
            output = (logits,) + outputs[2:]
            return ((loss,) + output) if loss is not None else output

        return TokenClassifierOutput(
            loss=loss,
            logits=logits,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )
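
Token classification scores every position, so the loss flattens the `(batch, seq_len, num_labels)` logits and the `(batch, seq_len)` labels into one long list of independent per-token classification problems. A small NumPy shape sketch of that flattening and the resulting cross-entropy:

import numpy as np

batch, seq_len, num_labels = 2, 4, 3
logits = np.random.randn(batch, seq_len, num_labels)
labels = np.random.randint(0, num_labels, size=(batch, seq_len))

flat_logits = logits.reshape(-1, num_labels)   # (8, 3): one row per token
flat_labels = labels.reshape(-1)               # (8,)

log_probs = flat_logits - np.log(np.exp(flat_logits).sum(-1, keepdims=True))
loss = -log_probs[np.arange(flat_labels.size), flat_labels].mean()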

mindnlp.transformers.models.llama.modeling_llama.LlamaForTokenClassification.forward(input_ids=None, attention_mask=None, position_ids=None, past_key_values=None, inputs_embeds=None, labels=None, use_cache=None, output_attentions=None, output_hidden_states=None, return_dict=None)

labels (`mindspore.Tensor` of shape `(batch_size,)`, *optional*): Labels for computing the sequence classification/regression loss. Indices should be in `[0, ..., config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss); if `config.num_labels > 1` a classification loss is computed (Cross-Entropy).

Source code in mindnlp\transformers\models\llama\modeling_llama.py
def forward(
    self,
    input_ids: Optional[mindspore.Tensor] = None,
    attention_mask: Optional[mindspore.Tensor] = None,
    position_ids: Optional[mindspore.Tensor] = None,
    past_key_values: Optional[List[mindspore.Tensor]] = None,
    inputs_embeds: Optional[mindspore.Tensor] = None,
    labels: Optional[mindspore.Tensor] = None,
    use_cache: Optional[bool] = None,
    output_attentions: Optional[bool] = None,
    output_hidden_states: Optional[bool] = None,
    return_dict: Optional[bool] = None,
) -> Union[Tuple, TokenClassifierOutput]:
    r"""
    labels (`mindspore.Tensor` of shape `(batch_size,)`, *optional*):
        Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
        config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
        `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
    """
    return_dict = return_dict if return_dict is not None else self.config.use_return_dict

    outputs = self.model(
        input_ids,
        attention_mask=attention_mask,
        position_ids=position_ids,
        past_key_values=past_key_values,
        inputs_embeds=inputs_embeds,
        use_cache=use_cache,
        output_attentions=output_attentions,
        output_hidden_states=output_hidden_states,
        return_dict=return_dict,
    )
    sequence_output = outputs[0]
    sequence_output = self.dropout(sequence_output)
    logits = self.score(sequence_output)

    loss = None
    if labels is not None:
        loss_fct = CrossEntropyLoss()
        loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))

    if not return_dict:
        output = (logits,) + outputs[2:]
        return ((loss,) + output) if loss is not None else output

    return TokenClassifierOutput(
        loss=loss,
        logits=logits,
        hidden_states=outputs.hidden_states,
        attentions=outputs.attentions,
    )
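
The head flattens the per-token logits to `(batch_size * sequence_length, num_labels)` before the cross-entropy call above. Below is a minimal usage sketch (not part of the mindnlp sources); the tiny configuration values are made up so it runs quickly, and a real checkpoint would instead be loaded with `from_pretrained`:

```python
from mindspore import ops
from mindnlp.transformers.models.llama.configuration_llama import LlamaConfig
from mindnlp.transformers.models.llama.modeling_llama import LlamaForTokenClassification

# Hypothetical tiny configuration, chosen only so the sketch stays cheap to run.
config = LlamaConfig(
    vocab_size=128, hidden_size=64, intermediate_size=128,
    num_hidden_layers=2, num_attention_heads=4, num_labels=5,
)
model = LlamaForTokenClassification(config)

input_ids = ops.randint(0, config.vocab_size, (2, 16))   # (batch_size, sequence_length)
labels = ops.randint(0, config.num_labels, (2, 16))      # one label id per token

outputs = model(input_ids, labels=labels)
print(outputs.logits.shape)  # (2, 16, 5): per-token class scores
print(outputs.loss)          # scalar cross-entropy over all token positions
```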

mindnlp.transformers.models.llama.modeling_llama.LlamaLinearScalingRotaryEmbedding

Bases: LlamaRotaryEmbedding

LlamaRotaryEmbedding extended with linear scaling. Credits to the Reddit user /u/kaiokendev

Source code in mindnlp\transformers\models\llama\modeling_llama.py
class LlamaLinearScalingRotaryEmbedding(LlamaRotaryEmbedding):
    """LlamaRotaryEmbedding extended with linear scaling. Credits to the Reddit user /u/kaiokendev"""

    def __init__(self, *args, **kwargs):
        logger.warning_once(
            "`LlamaLinearScalingRotaryEmbedding` is deprecated an will be removed in v4.45. Please use "
            "`LlamaRotaryEmbedding`, which now also does linear scaling (simply pass the model config to __init__)."
        )
        kwargs["rope_type"] = "linear"
        super().__init__(*args, **kwargs)

mindnlp.transformers.models.llama.modeling_llama.LlamaModel

Bases: LlamaPreTrainedModel

Transformer decoder consisting of config.num_hidden_layers layers. Each layer is a [LlamaDecoderLayer]

PARAMETER DESCRIPTION
config

LlamaConfig

TYPE: LlamaConfig

Source code in mindnlp\transformers\models\llama\modeling_llama.py
class LlamaModel(LlamaPreTrainedModel):
    """
    Transformer decoder consisting of *config.num_hidden_layers* layers. Each layer is a [`LlamaDecoderLayer`]

    Args:
        config: LlamaConfig
    """

    def __init__(self, config: LlamaConfig):
        super().__init__(config)
        self.padding_idx = config.pad_token_id
        self.vocab_size = config.vocab_size

        self.embed_tokens = nn.Embedding(config.vocab_size, config.hidden_size, self.padding_idx)
        self.layers = nn.ModuleList(
            [LlamaDecoderLayer(config, layer_idx) for layer_idx in range(config.num_hidden_layers)]
        )
        self.norm = LlamaRMSNorm(config.hidden_size, eps=config.rms_norm_eps)
        self.rotary_emb = LlamaRotaryEmbedding(config=config)
        self.gradient_checkpointing = False

        # Initialize weights and apply final processing
        self.post_init()

    def get_input_embeddings(self):
        return self.embed_tokens

    def set_input_embeddings(self, value):
        self.embed_tokens = value

    def forward(
        self,
        input_ids: mindspore.Tensor = None,
        attention_mask: Optional[mindspore.Tensor] = None,
        position_ids: Optional[mindspore.Tensor] = None,
        past_key_values: Optional[Union[Cache, List[mindspore.Tensor]]] = None,
        inputs_embeds: Optional[mindspore.Tensor] = None,
        use_cache: Optional[bool] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
        cache_position: Optional[mindspore.Tensor] = None,
    ) -> Union[Tuple, BaseModelOutputWithPast]:
        output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
        output_hidden_states = (
            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
        )
        use_cache = use_cache if use_cache is not None else self.config.use_cache
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        if (input_ids is None) ^ (inputs_embeds is not None):
            raise ValueError(
                "You cannot specify both input_ids and inputs_embeds at the same time, and must specify either one"
            )

        if self.gradient_checkpointing and self.training and use_cache:
            logger.warning_once(
                "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`."
            )
            use_cache = False

        if inputs_embeds is None:
            inputs_embeds = self.embed_tokens(input_ids)

        return_legacy_cache = False
        if (
            use_cache and not isinstance(past_key_values, Cache) and not self.training
        ):  # kept for BC (non `Cache` `past_key_values` inputs)
            return_legacy_cache = True
            past_key_values = DynamicCache.from_legacy_cache(past_key_values)
            logger.warning_once(
                "We detected that you are passing `past_key_values` as a tuple and this is deprecated. "
                "Please use an appropriate `Cache` class"
            )

        if cache_position is None:
            past_seen_tokens = past_key_values.get_seq_length() if past_key_values is not None else 0
            cache_position = ops.arange(
                past_seen_tokens, past_seen_tokens + inputs_embeds.shape[1]
            )
        if position_ids is None:
            position_ids = cache_position.unsqueeze(0)

        causal_mask = self._update_causal_mask(
            attention_mask, inputs_embeds, cache_position, past_key_values, output_attentions
        )
        hidden_states = inputs_embeds

        # create position embeddings to be shared across the decoder layers
        position_embeddings = self.rotary_emb(hidden_states, position_ids)

        # decoder layers
        all_hidden_states = () if output_hidden_states else None
        all_self_attns = () if output_attentions else None
        next_decoder_cache = None

        for decoder_layer in self.layers:
            if output_hidden_states:
                all_hidden_states += (hidden_states,)

            if self.gradient_checkpointing and self.training:
                layer_outputs = self._gradient_checkpointing_func(
                    decoder_layer.__call__,
                    hidden_states,
                    causal_mask,
                    position_ids,
                    past_key_values,
                    output_attentions,
                    use_cache,
                    cache_position,
                    position_embeddings,
                )
            else:
                layer_outputs = decoder_layer(
                    hidden_states,
                    attention_mask=causal_mask,
                    position_ids=position_ids,
                    past_key_value=past_key_values,
                    output_attentions=output_attentions,
                    use_cache=use_cache,
                    cache_position=cache_position,
                    position_embeddings=position_embeddings,
                )

            hidden_states = layer_outputs[0]

            if use_cache:
                next_decoder_cache = layer_outputs[2 if output_attentions else 1]

            if output_attentions:
                all_self_attns += (layer_outputs[1],)

        hidden_states = self.norm(hidden_states)

        # add hidden states from the last decoder layer
        if output_hidden_states:
            all_hidden_states += (hidden_states,)

        next_cache = next_decoder_cache if use_cache else None
        if return_legacy_cache:
            next_cache = next_cache.to_legacy_cache()

        if not return_dict:
            return tuple(v for v in [hidden_states, next_cache, all_hidden_states, all_self_attns] if v is not None)
        return BaseModelOutputWithPast(
            last_hidden_state=hidden_states,
            past_key_values=next_cache,
            hidden_states=all_hidden_states,
            attentions=all_self_attns,
        )

    def _update_causal_mask(
        self,
        attention_mask: mindspore.Tensor,
        input_tensor: mindspore.Tensor,
        cache_position: mindspore.Tensor,
        past_key_values: Cache,
        output_attentions: bool,
    ):
        # For SDPA, when possible, we will rely on its `is_causal` argument instead of its `attn_mask` argument, in
        # order to dispatch on Flash Attention 2. This feature is not compatible with static cache, as SDPA will fail
        # to infer the attention mask.
        past_seen_tokens = past_key_values.get_seq_length() if past_key_values is not None else 0
        using_static_cache = isinstance(past_key_values, StaticCache)

        dtype = input_tensor.dtype
        min_dtype = float(ops.finfo(dtype).min)
        sequence_length = input_tensor.shape[1]
        if using_static_cache:
            target_length = past_key_values.get_max_length()
        else:
            target_length = (
                attention_mask.shape[-1]
                if isinstance(attention_mask, mindspore.Tensor)
                else past_seen_tokens + sequence_length + 1
            )

        # In case the provided `attention` mask is 2D, we generate a causal mask here (4D).
        causal_mask = _prepare_4d_causal_attention_mask_with_cache_position(
            attention_mask,
            sequence_length=sequence_length,
            target_length=target_length,
            dtype=dtype,
            min_dtype=min_dtype,
            cache_position=cache_position,
            batch_size=input_tensor.shape[0],
        )

        return causal_mask
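
A minimal sketch of running the bare decoder stack end to end (the small configuration values below are made up; a real model would be loaded from a checkpoint). It shows the `BaseModelOutputWithPast` fields produced by the `forward` above:

```python
from mindspore import ops
from mindnlp.transformers.models.llama.configuration_llama import LlamaConfig
from mindnlp.transformers.models.llama.modeling_llama import LlamaModel

# Hypothetical tiny configuration so the sketch is cheap to run.
config = LlamaConfig(
    vocab_size=128, hidden_size=64, intermediate_size=128,
    num_hidden_layers=2, num_attention_heads=4, num_key_value_heads=2,
)
model = LlamaModel(config)

input_ids = ops.randint(0, config.vocab_size, (1, 8))
outputs = model(input_ids, output_hidden_states=True)

print(outputs.last_hidden_state.shape)  # (1, 8, 64): final hidden state after LlamaRMSNorm
print(len(outputs.hidden_states))       # 3: embedding output + one entry per decoder layer
```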

mindnlp.transformers.models.llama.modeling_llama.LlamaRMSNorm

Bases: Module

Source code in mindnlp\transformers\models\llama\modeling_llama.py
class LlamaRMSNorm(nn.Module):
    def __init__(self, hidden_size, eps=1e-6):
        """
        LlamaRMSNorm is equivalent to T5LayerNorm
        """
        super().__init__()
        self.weight = nn.Parameter(ops.ones(hidden_size))
        self.variance_epsilon = eps

    def forward(self, hidden_states):
        if not self.training and use_pyboost() and not ON_ORANGE_PI:
            return F.rms_norm(hidden_states, self.weight, self.variance_epsilon)
        input_dtype = hidden_states.dtype
        hidden_states = hidden_states.to(mindspore.float32)
        variance = ops.mean(hidden_states.pow(2), -1, keepdim=True)
        hidden_states = hidden_states * ops.rsqrt(variance + self.variance_epsilon)
        return self.weight * hidden_states.to(input_dtype)

    def extra_repr(self):
        return f"{tuple(self.weight.shape)}, eps={self.variance_epsilon}"

mindnlp.transformers.models.llama.modeling_llama.LlamaRMSNorm.__init__(hidden_size, eps=1e-06)

LlamaRMSNorm is equivalent to T5LayerNorm

Source code in mindnlp\transformers\models\llama\modeling_llama.py
def __init__(self, hidden_size, eps=1e-6):
    """
    LlamaRMSNorm is equivalent to T5LayerNorm
    """
    super().__init__()
    self.weight = nn.Parameter(ops.ones(hidden_size))
    self.variance_epsilon = eps

mindnlp.transformers.models.llama.modeling_llama.LlamaRotaryEmbedding

Bases: Module

Source code in mindnlp\transformers\models\llama\modeling_llama.py
class LlamaRotaryEmbedding(nn.Module):
    def __init__(
        self,
        dim=None,
        max_position_embeddings=2048,
        base=10000,
        scaling_factor=1.0,
        rope_type="default",
        config: Optional[LlamaConfig] = None,
    ):
        super().__init__()
        # TODO (joao): remove the `if` below, only used for BC
        self.rope_kwargs = {}
        if config is None:
            logger.warning_once(
                "`LlamaRotaryEmbedding` can now be fully parameterized by passing the model config through the "
                "`config` argument. All other arguments will be removed in v4.45"
            )
            self.rope_kwargs = {
                "rope_type": rope_type,
                "factor": scaling_factor,
                "dim": dim,
                "base": base,
                "max_position_embeddings": max_position_embeddings,
            }
            self.rope_type = rope_type
            self.max_seq_len_cached = max_position_embeddings
            self.original_max_seq_len = max_position_embeddings
        else:
            # BC: "rope_type" was originally "type"
            if config.rope_scaling is not None:
                self.rope_type = config.rope_scaling.get("rope_type", config.rope_scaling.get("type"))
            else:
                self.rope_type = "default"
            self.max_seq_len_cached = config.max_position_embeddings
            self.original_max_seq_len = config.max_position_embeddings

        self.config = config
        self.rope_init_fn = ROPE_INIT_FUNCTIONS[self.rope_type]

        inv_freq, self.attention_scaling = self.rope_init_fn(self.config, **self.rope_kwargs)
        self.register_buffer("inv_freq", inv_freq, persistent=False)
        self.original_inv_freq = self.inv_freq

    def _dynamic_frequency_update(self, position_ids):
        """
        dynamic RoPE layers should recompute `inv_freq` in the following situations:
        1 - growing beyond the cached sequence length (allow scaling)
        2 - the current sequence length is in the original scale (avoid losing precision with small sequences)
        """
        seq_len = ops.max(position_ids) + 1
        if seq_len > self.max_seq_len_cached:  # growth
            inv_freq, self.attention_scaling = self.rope_init_fn(
                self.config, seq_len=seq_len, **self.rope_kwargs
            )
            self.register_buffer("inv_freq", inv_freq, persistent=False)  # TODO joao: may break with compilation
            self.max_seq_len_cached = seq_len

        if seq_len < self.original_max_seq_len and self.max_seq_len_cached > self.original_max_seq_len:  # reset
            self.register_buffer("inv_freq", self.original_inv_freq, persistent=False)
            self.max_seq_len_cached = self.original_max_seq_len

    @no_grad()
    def forward(self, x, position_ids):
        if "dynamic" in self.rope_type:
            self._dynamic_frequency_update(position_ids)

        # Core RoPE block
        inv_freq_expanded = self.inv_freq.view(1, -1, 1).float().broadcast_to((position_ids.shape[0], -1, 1))
        position_ids_expanded = ops.unsqueeze(position_ids, 1).float()
        # Force float32 (see https://github.com/huggingface/transformers/pull/29285)
        freqs = ops.transpose(ops.matmul(inv_freq_expanded.float(), position_ids_expanded.float()), 1, 2)
        emb = ops.cat((freqs, freqs), dim=-1)
        cos = emb.cos()
        sin = emb.sin()

        # Advanced RoPE types (e.g. yarn) apply a post-processing scaling factor, equivalent to scaling attention
        cos = cos * self.attention_scaling
        sin = sin * self.attention_scaling

        return cos.to(dtype=x.dtype), sin.to(dtype=x.dtype)
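
A short sketch of calling the embedding directly (made-up small sizes): the returned `cos`/`sin` carry one angle per position and per rotary dimension (`head_dim`), and are later consumed by `apply_rotary_pos_emb`:

```python
from mindspore import ops
from mindnlp.transformers.models.llama.configuration_llama import LlamaConfig
from mindnlp.transformers.models.llama.modeling_llama import LlamaRotaryEmbedding

# Hypothetical small sizes: hidden_size=64 with 4 heads gives head_dim=16.
config = LlamaConfig(hidden_size=64, num_attention_heads=4)
rope = LlamaRotaryEmbedding(config=config)

batch, seq_len = 2, 10
hidden_states = ops.randn(batch, seq_len, config.hidden_size)
position_ids = ops.arange(seq_len).reshape(1, -1).broadcast_to((batch, seq_len))

cos, sin = rope(hidden_states, position_ids)
print(cos.shape, sin.shape)  # (2, 10, 16) each
```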

mindnlp.transformers.models.llama.modeling_llama.apply_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_dim=1)

Applies Rotary Position Embedding to the query and key tensors.

PARAMETER DESCRIPTION
q

The query tensor.

TYPE: `mindspore.Tensor`

k

The key tensor.

TYPE: `mindspore.Tensor`

cos

The cosine part of the rotary embedding.

TYPE: `mindspore.Tensor`

sin

The sine part of the rotary embedding.

TYPE: `mindspore.Tensor`

position_ids

Deprecated and unused.

TYPE: `mindspore.Tensor`, *optional* DEFAULT: None

unsqueeze_dim

The 'unsqueeze_dim' argument specifies the dimension along which to unsqueeze cos[position_ids] and sin[position_ids] so that they can be properly broadcasted to the dimensions of q and k. For example, note that cos[position_ids] and sin[position_ids] have the shape [batch_size, seq_len, head_dim]. Then, if q and k have the shape [batch_size, heads, seq_len, head_dim], then setting unsqueeze_dim=1 makes cos[position_ids] and sin[position_ids] broadcastable to the shapes of q and k. Similarly, if q and k have the shape [batch_size, seq_len, heads, head_dim], then set unsqueeze_dim=2.

TYPE: `int`, *optional*, defaults to 1 DEFAULT: 1

Source code in mindnlp\transformers\models\llama\modeling_llama.py
def apply_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_dim=1):
    """Applies Rotary Position Embedding to the query and key tensors.

    Args:
        q (`mindspore.Tensor`): The query tensor.
        k (`mindspore.Tensor`): The key tensor.
        cos (`mindspore.Tensor`): The cosine part of the rotary embedding.
        sin (`mindspore.Tensor`): The sine part of the rotary embedding.
        position_ids (`mindspore.Tensor`, *optional*):
            Deprecated and unused.
        unsqueeze_dim (`int`, *optional*, defaults to 1):
            The 'unsqueeze_dim' argument specifies the dimension along which to unsqueeze cos[position_ids] and
            sin[position_ids] so that they can be properly broadcasted to the dimensions of q and k. For example, note
            that cos[position_ids] and sin[position_ids] have the shape [batch_size, seq_len, head_dim]. Then, if q and
            k have the shape [batch_size, heads, seq_len, head_dim], then setting unsqueeze_dim=1 makes
            cos[position_ids] and sin[position_ids] broadcastable to the shapes of q and k. Similarly, if q and k have
            the shape [batch_size, seq_len, heads, head_dim], then set unsqueeze_dim=2.
    Returns:
        `tuple(mindspore.Tensor)` comprising of the query and key tensors rotated using the Rotary Position Embedding.
    """
    cos = cos.unsqueeze(unsqueeze_dim)
    sin = sin.unsqueeze(unsqueeze_dim)
    q_embed = (q * cos) + (rotate_half(q) * sin)
    k_embed = (k * cos) + (rotate_half(k) * sin)
    return q_embed, k_embed
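
A minimal sketch combining `LlamaRotaryEmbedding` with `apply_rotary_pos_emb` for query/key tensors in the `(batch, heads, seq_len, head_dim)` layout, where the default `unsqueeze_dim=1` supplies the missing heads axis (sizes below are made up):

```python
from mindspore import ops
from mindnlp.transformers.models.llama.configuration_llama import LlamaConfig
from mindnlp.transformers.models.llama.modeling_llama import (
    LlamaRotaryEmbedding, apply_rotary_pos_emb,
)

config = LlamaConfig(hidden_size=64, num_attention_heads=4)  # head_dim = 16
rope = LlamaRotaryEmbedding(config=config)

batch, heads, seq_len, head_dim = 2, 4, 10, 16
q = ops.randn(batch, heads, seq_len, head_dim)
k = ops.randn(batch, heads, seq_len, head_dim)

position_ids = ops.arange(seq_len).reshape(1, -1).broadcast_to((batch, seq_len))
cos, sin = rope(q, position_ids)         # (2, 10, 16); only q's dtype is used here

# unsqueeze_dim=1 (the default) inserts the heads axis so cos/sin broadcast over it.
q_rot, k_rot = apply_rotary_pos_emb(q, k, cos, sin)
print(q_rot.shape, k_rot.shape)          # unchanged: (2, 4, 10, 16)
```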

mindnlp.transformers.models.llama.modeling_llama.repeat_kv(hidden_states, n_rep)

This is the equivalent of ops.repeat_interleave(x, dim=1, repeats=n_rep). The hidden states go from (batch, num_key_value_heads, seqlen, head_dim) to (batch, num_attention_heads, seqlen, head_dim)

Source code in mindnlp\transformers\models\llama\modeling_llama.py
def repeat_kv(hidden_states: mindspore.Tensor, n_rep: int) -> mindspore.Tensor:
    """
    This is the equivalent of ops.repeat_interleave(x, dim=1, repeats=n_rep). The hidden states go from (batch,
    num_key_value_heads, seqlen, head_dim) to (batch, num_attention_heads, seqlen, head_dim)
    """
    batch, num_key_value_heads, slen, head_dim = hidden_states.shape
    if n_rep == 1:
        return hidden_states
    # hidden_states = hidden_states[:, :, None, :, :].broadcast_to((batch, num_key_value_heads, n_rep, slen, head_dim))
    hidden_states = ops.unsqueeze(hidden_states, 2).broadcast_to((batch, num_key_value_heads, n_rep, slen, head_dim))
    return hidden_states.reshape(batch, num_key_value_heads * n_rep, slen, head_dim)
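
A small sketch of the grouped-query-attention expansion this helper performs (made-up sizes):

```python
from mindspore import ops
from mindnlp.transformers.models.llama.modeling_llama import repeat_kv

# 8 query heads served by 2 key/value heads -> each KV head is repeated n_rep = 8 // 2 = 4 times.
batch, num_kv_heads, seq_len, head_dim = 2, 2, 10, 16
key_states = ops.randn(batch, num_kv_heads, seq_len, head_dim)

expanded = repeat_kv(key_states, n_rep=4)
print(expanded.shape)  # (2, 8, 10, 16)
```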

mindnlp.transformers.models.llama.modeling_llama.rotate_half(x)

Rotates half the hidden dims of the input.

Source code in mindnlp\transformers\models\llama\modeling_llama.py
def rotate_half(x):
    """Rotates half the hidden dims of the input."""
    # x1 = x[..., : x.shape[-1] // 2]
    # x2 = x[..., x.shape[-1] // 2 :]
    x1, x2 = ops.split(x, x.shape[-1] // 2, dim=-1)
    return ops.cat((-x2, x1), dim=-1)

mindnlp.transformers.models.llama.configuration_llama

LLaMA model configuration

mindnlp.transformers.models.llama.configuration_llama.LlamaConfig

Bases: PretrainedConfig

This is the configuration class to store the configuration of a [LlamaModel]. It is used to instantiate an LLaMA model according to the specified arguments, defining the model architecture. Instantiating a configuration with the defaults will yield a similar configuration to that of the LLaMA-7B.

Configuration objects inherit from [PretrainedConfig] and can be used to control the model outputs. Read the documentation from [PretrainedConfig] for more information.

PARAMETER DESCRIPTION
vocab_size

Vocabulary size of the LLaMA model. Defines the number of different tokens that can be represented by the inputs_ids passed when calling [LlamaModel]

TYPE: `int`, *optional*, defaults to 32000 DEFAULT: 32000

hidden_size

Dimension of the hidden representations.

TYPE: `int`, *optional*, defaults to 4096 DEFAULT: 4096

intermediate_size

Dimension of the MLP representations.

TYPE: `int`, *optional*, defaults to 11008 DEFAULT: 11008

num_hidden_layers

Number of hidden layers in the Transformer decoder.

TYPE: `int`, *optional*, defaults to 32 DEFAULT: 32

num_attention_heads

Number of attention heads for each attention layer in the Transformer decoder.

TYPE: `int`, *optional*, defaults to 32 DEFAULT: 32

num_key_value_heads

This is the number of key_value heads that should be used to implement Grouped Query Attention. If num_key_value_heads=num_attention_heads, the model will use Multi Head Attention (MHA), if num_key_value_heads=1 the model will use Multi Query Attention (MQA) otherwise GQA is used. When converting a multi-head checkpoint to a GQA checkpoint, each group key and value head should be constructed by meanpooling all the original heads within that group. For more details checkout this paper. If it is not specified, will default to num_attention_heads.

TYPE: `int`, *optional* DEFAULT: None

hidden_act

The non-linear activation function (function or string) in the decoder.

TYPE: `str` or `function`, *optional*, defaults to `"silu"` DEFAULT: 'silu'

max_position_embeddings

The maximum sequence length that this model might ever be used with. Llama 1 supports up to 2048 tokens, Llama 2 up to 4096, CodeLlama up to 16384.

TYPE: `int`, *optional*, defaults to 2048 DEFAULT: 2048

initializer_range

The standard deviation of the truncated_normal_initializer for initializing all weight matrices.

TYPE: `float`, *optional*, defaults to 0.02 DEFAULT: 0.02

rms_norm_eps

The epsilon used by the rms normalization layers.

TYPE: `float`, *optional*, defaults to 1e-06 DEFAULT: 1e-06

use_cache

Whether or not the model should return the last key/values attentions (not used by all models). Only relevant if config.is_decoder=True.

TYPE: `bool`, *optional*, defaults to `True` DEFAULT: True

pad_token_id

Padding token id.

TYPE: `int`, *optional* DEFAULT: None

bos_token_id

Beginning of stream token id.

TYPE: `int`, *optional*, defaults to 1 DEFAULT: 1

eos_token_id

End of stream token id.

TYPE: `int`, *optional*, defaults to 2 DEFAULT: 2

pretraining_tp

Experimental feature. Tensor parallelism rank used during pretraining. Please refer to this document to understand more about it. This value is necessary to ensure exact reproducibility of the pretraining results. Please refer to this issue.

TYPE: `int`, *optional*, defaults to 1 DEFAULT: 1

tie_word_embeddings

Whether to tie weight embeddings

TYPE: `bool`, *optional*, defaults to `False` DEFAULT: False

rope_theta

The base period of the RoPE embeddings.

TYPE: `float`, *optional*, defaults to 10000.0 DEFAULT: 10000.0

rope_scaling

Dictionary containing the scaling configuration for the RoPE embeddings. NOTE: if you apply a new rope type and you expect the model to work on longer max_position_embeddings, we recommend you update this value accordingly. Expected contents:

  • rope_type (str): The sub-variant of RoPE to use. Can be one of ['default', 'linear', 'dynamic', 'yarn', 'longrope', 'llama3'], with 'default' being the original RoPE implementation.
  • factor (float, optional): Used with all rope types except 'default'. The scaling factor to apply to the RoPE embeddings. In most scaling types, a factor of x will enable the model to handle sequences of length x * original maximum pre-trained length.
  • original_max_position_embeddings (int, optional): Used with 'dynamic', 'longrope' and 'llama3'. The original max position embeddings used during pretraining.
  • attention_factor (float, optional): Used with 'yarn' and 'longrope'. The scaling factor to be applied on the attention computation. If unspecified, it defaults to the value recommended by the implementation, using the factor field to infer the suggested value.
  • beta_fast (float, optional): Only used with 'yarn'. Parameter to set the boundary for extrapolation (only) in the linear ramp function. If unspecified, it defaults to 32.
  • beta_slow (float, optional): Only used with 'yarn'. Parameter to set the boundary for interpolation (only) in the linear ramp function. If unspecified, it defaults to 1.
  • short_factor (List[float], optional): Only used with 'longrope'. The scaling factor to be applied to short contexts (< original_max_position_embeddings). Must be a list of numbers with the same length as the hidden size divided by the number of attention heads divided by 2.
  • long_factor (List[float], optional): Only used with 'longrope'. The scaling factor to be applied to long contexts (> original_max_position_embeddings). Must be a list of numbers with the same length as the hidden size divided by the number of attention heads divided by 2.
  • low_freq_factor (float, optional): Only used with 'llama3'. Scaling factor applied to low frequency components of the RoPE.
  • high_freq_factor (float, optional): Only used with 'llama3'. Scaling factor applied to high frequency components of the RoPE.

TYPE: `Dict`, *optional* DEFAULT: None

attention_bias

Whether to use a bias in the query, key, value and output projection layers during self-attention.

TYPE: `bool`, *optional*, defaults to `False` DEFAULT: False

attention_dropout

The dropout ratio for the attention probabilities.

TYPE: `float`, *optional*, defaults to 0.0 DEFAULT: 0.0

mlp_bias

Whether to use a bias in up_proj, down_proj and gate_proj layers in the MLP layers.

TYPE: `bool`, *optional*, defaults to `False` DEFAULT: False

>>> from transformers import LlamaModel, LlamaConfig

>>> # Initializing a LLaMA llama-7b style configuration
>>> configuration = LlamaConfig()

>>> # Initializing a model from the llama-7b style configuration
>>> model = LlamaModel(configuration)

>>> # Accessing the model configuration
>>> configuration = model.config
Source code in mindnlp\transformers\models\llama\configuration_llama.py
class LlamaConfig(PretrainedConfig):
    r"""
    This is the configuration class to store the configuration of a [`LlamaModel`]. It is used to instantiate an LLaMA
    model according to the specified arguments, defining the model architecture. Instantiating a configuration with the
    defaults will yield a similar configuration to that of the LLaMA-7B.

    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
    documentation from [`PretrainedConfig`] for more information.


    Args:
        vocab_size (`int`, *optional*, defaults to 32000):
            Vocabulary size of the LLaMA model. Defines the number of different tokens that can be represented by the
            `inputs_ids` passed when calling [`LlamaModel`]
        hidden_size (`int`, *optional*, defaults to 4096):
            Dimension of the hidden representations.
        intermediate_size (`int`, *optional*, defaults to 11008):
            Dimension of the MLP representations.
        num_hidden_layers (`int`, *optional*, defaults to 32):
            Number of hidden layers in the Transformer decoder.
        num_attention_heads (`int`, *optional*, defaults to 32):
            Number of attention heads for each attention layer in the Transformer decoder.
        num_key_value_heads (`int`, *optional*):
            This is the number of key_value heads that should be used to implement Grouped Query Attention. If
            `num_key_value_heads=num_attention_heads`, the model will use Multi Head Attention (MHA), if
            `num_key_value_heads=1` the model will use Multi Query Attention (MQA) otherwise GQA is used. When
            converting a multi-head checkpoint to a GQA checkpoint, each group key and value head should be constructed
            by meanpooling all the original heads within that group. For more details checkout [this
            paper](https://arxiv.org/pdf/2305.13245.pdf). If it is not specified, will default to
            `num_attention_heads`.
        hidden_act (`str` or `function`, *optional*, defaults to `"silu"`):
            The non-linear activation function (function or string) in the decoder.
        max_position_embeddings (`int`, *optional*, defaults to 2048):
            The maximum sequence length that this model might ever be used with. Llama 1 supports up to 2048 tokens,
            Llama 2 up to 4096, CodeLlama up to 16384.
        initializer_range (`float`, *optional*, defaults to 0.02):
            The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
        rms_norm_eps (`float`, *optional*, defaults to 1e-06):
            The epsilon used by the rms normalization layers.
        use_cache (`bool`, *optional*, defaults to `True`):
            Whether or not the model should return the last key/values attentions (not used by all models). Only
            relevant if `config.is_decoder=True`.
        pad_token_id (`int`, *optional*):
            Padding token id.
        bos_token_id (`int`, *optional*, defaults to 1):
            Beginning of stream token id.
        eos_token_id (`int`, *optional*, defaults to 2):
            End of stream token id.
        pretraining_tp (`int`, *optional*, defaults to 1):
            Experimental feature. Tensor parallelism rank used during pretraining. Please refer to [this
            document](https://huggingface.co/docs/transformers/main/perf_train_gpu_many#tensor-parallelism) to
            understand more about it. This value is necessary to ensure exact reproducibility of the pretraining
            results. Please refer to [this issue](https://github.com/pytorch/pytorch/issues/76232).
        tie_word_embeddings (`bool`, *optional*, defaults to `False`):
            Whether to tie weight embeddings
        rope_theta (`float`, *optional*, defaults to 10000.0):
            The base period of the RoPE embeddings.
        rope_scaling (`Dict`, *optional*):
            Dictionary containing the scaling configuration for the RoPE embeddings. NOTE: if you apply new rope type
            and you expect the model to work on longer `max_position_embeddings`, we recommend you to update this value
            accordingly.
            Expected contents:
                `rope_type` (`str`):
                    The sub-variant of RoPE to use. Can be one of ['default', 'linear', 'dynamic', 'yarn', 'longrope',
                    'llama3'], with 'default' being the original RoPE implementation.
                `factor` (`float`, *optional*):
                    Used with all rope types except 'default'. The scaling factor to apply to the RoPE embeddings. In
                    most scaling types, a `factor` of x will enable the model to handle sequences of length x *
                    original maximum pre-trained length.
                `original_max_position_embeddings` (`int`, *optional*):
                    Used with 'dynamic', 'longrope' and 'llama3'. The original max position embeddings used during
                    pretraining.
                `attention_factor` (`float`, *optional*):
                    Used with 'yarn' and 'longrope'. The scaling factor to be applied on the attention
                    computation. If unspecified, it defaults to value recommended by the implementation, using the
                    `factor` field to infer the suggested value.
                `beta_fast` (`float`, *optional*):
                    Only used with 'yarn'. Parameter to set the boundary for extrapolation (only) in the linear
                    ramp function. If unspecified, it defaults to 32.
                `beta_slow` (`float`, *optional*):
                    Only used with 'yarn'. Parameter to set the boundary for interpolation (only) in the linear
                    ramp function. If unspecified, it defaults to 1.
                `short_factor` (`List[float]`, *optional*):
                    Only used with 'longrope'. The scaling factor to be applied to short contexts (<
                    `original_max_position_embeddings`). Must be a list of numbers with the same length as the hidden
                    size divided by the number of attention heads divided by 2
                `long_factor` (`List[float]`, *optional*):
                    Only used with 'longrope'. The scaling factor to be applied to long contexts (<
                    `original_max_position_embeddings`). Must be a list of numbers with the same length as the hidden
                    size divided by the number of attention heads divided by 2
                `low_freq_factor` (`float`, *optional*):
                    Only used with 'llama3'. Scaling factor applied to low frequency components of the RoPE
                `high_freq_factor` (`float`, *optional*):
                    Only used with 'llama3'. Scaling factor applied to high frequency components of the RoPE
        attention_bias (`bool`, *optional*, defaults to `False`):
            Whether to use a bias in the query, key, value and output projection layers during self-attention.
        attention_dropout (`float`, *optional*, defaults to 0.0):
            The dropout ratio for the attention probabilities.
        mlp_bias (`bool`, *optional*, defaults to `False`):
            Whether to use a bias in up_proj, down_proj and gate_proj layers in the MLP layers.

    ```python
    >>> from transformers import LlamaModel, LlamaConfig

    >>> # Initializing a LLaMA llama-7b style configuration
    >>> configuration = LlamaConfig()

    >>> # Initializing a model from the llama-7b style configuration
    >>> model = LlamaModel(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```"""

    model_type = "llama"
    keys_to_ignore_at_inference = ["past_key_values"]

    def __init__(
        self,
        vocab_size=32000,
        hidden_size=4096,
        intermediate_size=11008,
        num_hidden_layers=32,
        num_attention_heads=32,
        num_key_value_heads=None,
        hidden_act="silu",
        max_position_embeddings=2048,
        initializer_range=0.02,
        rms_norm_eps=1e-6,
        use_cache=True,
        pad_token_id=None,
        bos_token_id=1,
        eos_token_id=2,
        pretraining_tp=1,
        tie_word_embeddings=False,
        rope_theta=10000.0,
        rope_scaling=None,
        attention_bias=False,
        attention_dropout=0.0,
        mlp_bias=False,
        **kwargs,
    ):
        self.vocab_size = vocab_size
        self.max_position_embeddings = max_position_embeddings
        self.hidden_size = hidden_size
        self.intermediate_size = intermediate_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads

        # for backward compatibility
        if num_key_value_heads is None:
            num_key_value_heads = num_attention_heads

        self.num_key_value_heads = num_key_value_heads
        self.hidden_act = hidden_act
        self.initializer_range = initializer_range
        self.rms_norm_eps = rms_norm_eps
        self.pretraining_tp = pretraining_tp
        self.use_cache = use_cache
        self.rope_theta = rope_theta
        self.rope_scaling = rope_scaling
        self.attention_bias = attention_bias
        self.attention_dropout = attention_dropout
        self.mlp_bias = mlp_bias

        # Validate the correctness of rotary position embeddings parameters
        # BC: if there is a 'type' field, move it to 'rope_type'.
        if self.rope_scaling is not None and "type" in self.rope_scaling:
            self.rope_scaling["rope_type"] = self.rope_scaling["type"]
        rope_config_validation(self)

        super().__init__(
            pad_token_id=pad_token_id,
            bos_token_id=bos_token_id,
            eos_token_id=eos_token_id,
            tie_word_embeddings=tie_word_embeddings,
            **kwargs,
        )

mindnlp.transformers.models.llama.tokenization_llama

Tokenization classes for LLaMA.

mindnlp.transformers.models.llama.tokenization_llama.LlamaTokenizer

Bases: PreTrainedTokenizer

Construct a Llama tokenizer. Based on byte-level Byte-Pair-Encoding. The default padding token is unset as there is no padding token in the original model.

PARAMETER DESCRIPTION
vocab_file

Path to the vocabulary file.

TYPE: `str`

unk_token

The unknown token. A token that is not in the vocabulary cannot be converted to an ID and is set to be this token instead.

TYPE: `str` or `tokenizers.AddedToken`, *optional*, defaults to `"<unk>"` DEFAULT: '<unk>'

bos_token

The beginning of sequence token that was used during pretraining. Can be used as a sequence classifier token.

TYPE: `str` or `tokenizers.AddedToken`, *optional*, defaults to `"<s>"` DEFAULT: '<s>'

eos_token

The end of sequence token.

TYPE: `str` or `tokenizers.AddedToken`, *optional*, defaults to `"</s>"` DEFAULT: '</s>'

pad_token

A special token used to make arrays of tokens the same size for batching purpose. Will then be ignored by attention mechanisms or loss computation.

TYPE: `str` or `tokenizers.AddedToken`, *optional* DEFAULT: None

sp_model_kwargs

Will be passed to the SentencePieceProcessor.__init__() method. The Python wrapper for SentencePiece can be used, among other things, to set:

  • enable_sampling: Enable subword regularization.
  • nbest_size: Sampling parameters for unigram. Invalid for BPE-Dropout.

  • nbest_size = {0,1}: No sampling is performed.

  • nbest_size > 1: samples from the nbest_size results.
  • nbest_size < 0: assuming that nbest_size is infinite and samples from the all hypothesis (lattice) using forward-filtering-and-backward-sampling algorithm.

  • alpha: Smoothing parameter for unigram sampling, and dropout probability of merge operations for BPE-dropout.

TYPE: `Dict[str, Any]`, `Optional`, *optional* DEFAULT: None

add_bos_token

Whether or not to add a bos_token at the start of sequences.

TYPE: `bool`, *optional*, defaults to `True` DEFAULT: True

add_eos_token

Whether or not to add an eos_token at the end of sequences.

TYPE: `bool`, *optional*, defaults to `False` DEFAULT: False

clean_up_tokenization_spaces

Whether or not to cleanup spaces after decoding, cleanup consists in removing potential artifacts like extra spaces.

TYPE: `bool`, *optional*, defaults to `False` DEFAULT: False

use_default_system_prompt

Whether or not the default system prompt for Llama should be used.

TYPE: `bool`, *optional*, defaults to `False` DEFAULT: False

spaces_between_special_tokens

Whether or not to add spaces between special tokens.

TYPE: `bool`, *optional*, defaults to `False` DEFAULT: False

legacy

Whether or not the legacy behavior of the tokenizer should be used. Legacy is before the merge of #24622 and #25224 which includes fixes to properly handle tokens that appear after special tokens. Make sure to also set from_slow to True. A simple example:

  • legacy=True:
    >>> from transformers import LlamaTokenizerFast
    
    >>> tokenizer = LlamaTokenizerFast.from_pretrained("huggyllama/llama-7b", legacy=True, from_slow=True)
    >>> tokenizer.encode("Hello <s>.") # 869 is '▁.'
    [1, 15043, 29871, 1, 869]
    
  • legacy=False:
    >>> from transformers import LlamaTokenizerFast
    
    >>> tokenizer = LlamaTokenizerFast.from_pretrained("huggyllama/llama-7b", legacy=False, from_slow=True)
    >>> tokenizer.encode("Hello <s>.")  # 29889 is '.'
    [1, 15043, 29871, 1, 29889]
    
    Checkout the pull request for more details.

TYPE: `bool`, *optional* DEFAULT: None

add_prefix_space

Whether or not to add an initial space to the input. This allows to treat the leading word just as any other word. Again, this should be set with from_slow=True to make sure it's taken into account.

TYPE: `bool`, *optional*, defaults to `True` DEFAULT: True
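
A hedged usage sketch, assuming `LlamaTokenizer` is re-exported from `mindnlp.transformers` and that the `huggyllama/llama-7b` checkpoint referenced in the `legacy` examples above is reachable:

```python
from mindnlp.transformers import LlamaTokenizer

# Assumes the "huggyllama/llama-7b" sentencepiece vocabulary can be downloaded.
tokenizer = LlamaTokenizer.from_pretrained("huggyllama/llama-7b")

ids = tokenizer.encode("Hello world")        # add_bos_token=True prepends the <s> id
print(ids)
print(tokenizer.convert_ids_to_tokens(ids))  # sentencepiece pieces, e.g. ['<s>', '▁Hello', '▁world']
print(tokenizer.decode(ids, skip_special_tokens=True))
```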

Source code in mindnlp\transformers\models\llama\tokenization_llama.py
class LlamaTokenizer(PreTrainedTokenizer):
    """
    Construct a Llama tokenizer. Based on byte-level Byte-Pair-Encoding. The default padding token is unset as there is
    no padding token in the original model.

    Args:
        vocab_file (`str`):
            Path to the vocabulary file.
        unk_token (`str` or `tokenizers.AddedToken`, *optional*, defaults to `"<unk>"`):
            The unknown token. A token that is not in the vocabulary cannot be converted to an ID and is set to be this
            token instead.
        bos_token (`str` or `tokenizers.AddedToken`, *optional*, defaults to `"<s>"`):
            The beginning of sequence token that was used during pretraining. Can be used a sequence classifier token.
        eos_token (`str` or `tokenizers.AddedToken`, *optional*, defaults to `"</s>"`):
            The end of sequence token.
        pad_token (`str` or `tokenizers.AddedToken`, *optional*):
            A special token used to make arrays of tokens the same size for batching purpose. Will then be ignored by
            attention mechanisms or loss computation.
        sp_model_kwargs (`Dict[str, Any]`, `Optional`, *optional*):
            Will be passed to the `SentencePieceProcessor.__init__()` method. The [Python wrapper for
            SentencePiece](https://github.com/google/sentencepiece/tree/master/python) can be used, among other things,
            to set:

            - `enable_sampling`: Enable subword regularization.
            - `nbest_size`: Sampling parameters for unigram. Invalid for BPE-Dropout.

              - `nbest_size = {0,1}`: No sampling is performed.
              - `nbest_size > 1`: samples from the nbest_size results.
              - `nbest_size < 0`: assuming that nbest_size is infinite and samples from the all hypothesis (lattice)
                using forward-filtering-and-backward-sampling algorithm.

            - `alpha`: Smoothing parameter for unigram sampling, and dropout probability of merge operations for
              BPE-dropout.

        add_bos_token (`bool`, *optional*, defaults to `True`):
            Whether or not to add an `bos_token` at the start of sequences.
        add_eos_token (`bool`, *optional*, defaults to `False`):
            Whether or not to add an `eos_token` at the end of sequences.
        clean_up_tokenization_spaces (`bool`, *optional*, defaults to `False`):
            Whether or not to cleanup spaces after decoding, cleanup consists in removing potential artifacts like
            extra spaces.
        use_default_system_prompt (`bool`, *optional*, defaults to `False`):
            Whether or not the default system prompt for Llama should be used.
        spaces_between_special_tokens (`bool`, *optional*, defaults to `False`):
            Whether or not to add spaces between special tokens.
        legacy (`bool`, *optional*):
            Whether or not the `legacy` behavior of the tokenizer should be used. Legacy is before the merge of #24622
            and #25224 which includes fixes to properly handle tokens that appear after special tokens.
            Make sure to also set `from_slow` to `True`.
            A simple example:

            - `legacy=True`:
            ```python
            >>> from transformers import LlamaTokenizerFast

            >>> tokenizer = LlamaTokenizerFast.from_pretrained("huggyllama/llama-7b", legacy=True, from_slow=True)
            >>> tokenizer.encode("Hello <s>.") # 869 is '▁.'
            [1, 15043, 29871, 1, 869]
            ```
            - `legacy=False`:
            ```python
            >>> from transformers import LlamaTokenizerFast

            >>> tokenizer = LlamaTokenizerFast.from_pretrained("huggyllama/llama-7b", legacy=False, from_slow=True)
            >>> tokenizer.encode("Hello <s>.")  # 29889 is '.'
            [1, 15043, 29871, 1, 29889]
            ```
            Checkout the [pull request](https://github.com/huggingface/transformers/pull/24565) for more details.
        add_prefix_space (`bool`, *optional*, defaults to `True`):
            Whether or not to add an initial space to the input. This allows to treat the leading word just as any
            other word. Again, this should be set with `from_slow=True` to make sure it's taken into account.
    """

    vocab_files_names = VOCAB_FILES_NAMES
    model_input_names = ["input_ids", "attention_mask"]

    def __init__(
        self,
        vocab_file,
        unk_token="<unk>",
        bos_token="<s>",
        eos_token="</s>",
        pad_token=None,
        sp_model_kwargs: Optional[Dict[str, Any]] = None,
        add_bos_token=True,
        add_eos_token=False,
        clean_up_tokenization_spaces=False,
        use_default_system_prompt=False,
        spaces_between_special_tokens=False,
        legacy=None,
        add_prefix_space=True,
        **kwargs,
    ):
        self.sp_model_kwargs = {} if sp_model_kwargs is None else sp_model_kwargs
        bos_token = AddedToken(bos_token, normalized=False, special=True) if isinstance(bos_token, str) else bos_token
        eos_token = AddedToken(eos_token, normalized=False, special=True) if isinstance(eos_token, str) else eos_token
        unk_token = AddedToken(unk_token, normalized=False, special=True) if isinstance(unk_token, str) else unk_token
        pad_token = AddedToken(pad_token, normalized=False, special=True) if isinstance(pad_token, str) else pad_token

        if legacy is None:
            logger.warning_once(
                f"You are using the default legacy behaviour of the {self.__class__}. This is"
                " expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you."
                " If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it"
                " means, and thoroughly read the reason why this was added as explained in"
                " https://github.com/huggingface/transformers/pull/24565 - if you loaded a llama tokenizer from a GGUF file"
                " you can ignore this message"
            )
            legacy = True

        self.legacy = legacy
        self.vocab_file = vocab_file
        self.add_bos_token = add_bos_token
        self.add_eos_token = add_eos_token
        self.use_default_system_prompt = use_default_system_prompt
        self.sp_model = self.get_spm_processor(kwargs.pop("from_slow", False))
        self.add_prefix_space = add_prefix_space

        super().__init__(
            bos_token=bos_token,
            eos_token=eos_token,
            unk_token=unk_token,
            pad_token=pad_token,
            add_bos_token=add_bos_token,
            add_eos_token=add_eos_token,
            sp_model_kwargs=self.sp_model_kwargs,
            clean_up_tokenization_spaces=clean_up_tokenization_spaces,
            use_default_system_prompt=use_default_system_prompt,
            spaces_between_special_tokens=spaces_between_special_tokens,
            legacy=legacy,
            add_prefix_space=add_prefix_space,
            **kwargs,
        )

    @property
    def unk_token_length(self):
        return len(self.sp_model.encode(str(self.unk_token)))

    # Copied from transformers.models.t5.tokenization_t5.T5Tokenizer.get_spm_processor
    def get_spm_processor(self, from_slow=False):
        tokenizer = spm.SentencePieceProcessor(**self.sp_model_kwargs)
        if self.legacy or from_slow:  # no dependency on protobuf
            tokenizer.Load(self.vocab_file)
            return tokenizer

        with open(self.vocab_file, "rb") as f:
            sp_model = f.read()
            model_pb2 = import_protobuf(f"The new behaviour of {self.__class__.__name__} (with `self.legacy = False`)")
            model = model_pb2.ModelProto.FromString(sp_model)
            normalizer_spec = model_pb2.NormalizerSpec()
            normalizer_spec.add_dummy_prefix = False
            model.normalizer_spec.MergeFrom(normalizer_spec)
            sp_model = model.SerializeToString()
            tokenizer.LoadFromSerializedProto(sp_model)
        return tokenizer

    def __getstate__(self):
        state = self.__dict__.copy()
        state["sp_model"] = None
        state["sp_model_proto"] = self.sp_model.serialized_model_proto()
        return state

    def __setstate__(self, d):
        self.__dict__ = d
        self.sp_model = spm.SentencePieceProcessor(**self.sp_model_kwargs)
        self.sp_model.LoadFromSerializedProto(self.sp_model_proto)

    @property
    def vocab_size(self):
        """Returns vocab size"""
        return self.sp_model.get_piece_size()

    def get_vocab(self):
        """Returns vocab as a dict"""
        vocab = {self.convert_ids_to_tokens(i): i for i in range(self.vocab_size)}
        vocab.update(self.added_tokens_encoder)
        return vocab

    # Copied from transformers.models.t5.tokenization_t5.T5Tokenizer.tokenize
    def tokenize(self, text: "TextInput", **kwargs) -> List[str]:
        """
        Converts a string to a list of tokens. If `self.legacy` is set to `False`, a prefix token is added unless the
        first token is special.
        """
        if self.legacy or len(text) == 0:
            return super().tokenize(text, **kwargs)

        text = text.replace(SPIECE_UNDERLINE, " ")
        if self.add_prefix_space:
            text = SPIECE_UNDERLINE + text

        tokens = super().tokenize(text, **kwargs)

        if len(tokens) > 1 and tokens[0] == SPIECE_UNDERLINE and tokens[1] in self.all_special_tokens:
            tokens = tokens[1:]
        return tokens

    # Copied from transformers.models.t5.tokenization_t5.T5Tokenizer._tokenize
    def _tokenize(self, text, **kwargs):
        """
        Returns a tokenized string.

        We de-activated the `add_dummy_prefix` option, thus the sentencepiece internals will always strip any
        SPIECE_UNDERLINE. For example: `self.sp_model.encode(f"{SPIECE_UNDERLINE}Hey", out_type = str)` will give
        `['H', 'e', 'y']` instead of `['▁He', 'y']`. Thus we always encode `f"{unk_token}text"` and strip the
        `unk_token`. Here is an example with `unk_token = "<unk>"` and `unk_token_length = 4`.
        `self.tokenizer.sp_model.encode("<unk> Hey", out_type = str)[4:]`.
        """
        if self.legacy or not text.startswith((SPIECE_UNDERLINE, " ")):
            return self.sp_model.encode(text, out_type=str)

        # 1. Encode string + prefix ex: "<unk> Hey"
        tokens = self.sp_model.encode(self.unk_token + text, out_type=str)
        # 2. Remove self.unk_token from ['<','unk','>', '▁Hey']
        return tokens[self.unk_token_length :] if len(tokens) >= self.unk_token_length else tokens

    def _convert_token_to_id(self, token):
        """Converts a token (str) in an id using the vocab."""
        return self.sp_model.piece_to_id(token)

    def _convert_id_to_token(self, index):
        """Converts an index (integer) in a token (str) using the vocab."""
        token = self.sp_model.IdToPiece(index)
        return token

    def convert_tokens_to_string(self, tokens):
        """Converts a sequence of tokens (string) in a single string."""
        # since we manually add the prefix space, we have to remove it when decoding
        if tokens[0].startswith(SPIECE_UNDERLINE) and self.add_prefix_space:
            tokens[0] = tokens[0][1:]

        current_sub_tokens = []
        out_string = ""
        prev_is_special = False
        for i, token in enumerate(tokens):
            # make sure that special tokens are not decoded using sentencepiece model
            if token in self.all_special_tokens:
                if not prev_is_special and i != 0 and self.legacy:
                    out_string += " "
                out_string += self.sp_model.decode(current_sub_tokens) + token
                prev_is_special = True
                current_sub_tokens = []
            else:
                if prev_is_special and i == 1 and self.add_prefix_space and not token.startswith(SPIECE_UNDERLINE):
                    out_string += " "
                current_sub_tokens.append(token)
                prev_is_special = False
        out_string += self.sp_model.decode(current_sub_tokens)
        return out_string

    def save_vocabulary(self, save_directory, filename_prefix: Optional[str] = None) -> Tuple[str]:
        """
        Save the vocabulary and special tokens file to a directory.

        Args:
            save_directory (`str`):
                The directory in which to save the vocabulary.

        Returns:
            `Tuple(str)`: Paths to the files saved.
        """
        if not os.path.isdir(save_directory):
            logger.error(f"Vocabulary path ({save_directory}) should be a directory")
            return
        out_vocab_file = os.path.join(
            save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"]
        )

        if os.path.abspath(self.vocab_file) != os.path.abspath(out_vocab_file) and os.path.isfile(self.vocab_file):
            copyfile(self.vocab_file, out_vocab_file)
        elif not os.path.isfile(self.vocab_file):
            with open(out_vocab_file, "wb") as fi:
                content_spiece_model = self.sp_model.serialized_model_proto()
                fi.write(content_spiece_model)

        return (out_vocab_file,)

    def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None):
        bos_token_id = [self.bos_token_id] if self.add_bos_token else []
        eos_token_id = [self.eos_token_id] if self.add_eos_token else []

        output = bos_token_id + token_ids_0 + eos_token_id

        if token_ids_1 is not None:
            output = output + bos_token_id + token_ids_1 + eos_token_id

        return output

    def get_special_tokens_mask(
        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None, already_has_special_tokens: bool = False
    ) -> List[int]:
        """
        Retrieve sequence ids from a token list that has no special tokens added. This method is called when adding
        special tokens using the tokenizer `prepare_for_model` method.

        Args:
            token_ids_0 (`List[int]`):
                List of IDs.
            token_ids_1 (`List[int]`, *optional*):
                Optional second list of IDs for sequence pairs.
            already_has_special_tokens (`bool`, *optional*, defaults to `False`):
                Whether or not the token list is already formatted with special tokens for the model.

        Returns:
            `List[int]`: A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token.
        """
        if already_has_special_tokens:
            return super().get_special_tokens_mask(
                token_ids_0=token_ids_0, token_ids_1=token_ids_1, already_has_special_tokens=True
            )

        bos_token_id = [1] if self.add_bos_token else []
        eos_token_id = [1] if self.add_eos_token else []

        if token_ids_1 is None:
            return bos_token_id + ([0] * len(token_ids_0)) + eos_token_id
        return (
            bos_token_id
            + ([0] * len(token_ids_0))
            + eos_token_id
            + bos_token_id
            + ([0] * len(token_ids_1))
            + eos_token_id
        )

    def create_token_type_ids_from_sequences(
        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
    ) -> List[int]:
        """
        Creates a mask from the two sequences passed to be used in a sequence-pair classification task. An ALBERT
        sequence pair mask has the following format:

        ```
        0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1
        | first sequence    | second sequence |
        ```

        if token_ids_1 is None, only returns the first portion of the mask (0s).

        Args:
            token_ids_0 (`List[int]`):
                List of ids.
            token_ids_1 (`List[int]`, *optional*):
                Optional second list of IDs for sequence pairs.

        Returns:
            `List[int]`: List of [token type IDs](../glossary#token-type-ids) according to the given sequence(s).
        """
        bos_token_id = [self.bos_token_id] if self.add_bos_token else []
        eos_token_id = [self.eos_token_id] if self.add_eos_token else []

        output = [0] * len(bos_token_id + token_ids_0 + eos_token_id)

        if token_ids_1 is not None:
            output += [1] * len(bos_token_id + token_ids_1 + eos_token_id)

        return output

mindnlp.transformers.models.llama.tokenization_llama.LlamaTokenizer.vocab_size property

Returns vocab size

mindnlp.transformers.models.llama.tokenization_llama.LlamaTokenizer.convert_tokens_to_string(tokens)

Converts a sequence of tokens (strings) into a single string.

Source code in mindnlp\transformers\models\llama\tokenization_llama.py
def convert_tokens_to_string(self, tokens):
    """Converts a sequence of tokens (string) in a single string."""
    # since we manually add the prefix space, we have to remove it when decoding
    if tokens[0].startswith(SPIECE_UNDERLINE) and self.add_prefix_space:
        tokens[0] = tokens[0][1:]

    current_sub_tokens = []
    out_string = ""
    prev_is_special = False
    for i, token in enumerate(tokens):
        # make sure that special tokens are not decoded using sentencepiece model
        if token in self.all_special_tokens:
            if not prev_is_special and i != 0 and self.legacy:
                out_string += " "
            out_string += self.sp_model.decode(current_sub_tokens) + token
            prev_is_special = True
            current_sub_tokens = []
        else:
            if prev_is_special and i == 1 and self.add_prefix_space and not token.startswith(SPIECE_UNDERLINE):
                out_string += " "
            current_sub_tokens.append(token)
            prev_is_special = False
    out_string += self.sp_model.decode(current_sub_tokens)
    return out_string
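
A minimal round-trip sketch, assuming the class is importable from mindnlp.transformers and a standard Llama SentencePiece checkpoint; the token strings shown are indicative only:

```python
>>> from mindnlp.transformers import LlamaTokenizer  # import path assumed for illustration
>>> tokenizer = LlamaTokenizer.from_pretrained("huggyllama/llama-7b")
>>> tokens = tokenizer.tokenize("Hello world")
>>> tokens
['▁Hello', '▁world']
>>> tokenizer.convert_tokens_to_string(tokens)  # the manually added prefix space is removed again
'Hello world'
```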

mindnlp.transformers.models.llama.tokenization_llama.LlamaTokenizer.create_token_type_ids_from_sequences(token_ids_0, token_ids_1=None)

Creates a mask from the two sequences passed to be used in a sequence-pair classification task. An ALBERT sequence pair mask has the following format:

0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1
| first sequence    | second sequence |

If token_ids_1 is None, only the first portion of the mask (0s) is returned.

PARAMETER DESCRIPTION
token_ids_0

List of ids.

TYPE: `List[int]`

token_ids_1

Optional second list of IDs for sequence pairs.

TYPE: `List[int]`, *optional* DEFAULT: None

RETURNS DESCRIPTION
List[int]

List[int]: List of token type IDs according to the given sequence(s).

Source code in mindnlp\transformers\models\llama\tokenization_llama.py
def create_token_type_ids_from_sequences(
    self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
) -> List[int]:
    """
    Creates a mask from the two sequences passed to be used in a sequence-pair classification task. An ALBERT
    sequence pair mask has the following format:

    ```
    0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1
    | first sequence    | second sequence |
    ```

    if token_ids_1 is None, only returns the first portion of the mask (0s).

    Args:
        token_ids_0 (`List[int]`):
            List of ids.
        token_ids_1 (`List[int]`, *optional*):
            Optional second list of IDs for sequence pairs.

    Returns:
        `List[int]`: List of [token type IDs](../glossary#token-type-ids) according to the given sequence(s).
    """
    bos_token_id = [self.bos_token_id] if self.add_bos_token else []
    eos_token_id = [self.eos_token_id] if self.add_eos_token else []

    output = [0] * len(bos_token_id + token_ids_0 + eos_token_id)

    if token_ids_1 is not None:
        output += [1] * len(bos_token_id + token_ids_1 + eos_token_id)

    return output
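
An illustrative sketch under the default settings (add_bos_token=True, add_eos_token=False), so each segment contributes one extra leading position for its BOS token; the ids below are arbitrary placeholders:

```python
>>> from mindnlp.transformers import LlamaTokenizer  # import path assumed for illustration
>>> tokenizer = LlamaTokenizer.from_pretrained("huggyllama/llama-7b")
>>> tokenizer.create_token_type_ids_from_sequences([100, 101, 102], [200, 201])  # 1 bos + 3 ids, then 1 bos + 2 ids
[0, 0, 0, 0, 1, 1, 1]
```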

mindnlp.transformers.models.llama.tokenization_llama.LlamaTokenizer.get_special_tokens_mask(token_ids_0, token_ids_1=None, already_has_special_tokens=False)

Retrieve sequence ids from a token list that has no special tokens added. This method is called when adding special tokens using the tokenizer prepare_for_model method.

PARAMETER DESCRIPTION
token_ids_0

List of IDs.

TYPE: `List[int]`

token_ids_1

Optional second list of IDs for sequence pairs.

TYPE: `List[int]`, *optional* DEFAULT: None

already_has_special_tokens

Whether or not the token list is already formatted with special tokens for the model.

TYPE: `bool`, *optional*, defaults to `False` DEFAULT: False

RETURNS DESCRIPTION
List[int]

List[int]: A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token.

Source code in mindnlp\transformers\models\llama\tokenization_llama.py
def get_special_tokens_mask(
    self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None, already_has_special_tokens: bool = False
) -> List[int]:
    """
    Retrieve sequence ids from a token list that has no special tokens added. This method is called when adding
    special tokens using the tokenizer `prepare_for_model` method.

    Args:
        token_ids_0 (`List[int]`):
            List of IDs.
        token_ids_1 (`List[int]`, *optional*):
            Optional second list of IDs for sequence pairs.
        already_has_special_tokens (`bool`, *optional*, defaults to `False`):
            Whether or not the token list is already formatted with special tokens for the model.

    Returns:
        `List[int]`: A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token.
    """
    if already_has_special_tokens:
        return super().get_special_tokens_mask(
            token_ids_0=token_ids_0, token_ids_1=token_ids_1, already_has_special_tokens=True
        )

    bos_token_id = [1] if self.add_bos_token else []
    eos_token_id = [1] if self.add_eos_token else []

    if token_ids_1 is None:
        return bos_token_id + ([0] * len(token_ids_0)) + eos_token_id
    return (
        bos_token_id
        + ([0] * len(token_ids_0))
        + eos_token_id
        + bos_token_id
        + ([0] * len(token_ids_1))
        + eos_token_id
    )
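
A short sketch under the default settings (add_bos_token=True, add_eos_token=False); the ids are placeholders, only their count matters:

```python
>>> from mindnlp.transformers import LlamaTokenizer  # import path assumed for illustration
>>> tokenizer = LlamaTokenizer.from_pretrained("huggyllama/llama-7b")
>>> tokenizer.get_special_tokens_mask([100, 101, 102])        # the 1 marks the prepended BOS position
[1, 0, 0, 0]
>>> tokenizer.get_special_tokens_mask([100, 101], [200, 201])
[1, 0, 0, 1, 0, 0]
```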

mindnlp.transformers.models.llama.tokenization_llama.LlamaTokenizer.get_vocab()

Returns vocab as a dict

Source code in mindnlp\transformers\models\llama\tokenization_llama.py
def get_vocab(self):
    """Returns vocab as a dict"""
    vocab = {self.convert_ids_to_tokens(i): i for i in range(self.vocab_size)}
    vocab.update(self.added_tokens_encoder)
    return vocab
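
A quick sketch; the special-token ids shown are those of the released Llama SentencePiece model and are indicative only:

```python
>>> from mindnlp.transformers import LlamaTokenizer  # import path assumed for illustration
>>> tokenizer = LlamaTokenizer.from_pretrained("huggyllama/llama-7b")
>>> vocab = tokenizer.get_vocab()
>>> vocab["<unk>"], vocab["<s>"], vocab["</s>"]
(0, 1, 2)
>>> len(vocab) >= tokenizer.vocab_size  # added tokens are merged on top of the base vocabulary
True
```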

mindnlp.transformers.models.llama.tokenization_llama.LlamaTokenizer.save_vocabulary(save_directory, filename_prefix=None)

Save the vocabulary and special tokens file to a directory.

PARAMETER DESCRIPTION
save_directory

The directory in which to save the vocabulary.

TYPE: `str`

RETURNS DESCRIPTION
Tuple[str]

Tuple[str]: Paths to the files saved.

Source code in mindnlp\transformers\models\llama\tokenization_llama.py
def save_vocabulary(self, save_directory, filename_prefix: Optional[str] = None) -> Tuple[str]:
    """
    Save the vocabulary and special tokens file to a directory.

    Args:
        save_directory (`str`):
            The directory in which to save the vocabulary.

    Returns:
        `Tuple(str)`: Paths to the files saved.
    """
    if not os.path.isdir(save_directory):
        logger.error(f"Vocabulary path ({save_directory}) should be a directory")
        return
    out_vocab_file = os.path.join(
        save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"]
    )

    if os.path.abspath(self.vocab_file) != os.path.abspath(out_vocab_file) and os.path.isfile(self.vocab_file):
        copyfile(self.vocab_file, out_vocab_file)
    elif not os.path.isfile(self.vocab_file):
        with open(out_vocab_file, "wb") as fi:
            content_spiece_model = self.sp_model.serialized_model_proto()
            fi.write(content_spiece_model)

    return (out_vocab_file,)
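
A usage sketch; for this model family the vocabulary file is written as tokenizer.model (the name comes from VOCAB_FILES_NAMES):

```python
>>> import os, tempfile
>>> from mindnlp.transformers import LlamaTokenizer  # import path assumed for illustration
>>> tokenizer = LlamaTokenizer.from_pretrained("huggyllama/llama-7b")
>>> (out_file,) = tokenizer.save_vocabulary(tempfile.mkdtemp())
>>> os.path.basename(out_file)
'tokenizer.model'
```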

mindnlp.transformers.models.llama.tokenization_llama.LlamaTokenizer.tokenize(text, **kwargs)

Converts a string to a list of tokens. If self.legacy is set to False, a prefix token is added unless the first token is special.

Source code in mindnlp\transformers\models\llama\tokenization_llama.py
def tokenize(self, text: "TextInput", **kwargs) -> List[str]:
    """
    Converts a string to a list of tokens. If `self.legacy` is set to `False`, a prefix token is added unless the
    first token is special.
    """
    if self.legacy or len(text) == 0:
        return super().tokenize(text, **kwargs)

    text = text.replace(SPIECE_UNDERLINE, " ")
    if self.add_prefix_space:
        text = SPIECE_UNDERLINE + text

    tokens = super().tokenize(text, **kwargs)

    if len(tokens) > 1 and tokens[0] == SPIECE_UNDERLINE and tokens[1] in self.all_special_tokens:
        tokens = tokens[1:]
    return tokens
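
A sketch of the non-legacy path (outputs are indicative and depend on the SentencePiece model): with add_prefix_space left at its default, the text is prefixed with '▁' before SentencePiece sees it, and an empty input simply falls back to the parent implementation:

```python
>>> from mindnlp.transformers import LlamaTokenizer  # import path assumed for illustration
>>> tokenizer = LlamaTokenizer.from_pretrained("huggyllama/llama-7b", legacy=False)
>>> tokenizer.tokenize("Hello")
['▁Hello']
>>> tokenizer.tokenize("")
[]
```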

mindnlp.transformers.models.llama.tokenization_llama_fast

tokenization llama fast

mindnlp.transformers.models.llama.tokenization_llama_fast.LlamaTokenizerFast

Bases: PreTrainedTokenizerFast

Construct a Llama tokenizer. Based on byte-level Byte-Pair-Encoding.

Notably, this uses ByteFallback and no normalization.

>>> from transformers import LlamaTokenizerFast

>>> tokenizer = LlamaTokenizerFast.from_pretrained("hf-internal-testing/llama-tokenizer")
>>> tokenizer.encode("Hello this is a test")
[1, 15043, 445, 338, 263, 1243]

If you want to change the bos_token or the eos_token, make sure to specify them when initializing the tokenizer, or call tokenizer.update_post_processor() to make sure that the post-processing is correctly done (otherwise the values of the first token and final token of an encoded sequence will not be correct). For more details, check out the [post-processors](https://huggingface.co/docs/tokenizers/api/post-processors) documentation.

This tokenizer inherits from [PreTrainedTokenizerFast] which contains most of the main methods. Users should refer to this superclass for more information regarding those methods.

PARAMETER DESCRIPTION
vocab_file

SentencePiece file (generally has a .model extension) that contains the vocabulary necessary to instantiate a tokenizer.

TYPE: `str`, *optional* DEFAULT: None

tokenizer_file

tokenizers file (generally has a .json extension) that contains everything needed to load the tokenizer.

TYPE: `str`, *optional* DEFAULT: None

clean_up_tokenization_spaces

Whether or not to clean up spaces after decoding; cleanup consists of removing potential artifacts like extra spaces.

TYPE: `bool`, *optional*, defaults to `False` DEFAULT: False

unk_token

The unknown token. A token that is not in the vocabulary cannot be converted to an ID and is set to be this token instead.

TYPE: `str` or `tokenizers.AddedToken`, *optional*, defaults to `"<unk>"` DEFAULT: '<unk>'

bos_token

The beginning of sequence token that was used during pretraining. Can be used as a sequence classifier token.

TYPE: `str` or `tokenizers.AddedToken`, *optional*, defaults to `"<s>"` DEFAULT: '<s>'

eos_token

The end of sequence token.

TYPE: `str` or `tokenizers.AddedToken`, *optional*, defaults to `"</s>"` DEFAULT: '</s>'

add_bos_token

Whether or not to add a bos_token at the start of sequences.

TYPE: `bool`, *optional*, defaults to `True` DEFAULT: True

add_eos_token

Whether or not to add an eos_token at the end of sequences.

TYPE: `bool`, *optional*, defaults to `False` DEFAULT: False

use_default_system_prompt

Whether or not the default system prompt for Llama should be used

TYPE: `bool`, *optional*, defaults to `False` DEFAULT: False

legacy

Whether or not the legacy behavior of the tokenizer should be used. Legacy is the behavior before the merge of #24622 and #25224, which include fixes to properly handle tokens that appear after special tokens. Make sure to also set from_slow to True. A simple example:

  • legacy=True:
    >>> from transformers import LlamaTokenizerFast
    
    >>> tokenizer = LlamaTokenizerFast.from_pretrained("huggyllama/llama-7b", legacy=True, from_slow=True)
    >>> tokenizer.encode("Hello <s>.") # 869 is '▁.'
    [1, 15043, 29871, 1, 869]
    
  • legacy=False:
    >>> from transformers import LlamaTokenizerFast
    
    >>> tokenizer = LlamaTokenizerFast.from_pretrained("huggyllama/llama-7b", legacy=False, from_slow=True)
    >>> tokenizer.encode("Hello <s>.")  # 29889 is '.'
    [1, 15043, 29871, 1, 29889]
    
    Checkout the pull request for more details.

TYPE: `bool`, *optional* DEFAULT: None

add_prefix_space

Whether or not the tokenizer should automatically add a prefix space

TYPE: `bool`, *optional* DEFAULT: None

Source code in mindnlp\transformers\models\llama\tokenization_llama_fast.py
class LlamaTokenizerFast(PreTrainedTokenizerFast):
    """
    Construct a Llama tokenizer. Based on byte-level Byte-Pair-Encoding.

    This uses notably ByteFallback and no normalization.

    ```python
    >>> from transformers import LlamaTokenizerFast

    >>> tokenizer = LlamaTokenizerFast.from_pretrained("hf-internal-testing/llama-tokenizer")
    >>> tokenizer.encode("Hello this is a test")
    [1, 15043, 445, 338, 263, 1243]
    ```

    If you want to change the `bos_token` or the `eos_token`, make sure to specify them when initializing the model, or
    call `tokenizer.update_post_processor()` to make sure that the post-processing is correctly done (otherwise the
    values of the first token and final token of an encoded sequence will not be correct). For more details, checkout
    [post-processors] (https://huggingface.co/docs/tokenizers/api/post-processors) documentation.


    This tokenizer inherits from [`PreTrainedTokenizerFast`] which contains most of the main methods. Users should
    refer to this superclass for more information regarding those methods.

    Args:
        vocab_file (`str`, *optional*):
            [SentencePiece](https://github.com/google/sentencepiece) file (generally has a .model extension) that
            contains the vocabulary necessary to instantiate a tokenizer.
        tokenizer_file (`str`, *optional*):
            [tokenizers](https://github.com/huggingface/tokenizers) file (generally has a .json extension) that
            contains everything needed to load the tokenizer.
        clean_up_tokenization_spaces (`bool`, *optional*, defaults to `False`):
            Whether or not to cleanup spaces after decoding, cleanup consists in removing potential artifacts like
            extra spaces.
        unk_token (`str` or `tokenizers.AddedToken`, *optional*, defaults to `"<unk>"`):
            The unknown token. A token that is not in the vocabulary cannot be converted to an ID and is set to be this
            token instead.
        bos_token (`str` or `tokenizers.AddedToken`, *optional*, defaults to `"<s>"`):
            The beginning of sequence token that was used during pretraining. Can be used a sequence classifier token.
        eos_token (`str` or `tokenizers.AddedToken`, *optional*, defaults to `"</s>"`):
            The end of sequence token.
        add_bos_token (`bool`, *optional*, defaults to `True`):
            Whether or not to add an `bos_token` at the start of sequences.
        add_eos_token (`bool`, *optional*, defaults to `False`):
            Whether or not to add an `eos_token` at the end of sequences.
        use_default_system_prompt (`bool`, *optional*, defaults to `False`):
            Whether or not the default system prompt for Llama should be used
        legacy (`bool`, *optional*):
            Whether or not the `legacy` behavior of the tokenizer should be used. Legacy is before the merge of #24622
            and #25224 which includes fixes to properly handle tokens that appear after special tokens.
            Make sure to also set `from_slow` to `True`.
            A simple example:

            - `legacy=True`:
            ```python
            >>> from transformers import LlamaTokenizerFast

            >>> tokenizer = LlamaTokenizerFast.from_pretrained("huggyllama/llama-7b", legacy=True, from_slow=True)
            >>> tokenizer.encode("Hello <s>.") # 869 is '▁.'
            [1, 15043, 29871, 1, 869]
            ```
            - `legacy=False`:
            ```python
            >>> from transformers import LlamaTokenizerFast

            >>> tokenizer = LlamaTokenizerFast.from_pretrained("huggyllama/llama-7b", legacy=False, from_slow=True)
            >>> tokenizer.encode("Hello <s>.")  # 29889 is '.'
            [1, 15043, 29871, 1, 29889]
            ```
            Checkout the [pull request](https://github.com/huggingface/transformers/pull/24565) for more details.
        add_prefix_space (`bool`, *optional*):
            Whether or not the tokenizer should automatically add a prefix space
    """

    vocab_files_names = VOCAB_FILES_NAMES
    slow_tokenizer_class = LlamaTokenizer
    padding_side = "left"
    model_input_names = ["input_ids", "attention_mask"]

    def __init__(
        self,
        vocab_file=None,
        tokenizer_file=None,
        clean_up_tokenization_spaces=False,
        unk_token="<unk>",
        bos_token="<s>",
        eos_token="</s>",
        add_bos_token=True,
        add_eos_token=False,
        use_default_system_prompt=False,
        legacy=None,
        add_prefix_space=None,
        **kwargs,
    ):
        if legacy is None:
            logger.warning_once(
                f"You are using the default legacy behaviour of the {self.__class__}. This is"
                " expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you."
                " If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it"
                " means, and thoroughly read the reason why this was added as explained in"
                " https://github.com/huggingface/transformers/pull/24565 - if you loaded a llama tokenizer from a GGUF file"
                " you can ignore this message."
            )
            legacy = True
        self.legacy = legacy

        if add_prefix_space is not None:
            kwargs["from_slow"] = True

        super().__init__(
            vocab_file=vocab_file,
            tokenizer_file=tokenizer_file,
            clean_up_tokenization_spaces=clean_up_tokenization_spaces,
            unk_token=unk_token,
            bos_token=bos_token,
            eos_token=eos_token,
            add_bos_token=add_bos_token,
            add_eos_token=add_eos_token,
            use_default_system_prompt=use_default_system_prompt,
            add_prefix_space=add_prefix_space,
            legacy=legacy,
            **kwargs,
        )
        self._add_bos_token = add_bos_token
        self._add_eos_token = add_eos_token
        self.update_post_processor()
        self.use_default_system_prompt = use_default_system_prompt
        self.vocab_file = vocab_file

    @property
    def can_save_slow_tokenizer(self) -> bool:
        return os.path.isfile(self.vocab_file) if self.vocab_file else False

    def update_post_processor(self):
        """
        Updates the underlying post processor with the current `bos_token` and `eos_token`.
        """
        bos = self.bos_token
        bos_token_id = self.bos_token_id
        if bos is None and self.add_bos_token:
            raise ValueError("add_bos_token = True but bos_token = None")

        eos = self.eos_token
        eos_token_id = self.eos_token_id
        if eos is None and self.add_eos_token:
            raise ValueError("add_eos_token = True but eos_token = None")

        single = f"{(bos+':0 ') if self.add_bos_token else ''}$A:0{(' '+eos+':0') if self.add_eos_token else ''}"
        pair = f"{single}{(' '+bos+':1') if self.add_bos_token else ''} $B:1{(' '+eos+':1') if self.add_eos_token else ''}"

        special_tokens = []
        if self.add_bos_token:
            special_tokens.append((bos, bos_token_id))
        if self.add_eos_token:
            special_tokens.append((eos, eos_token_id))
        self._tokenizer.post_processor = processors.TemplateProcessing(
            single=single, pair=pair, special_tokens=special_tokens
        )

    @property
    def add_eos_token(self):
        return self._add_eos_token

    @property
    def add_bos_token(self):
        return self._add_bos_token

    @add_eos_token.setter
    def add_eos_token(self, value):
        self._add_eos_token = value
        self.update_post_processor()

    @add_bos_token.setter
    def add_bos_token(self, value):
        self._add_bos_token = value
        self.update_post_processor()

    def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]:
        if not self.can_save_slow_tokenizer:
            raise ValueError(
                "Your fast tokenizer does not have the necessary information to save the vocabulary for a slow "
                "tokenizer."
            )

        if not os.path.isdir(save_directory):
            logger.error(f"Vocabulary path ({save_directory}) should be a directory")
            return
        out_vocab_file = os.path.join(
            save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"]
        )

        if os.path.abspath(self.vocab_file) != os.path.abspath(out_vocab_file):
            copyfile(self.vocab_file, out_vocab_file)

        return (out_vocab_file,)

    # TODO ArthurZ let's rely on the template processor instead, refactor all fast tokenizers
    # Copied from transformers.models.llama.tokenization_llama.LlamaTokenizer.build_inputs_with_special_tokens
    def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None):
        bos_token_id = [self.bos_token_id] if self.add_bos_token else []
        eos_token_id = [self.eos_token_id] if self.add_eos_token else []

        output = bos_token_id + token_ids_0 + eos_token_id

        if token_ids_1 is not None:
            output = output + bos_token_id + token_ids_1 + eos_token_id

        return output
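
A hedged usage sketch: because special tokens are applied by the post-processor, enabling add_eos_token at construction changes what encode produces (checkpoint name as in the class docstring; exact ids depend on the vocabulary):

```python
>>> from mindnlp.transformers import LlamaTokenizerFast  # import path assumed for illustration
>>> tok = LlamaTokenizerFast.from_pretrained("hf-internal-testing/llama-tokenizer", add_eos_token=True)
>>> ids = tok.encode("Hello this is a test")
>>> ids[0] == tok.bos_token_id, ids[-1] == tok.eos_token_id
(True, True)
```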

mindnlp.transformers.models.llama.tokenization_llama_fast.LlamaTokenizerFast.update_post_processor()

Updates the underlying post processor with the current bos_token and eos_token.

Source code in mindnlp\transformers\models\llama\tokenization_llama_fast.py
def update_post_processor(self):
    """
    Updates the underlying post processor with the current `bos_token` and `eos_token`.
    """
    bos = self.bos_token
    bos_token_id = self.bos_token_id
    if bos is None and self.add_bos_token:
        raise ValueError("add_bos_token = True but bos_token = None")

    eos = self.eos_token
    eos_token_id = self.eos_token_id
    if eos is None and self.add_eos_token:
        raise ValueError("add_eos_token = True but eos_token = None")

    single = f"{(bos+':0 ') if self.add_bos_token else ''}$A:0{(' '+eos+':0') if self.add_eos_token else ''}"
    pair = f"{single}{(' '+bos+':1') if self.add_bos_token else ''} $B:1{(' '+eos+':1') if self.add_eos_token else ''}"

    special_tokens = []
    if self.add_bos_token:
        special_tokens.append((bos, bos_token_id))
    if self.add_eos_token:
        special_tokens.append((eos, eos_token_id))
    self._tokenizer.post_processor = processors.TemplateProcessing(
        single=single, pair=pair, special_tokens=special_tokens
    )
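
A brief sketch of when this matters: the add_bos_token/add_eos_token setters call update_post_processor() themselves, so toggling them after construction immediately changes the encoded output (checkpoint name as in the class docstring):

```python
>>> from mindnlp.transformers import LlamaTokenizerFast  # import path assumed for illustration
>>> tok = LlamaTokenizerFast.from_pretrained("hf-internal-testing/llama-tokenizer")
>>> tok.encode("hi")[-1] == tok.eos_token_id
False
>>> tok.add_eos_token = True  # the setter re-runs update_post_processor()
>>> tok.encode("hi")[-1] == tok.eos_token_id
True
```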

mindnlp.transformers.models.llama.tokenization_code_llama

Tokenization classes for Code LLaMA.

mindnlp.transformers.models.llama.tokenization_code_llama.CodeLlamaTokenizer

Bases: PreTrainedTokenizer

Construct a CodeLlama tokenizer. Based on byte-level Byte-Pair-Encoding. The default padding token is unset as there is no padding token in the original model.

The default configuration matches that of codellama/CodeLlama-7b-Instruct-hf, which supports prompt infilling.
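
A hedged infilling sketch (import path assumed to mirror the rest of these docs): the <FILL_ME> marker splits the prompt into a prefix and a suffix, and tokenize() arranges them around the ▁<PRE>/▁<SUF>/▁<MID> control tokens:

```python
>>> from mindnlp.transformers import CodeLlamaTokenizer  # import path assumed for illustration
>>> tokenizer = CodeLlamaTokenizer.from_pretrained("codellama/CodeLlama-7b-Instruct-hf")
>>> tokens = tokenizer.tokenize("def add(a, b):\n    <FILL_ME>\n")
>>> tokens[0], tokens.count("▁<SUF>"), tokens.count("▁<MID>")
('▁<PRE>', 1, 1)
```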

PARAMETER DESCRIPTION
vocab_file

Path to the vocabulary file.

TYPE: `str`

unk_token

The unknown token. A token that is not in the vocabulary cannot be converted to an ID and is set to be this token instead.

TYPE: `str`, *optional*, defaults to `"<unk>"` DEFAULT: '<unk>'

bos_token

The beginning of sequence token that was used during pretraining. Can be used as a sequence classifier token.

TYPE: `str`, *optional*, defaults to `"<s>"` DEFAULT: '<s>'

eos_token

The end of sequence token.

When building a sequence using special tokens, this is not the token that is used for the end of sequence. The token used is the sep_token.

TYPE: `str`, *optional*, defaults to `"</s>"` DEFAULT: '</s>'

prefix_token

Prefix token used for infilling.

TYPE: `str`, *optional*, defaults to `"▁<PRE>"` DEFAULT: '▁<PRE>'

middle_token

Middle token used for infilling.

TYPE: `str`, *optional*, defaults to `"▁<MID>"` DEFAULT: '▁<MID>'

suffix_token

Suffix token used for infilling.

TYPE: `str`, *optional*, defaults to `"▁<SUF>"` DEFAULT: '▁<SUF>'

eot_token

End of text token used for infilling.

TYPE: `str`, *optional*, defaults to `"▁<EOT>"` DEFAULT: '▁<EOT>'

fill_token

The token used to split the input between the prefix and suffix.

TYPE: `str`, *optional*, defaults to `"<FILL_ME>"` DEFAULT: '<FILL_ME>'

suffix_first

Whether the input prompt and suffix should be formatted with the suffix first.

TYPE: `bool`, *optional*, defaults to `False` DEFAULT: False

sp_model_kwargs

Will be passed to the SentencePieceProcessor.__init__() method. The Python wrapper for SentencePiece can be used, among other things, to set:

  • enable_sampling: Enable subword regularization.
  • nbest_size: Sampling parameters for unigram. Invalid for BPE-Dropout.

    • nbest_size = {0,1}: No sampling is performed.
    • nbest_size > 1: samples from the nbest_size results.
    • nbest_size < 0: assumes that nbest_size is infinite and samples from all hypotheses (lattice) using the forward-filtering-and-backward-sampling algorithm.

  • alpha: Smoothing parameter for unigram sampling, and dropout probability of merge operations for BPE-dropout.

TYPE: `dict`, *optional* DEFAULT: None

add_bos_token

Whether to add a beginning of sequence token at the start of sequences.

TYPE: `bool`, *optional*, defaults to `True` DEFAULT: True

add_eos_token

Whether to add an end of sequence token at the end of sequences.

TYPE: `bool`, *optional*, defaults to `False` DEFAULT: False

clean_up_tokenization_spaces

Whether or not to clean up the tokenization spaces.

TYPE: `bool`, *optional*, defaults to `False` DEFAULT: False

additional_special_tokens

Additional special tokens used by the tokenizer.

TYPE: `List[str]`, *optional* DEFAULT: None

use_default_system_prompt

Whether or not the default system prompt for Llama should be used.

TYPE: `bool`, *optional*, defaults to `False` DEFAULT: False

Source code in mindnlp\transformers\models\llama\tokenization_code_llama.py
class CodeLlamaTokenizer(PreTrainedTokenizer):
    """
    Construct a CodeLlama tokenizer. Based on byte-level Byte-Pair-Encoding. The default padding token is unset as
    there is no padding token in the original model.

    The default configuration match that of
    [codellama/CodeLlama-7b-Instruct-hf](https://hf-mirror.com/codellama/CodeLlama-7b-Instruct-hf/blob/main/tokenizer_config.json)
    which supports prompt infilling.

    Args:
        vocab_file (`str`):
            Path to the vocabulary file.
        unk_token (`str`, *optional*, defaults to `"<unk>"`):
            The unknown token. A token that is not in the vocabulary cannot be converted to an ID and is set to be this
            token instead.
        bos_token (`str`, *optional*, defaults to `"<s>"`):
            The beginning of sequence token that was used during pretraining. Can be used a sequence classifier token.
        eos_token (`str`, *optional*, defaults to `"</s>"`):
            The end of sequence token.

            <Tip>

            When building a sequence using special tokens, this is not the token that is used for the end of sequence.
            The token used is the `sep_token`.

            </Tip>

        prefix_token (`str`, *optional*, defaults to `"▁<PRE>"`):
            Prefix token used for infilling.
        middle_token (`str`, *optional*, defaults to `"▁<MID>"`):
            Middle token used for infilling.
        suffix_token (`str`, *optional*, defaults to `"▁<SUF>"`):
            Suffix token used for infilling.
        eot_token (`str`, *optional*, defaults to `"▁<EOT>"`):
            End of text token used for infilling.
        fill_token (`str`, *optional*, defaults to `"<FILL_ME>"`):
            The token used to split the input between the prefix and suffix.
        suffix_first (`bool`, *optional*, defaults to `False`):
            Whether the input prompt and suffix should be formatted with the suffix first.
        sp_model_kwargs (`dict`, *optional*):
            Will be passed to the `SentencePieceProcessor.__init__()` method. The [Python wrapper for
            SentencePiece](https://github.com/google/sentencepiece/tree/master/python) can be used, among other things,
            to set:

            - `enable_sampling`: Enable subword regularization.
            - `nbest_size`: Sampling parameters for unigram. Invalid for BPE-Dropout.

                - `nbest_size = {0,1}`: No sampling is performed.
                - `nbest_size > 1`: samples from the nbest_size results.
                - `nbest_size < 0`: assuming that nbest_size is infinite and samples from the all hypothesis (lattice)
                using forward-filtering-and-backward-sampling algorithm.
            - `alpha`: Smoothing parameter for unigram sampling, and dropout probability of merge operations for
            BPE-dropout.
        add_bos_token (`bool`, *optional*, defaults to `True`):
            Whether to add a beginning of sequence token at the start of sequences.
        add_eos_token (`bool`, *optional*, defaults to `False`):
            Whether to add an end of sequence token at the end of sequences.
        clean_up_tokenization_spaces (`bool`, *optional*, defaults to `False`):
            Whether or not to clean up the tokenization spaces.
        additional_special_tokens (`List[str]`, *optional*):
            Additional special tokens used by the tokenizer.
        use_default_system_prompt (`bool`, *optional*, defaults to `False`):
            Whether or not the default system prompt for Llama should be used.
    """
    vocab_files_names = VOCAB_FILES_NAMES
    pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
    max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
    model_input_names = ["input_ids", "attention_mask"]

    def __init__(
        self,
        vocab_file,
        unk_token="<unk>",
        bos_token="<s>",
        eos_token="</s>",
        prefix_token="▁<PRE>",
        middle_token="▁<MID>",
        suffix_token="▁<SUF>",
        eot_token="▁<EOT>",
        fill_token="<FILL_ME>",
        suffix_first=False,
        sp_model_kwargs: Optional[Dict[str, Any]] = None,
        add_bos_token=True,
        add_eos_token=False,
        clean_up_tokenization_spaces=False,
        additional_special_tokens=None,
        use_default_system_prompt=False,
        **kwargs,
    ):
        """
        This method initializes an instance of the CodeLlamaTokenizer class.

        Args:
            self: The instance of the class.
            vocab_file (str): The path to the vocabulary file.
            unk_token (str, optional): The unknown token, default is '<unk>'.
            bos_token (str, optional): The beginning of sequence token, default is '<s>'.
            eos_token (str, optional): The end of sequence token, default is '</s>'.
            prefix_token (str, optional): The prefix token, default is '▁<PRE>'.
            middle_token (str, optional): The middle token, default is '▁<MID>'.
            suffix_token (str, optional): The suffix token, default is '▁<SUF>'.
            eot_token (str, optional): The end of text token, default is '▁<EOT>'.
            fill_token (str, optional): The fill token, default is '<FILL_ME>'.
            suffix_first (bool): Indicates whether suffix comes before prefix.
            sp_model_kwargs (Optional[Dict[str, Any]], optional): Additional arguments for the sentencepiece model.
            add_bos_token (bool, optional): Whether to add the bos token, default is True.
            add_eos_token (bool, optional): Whether to add the eos token, default is False.
            clean_up_tokenization_spaces (bool, optional): Whether to clean up tokenization spaces, default is False.
            additional_special_tokens (list, optional): Additional special tokens to include.
            use_default_system_prompt (bool, optional): Whether to use the default system prompt.

        Returns:
            None.

        Raises:
            MissingBackendError: If the required backend 'protobuf' is not available.
        """
        requires_backends(self, "protobuf")
        self.sp_model_kwargs = {} if sp_model_kwargs is None else sp_model_kwargs
        bos_token = AddedToken(bos_token, normalized=False, special=True) if isinstance(bos_token, str) else bos_token
        eos_token = AddedToken(eos_token, normalized=False, special=True) if isinstance(eos_token, str) else eos_token
        unk_token = AddedToken(unk_token, normalized=False, special=True) if isinstance(unk_token, str) else unk_token

        self.use_default_system_prompt = use_default_system_prompt
        # mark tokens special to skip them
        additional_special_tokens = additional_special_tokens or []
        for token in [prefix_token, middle_token, suffix_token, eot_token]:
            additional_special_tokens += [token] if token is not None else []

        self.vocab_file = vocab_file
        self.add_bos_token = add_bos_token
        self.add_eos_token = add_eos_token
        self._prefix_token = prefix_token
        self._middle_token = middle_token
        self._suffix_token = suffix_token
        self._eot_token = eot_token
        self.fill_token = fill_token
        self.suffix_first = suffix_first
        self.sp_model = self.get_spm_processor()

        super().__init__(
            bos_token=bos_token,
            eos_token=eos_token,
            unk_token=unk_token,
            add_bos_token=add_bos_token,
            add_eos_token=add_eos_token,
            prefix_token=prefix_token,
            middle_token=middle_token,
            suffix_token=suffix_token,
            eot_token=eot_token,
            fill_token=fill_token,
            sp_model_kwargs=self.sp_model_kwargs,
            suffix_first=suffix_first,
            clean_up_tokenization_spaces=clean_up_tokenization_spaces,
            additional_special_tokens=additional_special_tokens,
            use_default_system_prompt=use_default_system_prompt,
            **kwargs,
        )

    @property
    def unk_token_length(self):
        """
        Returns the length of the unknown token in the CodeLlamaTokenizer.

        Args:
            self (CodeLlamaTokenizer): An instance of the CodeLlamaTokenizer class.

        Returns:
            int: The length of the unknown token. If the unknown token is not found, it returns 0.

        Raises:
            None.

        """
        return len(self.sp_model.encode(str(self.unk_token)))

    def get_spm_processor(self):
        """
        This method initializes and returns a SentencePieceProcessor object for tokenizing text using
        the SentencePiece library.

        Args:
            self: The instance of the CodeLlamaTokenizer class.

        Returns:
            spm.SentencePieceProcessor: A tokenizer object of type spm.SentencePieceProcessor.

        Raises:
            None:
                However, potential exceptions that may occur during the method execution include:

                - FileNotFoundError: If the specified vocab_file cannot be found.
                - IOError: If there are issues with reading the vocab_file.
                - ValueError: If the provided sp_model_kwargs are invalid or missing required information.
                - Any other relevant exceptions that may occur during the loading and initialization of the tokenizer.
        """
        tokenizer = spm.SentencePieceProcessor(**self.sp_model_kwargs)
        with open(self.vocab_file, "rb") as f:
            sp_model = f.read()
            model_pb2 = import_protobuf()
            model = model_pb2.ModelProto.FromString(sp_model)
            normalizer_spec = model_pb2.NormalizerSpec()
            normalizer_spec.add_dummy_prefix = False
            model.normalizer_spec.MergeFrom(normalizer_spec)
            sp_model = model.SerializeToString()
            tokenizer.LoadFromSerializedProto(sp_model)
        return tokenizer

    @property
    def prefix_token(self):
        """
        Returns the prefix token used for tokenizing code in the CodeLlamaTokenizer class.

        Args:
            self: An instance of the CodeLlamaTokenizer class.

        Returns:
            None.

        Raises:
            None.

        This method retrieves the prefix token that is used for tokenizing code in the CodeLlamaTokenizer class.
        The prefix token serves as a marker or indicator to identify the start of a code block or expression.
        It is used during the tokenization process to correctly identify and separate different parts of the code.

        Note that the prefix token is an internal attribute of the CodeLlamaTokenizer class, and it is not meant to
        be modified directly. To change the prefix token, use the appropriate setter method or modify the underlying
        implementation of the class if necessary.

        Example:
            ```python
            >>> tokenizer = CodeLlamaTokenizer.from_pretrained("codellama/CodeLlama-7b-Instruct-hf")
            >>> prefix = tokenizer.prefix_token
            >>> print(prefix)
            >>> # Output: '▁<PRE>'
            ```
        """
        return self._prefix_token

    @property
    def prefix_id(self):
        """
        Method to retrieve the ID associated with the prefix token in the CodeLlamaTokenizer class.

        Args:
            self (CodeLlamaTokenizer): The instance of the CodeLlamaTokenizer class.

        Returns:
            None: If the prefix token is None, the method returns None.
                Otherwise, it returns the ID associated with the prefix token.

        Raises:
            None
        """
        if self._prefix_token is None:
            return None
        return self.convert_tokens_to_ids(self.prefix_token)

    @property
    def middle_token(self):
        """
        This method 'middle_token' is a property method defined in the class 'CodeLlamaTokenizer' that
        retrieves the middle token stored in the instance.

        Args:
            self (CodeLlamaTokenizer): The instance of the CodeLlamaTokenizer class.
                This parameter refers to the current instance of the class.

        Returns:
            None: This method returns the middle token stored in the instance.
                If no middle token is set, it returns None.

        Raises:
            None.
        """
        return self._middle_token

    @property
    def middle_id(self):
        """
        Get the middle ID of the CodeLlamaTokenizer instance.

        Args:
            self (CodeLlamaTokenizer): The instance of the CodeLlamaTokenizer class.

        Returns:
            None: If the middle token is None.

        Raises:
            None.

        This method returns the middle ID of the CodeLlamaTokenizer instance.
        If the middle token is None, it returns None.
        The middle ID is obtained by converting the middle token to its corresponding ID using the
        'convert_tokens_to_ids' method.
        """
        if self._middle_token is None:
            return None
        return self.convert_tokens_to_ids(self.middle_token)

    @property
    def suffix_token(self):
        """
        Method to retrieve the suffix token associated with the CodeLlamaTokenizer instance.

        Args:
            self (CodeLlamaTokenizer): The instance of CodeLlamaTokenizer.
                This parameter refers to the instance of the CodeLlamaTokenizer class on which the method is being called.

        Returns:
            None: This method returns the suffix token corresponding to the CodeLlamaTokenizer instance.
                The suffix token is a property value associated with the instance.

        Raises:
            None
        """
        return self._suffix_token

    @property
    def suffix_id(self):
        """
        Returns the ID of the suffix token.

        Args:
            self (CodeLlamaTokenizer): The instance of the CodeLlamaTokenizer class.

        Returns:
            None: If the suffix token is None.

        Raises:
            None.

        This method retrieves the ID corresponding to the suffix token.
        If the suffix token is None, the method returns None.
        The suffix token is obtained by converting the suffix token to its corresponding ID using
        the convert_tokens_to_ids method.
        """
        if self._suffix_token is None:
            return None
        return self.convert_tokens_to_ids(self.suffix_token)

    @property
    def eot_token(self):
        """
        This method 'eot_token' in the class 'CodeLlamaTokenizer' retrieves the end-of-text token.

        Args:
            self (CodeLlamaTokenizer): The instance of the CodeLlamaTokenizer class.

        Returns:
            None: This method returns the end-of-text token value stored in the instance.

        Raises:
            None.
        """
        return self._eot_token

    @property
    def eot_id(self):
        """
        This method 'eot_id' is a property in the 'CodeLlamaTokenizer' class.

        Args:
            self: The instance of the 'CodeLlamaTokenizer' class.

        Returns:
            None: If the '_eot_token' attribute is None, the method returns None.
            int: If the '_eot_token' attribute is not None, the method returns the integer value obtained
                by converting the token to its corresponding ID using the 'convert_tokens_to_ids' method.

        Raises:
            No specific exceptions are raised by this method.
        """
        if self._eot_token is None:
            return None
        return self.convert_tokens_to_ids(self.eot_token)

    @property
    def vocab_size(self):
        """Returns vocab size"""
        return self.sp_model.get_piece_size()

    # Copied from transformers.models.llama.tokenization_llama.LlamaTokenizer.get_vocab
    def get_vocab(self):
        """Returns vocab as a dict"""
        vocab = {self.convert_ids_to_tokens(i): i for i in range(self.vocab_size)}
        vocab.update(self.added_tokens_encoder)
        return vocab

    def tokenize(self, prefix, suffix=None, suffix_first=False, **kwargs) -> List[int]:
        """
        Tokenizes the given prefix and suffix to generate a list of integers representing tokens.

        Args:
            self (CodeLlamaTokenizer): An instance of the CodeLlamaTokenizer class.
            prefix (str): The prefix string to tokenize.
            suffix (str, optional): The suffix string to tokenize. Defaults to None.
            suffix_first (bool, optional): Flag indicating whether to place the suffix before the prefix.
                Defaults to False.

        Returns:
            List[int]: A list of integers representing the tokens generated from the prefix and suffix.

        Raises:
            ValueError: If the input includes a prefix and a suffix used for the infilling task,
                or if the model does not support infilling.

        Note:
            - The `prefix` and `suffix` parameters are used to split the input on the `fill_token` token to
            create a suffix and prefix.
            - If only a prefix is provided, the method tokenizes the prefix and returns the resulting tokens.
            - If a prefix and suffix are provided, the method tokenizes both and returns the tokens in the
            specified order.
            - The `suffix_first` parameter takes precedence over the class attribute `suffix_first`
            if both are provided.
            - The method removes special tokens from the beginning of the tokens list if they match the
            specified conditions.
            - The method replaces occurrences of the `SPIECE_UNDERLINE` token in the prefix with a space.
        """
        # add a prefix space to `prefix`
        if self.fill_token is not None and self.fill_token in prefix and suffix is None:
            prefix, suffix = prefix.split(self.fill_token)

        if len(prefix) > 0:
            prefix = SPIECE_UNDERLINE + prefix.replace(SPIECE_UNDERLINE, " ")

        if suffix is None or len(suffix) < 1:
            tokens = super().tokenize(prefix, **kwargs)
            if len(tokens) > 1 and tokens[0] == SPIECE_UNDERLINE and tokens[1] in self.all_special_tokens:
                tokens = tokens[1:]
            return tokens

        prefix_tokens = self._tokenize(prefix)  # prefix has an extra `SPIECE_UNDERLINE`

        if None in (self.prefix_id, self.middle_id, self.suffix_id):
            raise ValueError(
                "The input either includes a `prefix` and a `suffix` used for the infilling task,"
                f"  or can be split on the {self.fill_token} token, creating a suffix and prefix,"
                " but the model does not support `infilling`."
            )
        suffix_tokens = self._tokenize(suffix)  # make sure CodeLlama sp model does not mess up

        suffix_first = suffix_first if suffix_first is not None else self.suffix_first
        if suffix_first:
            # format as " <PRE> <SUF>{suf} <MID> {pre}"
            return [self.prefix_token, self.suffix_token] + suffix_tokens + [self.middle_token] + prefix_tokens
        # format as " <PRE> {pre} <SUF>{suf} <MID>"
        return [self.prefix_token] + prefix_tokens + [self.suffix_token] + suffix_tokens + [self.middle_token]

    def _tokenize(self, text, **kwargs):
        """
        Returns a tokenized string.

        We de-activated the `add_dummy_prefix` option, thus the sentencepiece internals will always strip any
        SPIECE_UNDERLINE. For example: `self.sp_model.encode(f"{SPIECE_UNDERLINE}Hey", out_type = str)` will give
        `['H', 'e', 'y']` instead of `['▁He', 'y']`. Thus we always encode `f"{unk_token}text"` and strip the
        `unk_token`. Here is an example with `unk_token = "<unk>"` and `unk_token_length = 4`.
        `self.tokenizer.sp_model.encode("<unk> Hey", out_type = str)[4:]`.
        """
        tokens = self.sp_model.encode(text, out_type=str)
        if not text.startswith((SPIECE_UNDERLINE, " ")):
            return tokens
        # 1. Encode string + prefix ex: "<unk> Hey"
        tokens = self.sp_model.encode(self.unk_token + text, out_type=str)
        # 2. Remove self.unk_token from ['<','unk','>', '▁Hey']
        return tokens[self.unk_token_length :] if len(tokens) >= self.unk_token_length else tokens

    # Copied from transformers.models.llama.tokenization_llama.LlamaTokenizer._convert_token_to_id
    def _convert_token_to_id(self, token):
        """Converts a token (str) in an id using the vocab."""
        return self.sp_model.piece_to_id(token)

    # Copied from transformers.models.llama.tokenization_llama.LlamaTokenizer._convert_id_to_token
    def _convert_id_to_token(self, index):
        """Converts an index (integer) in a token (str) using the vocab."""
        token = self.sp_model.IdToPiece(index)
        return token

    def convert_tokens_to_string(self, tokens):
        """Converts a sequence of tokens (string) in a single string."""
        # since we manually add the prefix space, we have to remove it when decoding
        if tokens[0].startswith(SPIECE_UNDERLINE):
            tokens[0] = tokens[0][1:]

        current_sub_tokens = []
        out_string = ""
        for _, token in enumerate(tokens):
            # make sure that special tokens are not decoded using sentencepiece model
            if token in self.all_special_tokens:
                out_string += self.sp_model.decode(current_sub_tokens) + token
                current_sub_tokens = []
            else:
                current_sub_tokens.append(token)
        out_string += self.sp_model.decode(current_sub_tokens)
        return out_string

    # Copied from transformers.models.llama.tokenization_llama.LlamaTokenizer.save_vocabulary
    def save_vocabulary(self, save_directory, filename_prefix: Optional[str] = None) -> Tuple[str]:
        """
        Save the vocabulary and special tokens file to a directory.

        Args:
            save_directory (`str`):
                The directory in which to save the vocabulary.

        Returns:
            `Tuple(str)`: Paths to the files saved.
        """
        if not os.path.isdir(save_directory):
            logger.error(f"Vocabulary path ({save_directory}) should be a directory")
            return
        out_vocab_file = os.path.join(
            save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"]
        )

        if os.path.abspath(self.vocab_file) != os.path.abspath(out_vocab_file) and os.path.isfile(self.vocab_file):
            copyfile(self.vocab_file, out_vocab_file)
        elif not os.path.isfile(self.vocab_file):
            with open(out_vocab_file, "wb") as fi:
                content_spiece_model = self.sp_model.serialized_model_proto()
                fi.write(content_spiece_model)

        return (out_vocab_file,)

    # Copied from transformers.models.llama.tokenization_llama.LlamaTokenizer.build_inputs_with_special_tokens
    def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None):
        """
        Method to build inputs with special tokens in the CodeLlamaTokenizer class.

        Args:
            self: Reference to the current instance of the class.
            token_ids_0 (list): List of token IDs for the first input sequence.
            token_ids_1 (list, optional): List of token IDs for the second input sequence. Defaults to None.

        Returns:
            list: A list representing the input sequences with special tokens added based on the configuration settings.

        Raises:
            None.
        """
        bos_token_id = [self.bos_token_id] if self.add_bos_token else []
        eos_token_id = [self.eos_token_id] if self.add_eos_token else []

        output = bos_token_id + token_ids_0 + eos_token_id

        if token_ids_1 is not None:
            output = output + bos_token_id + token_ids_1 + eos_token_id

        return output

    # Copied from transformers.models.llama.tokenization_llama.LlamaTokenizer.get_special_tokens_mask
    def get_special_tokens_mask(
        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None, already_has_special_tokens: bool = False
    ) -> List[int]:
        """
        Retrieve sequence ids from a token list that has no special tokens added. This method is called when adding
        special tokens using the tokenizer `prepare_for_model` method.

        Args:
            token_ids_0 (`List[int]`):
                List of IDs.
            token_ids_1 (`List[int]`, *optional*):
                Optional second list of IDs for sequence pairs.
            already_has_special_tokens (`bool`, *optional*, defaults to `False`):
                Whether or not the token list is already formatted with special tokens for the model.

        Returns:
            `List[int]`: A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token.
        """
        if already_has_special_tokens:
            return super().get_special_tokens_mask(
                token_ids_0=token_ids_0, token_ids_1=token_ids_1, already_has_special_tokens=True
            )

        bos_token_id = [1] if self.add_bos_token else []
        eos_token_id = [1] if self.add_eos_token else []

        if token_ids_1 is None:
            return bos_token_id + ([0] * len(token_ids_0)) + eos_token_id
        return (
            bos_token_id
            + ([0] * len(token_ids_0))
            + eos_token_id
            + bos_token_id
            + ([0] * len(token_ids_1))
            + eos_token_id
        )

    # Copied from transformers.models.llama.tokenization_llama.LlamaTokenizer.create_token_type_ids_from_sequences
    def create_token_type_ids_from_sequences(
        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
    ) -> List[int]:
        """
        Creates a mask from the two sequences passed to be used in a sequence-pair classification task. An ALBERT
        sequence pair mask has the following format:
        ```
        0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1
        | first sequence    | second sequence |
        ```

        if token_ids_1 is None, only returns the first portion of the mask (0s).

        Args:
            token_ids_0 (`List[int]`):
                List of ids.
            token_ids_1 (`List[int]`, *optional*):
                Optional second list of IDs for sequence pairs.

        Returns:
            `List[int]`: List of [token type IDs](../glossary#token-type-ids) according to the given sequence(s).
        """
        bos_token_id = [self.bos_token_id] if self.add_bos_token else []
        eos_token_id = [self.eos_token_id] if self.add_eos_token else []

        output = [0] * len(bos_token_id + token_ids_0 + eos_token_id)

        if token_ids_1 is not None:
            output += [1] * len(bos_token_id + token_ids_1 + eos_token_id)

        return output

    @property
    # Copied from transformers.models.llama.tokenization_llama.LlamaTokenizer.default_chat_template
    def default_chat_template(self):
        """
        LLaMA uses [INST] and [/INST] to indicate user messages, and <<SYS>> and <</SYS>> to indicate system messages.
        Assistant messages do not have special tokens, because LLaMA chat models are generally trained with strict
        user/assistant/user/assistant message ordering, and so assistant messages can be identified from the ordering
        rather than needing special tokens. The system message is partly 'embedded' in the first user message, which
        results in an unusual token ordering when it is present. This template should definitely be changed if you wish
        to fine-tune a model with more flexible role ordering!

        The output should look something like:

            <bos>[INST] B_SYS SystemPrompt E_SYS Prompt [/INST] Answer <eos><bos>[INST] Prompt [/INST] Answer <eos>
            <bos>[INST] Prompt [/INST]

        The reference for this chat template is [this code
        snippet](https://github.com/facebookresearch/llama/blob/556949fdfb72da27c2f4a40b7f0e4cf0b8153a28/llama/generation.py#L320-L362)
        in the original repository.
        """
        logger.warning_once(
            "\nNo chat template is defined for this tokenizer - using the default template "
            f"for the {self.__class__.__name__} class. If the default is not appropriate for "
            "your model, please set `tokenizer.chat_template` to an appropriate template. "
            "See https://hf-mirror.com/docs/transformers/main/chat_templating for more information.\n"
        )
        template = (
            "{% if messages[0]['role'] == 'system' %}"
            "{% set loop_messages = messages[1:] %}"  # Extract system message if it's present
            "{% set system_message = messages[0]['content'] %}"
            "{% elif USE_DEFAULT_PROMPT == true and not '<<SYS>>' in messages[0]['content'] %}"
            "{% set loop_messages = messages %}"  # Or use the default system message if the flag is set
            "{% set system_message = 'DEFAULT_SYSTEM_MESSAGE' %}"
            "{% else %}"
            "{% set loop_messages = messages %}"
            "{% set system_message = false %}"
            "{% endif %}"
            "{% for message in loop_messages %}"  # Loop over all non-system messages
            "{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}"
            "{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}"
            "{% endif %}"
            "{% if loop.index0 == 0 and system_message != false %}"  # Embed system message in first message
            "{% set content = '<<SYS>>\\n' + system_message + '\\n<</SYS>>\\n\\n' + message['content'] %}"
            "{% else %}"
            "{% set content = message['content'] %}"
            "{% endif %}"
            "{% if message['role'] == 'user' %}"  # After all of that, handle messages/roles in a fairly normal way
            "{{ bos_token + '[INST] ' + content.strip() + ' [/INST]' }}"
            "{% elif message['role'] == 'system' %}"
            "{{ '<<SYS>>\\n' + content.strip() + '\\n<</SYS>>\\n\\n' }}"
            "{% elif message['role'] == 'assistant' %}"
            "{{ ' '  + content.strip() + ' ' + eos_token }}"
            "{% endif %}"
            "{% endfor %}"
        )
        template = template.replace("USE_DEFAULT_PROMPT", "true" if self.use_default_system_prompt else "false")
        default_message = DEFAULT_SYSTEM_PROMPT.replace("\n", "\\n").replace("'", "\\'")
        template = template.replace("DEFAULT_SYSTEM_MESSAGE", default_message)

        return template

    def __getstate__(self):
        """
        Method: __getstate__

        Description:
            This method is used to retrieve the state of the CodeLlamaTokenizer object for serialization purposes.
            It returns a dictionary representing the current state of the object.

        Args:
            self: The instance of the CodeLlamaTokenizer class.

        Returns:
            dict: A copy of the instance's `__dict__` in which the live `sp_model` is set to `None` and its
                serialized proto is stored under `sp_model_proto`.

        Raises:
            None.
        """
        state = self.__dict__.copy()
        state["sp_model"] = None
        state["sp_model_proto"] = self.sp_model.serialized_model_proto()
        return state

    def __setstate__(self, d):
        """
        This method '__setstate__' is defined within the 'CodeLlamaTokenizer' class to set the internal state of the
        object based on the provided dictionary 'd'. It reconstructs the object's state, including the SentencePiece
        model, by loading it from a serialized proto.

        Args:
            self (CodeLlamaTokenizer): The instance of the CodeLlamaTokenizer class.
            d (dict): A dictionary containing the state information to be set.
                It should include the necessary attributes for reconstructing the object's state.

        Returns:
            None: This method does not return any value explicitly.
                It operates by modifying the internal state of the object.

        Raises:
            None:
                However, potential exceptions that could be raised during the execution may include but are not limited to:

                - TypeError: If the input 'd' is not a dictionary.
                - ValueError: If the input 'd' does not contain the required state information.
                - Any exceptions related to the SentencePieceProcessor initialization or loading process.
        """
        self.__dict__ = d
        self.sp_model = spm.SentencePieceProcessor(**self.sp_model_kwargs)
        self.sp_model.LoadFromSerializedProto(self.sp_model_proto)

mindnlp.transformers.models.llama.tokenization_code_llama.CodeLlamaTokenizer.default_chat_template property

LLaMA uses [INST] and [/INST] to indicate user messages, and <<SYS>> and <</SYS>> to indicate system messages. Assistant messages do not have special tokens, because LLaMA chat models are generally trained with strict user/assistant/user/assistant message ordering, and so assistant messages can be identified from the ordering rather than needing special tokens. The system message is partly 'embedded' in the first user message, which results in an unusual token ordering when it is present. This template should definitely be changed if you wish to fine-tune a model with more flexible role ordering!

The output should look something like:

<bos>[INST] B_SYS SystemPrompt E_SYS Prompt [/INST] Answer <eos><bos>[INST] Prompt [/INST] Answer <eos>
<bos>[INST] Prompt [/INST]

The reference for this chat template is this code snippet in the original repository.
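Below is a minimal sketch of how this template is typically applied via apply_chat_template (assuming that method is available on the tokenizer, as in Hugging Face Transformers, and using the public codellama/CodeLlama-7b-Instruct-hf checkpoint for illustration; the exact string depends on the tokenizer's special tokens):

>>> from mindnlp.transformers import CodeLlamaTokenizer
>>> tokenizer = CodeLlamaTokenizer.from_pretrained("codellama/CodeLlama-7b-Instruct-hf")
>>> messages = [
...     {"role": "user", "content": "Write a hello world in Python."},
...     {"role": "assistant", "content": "print('hello world')"},
... ]
>>> tokenizer.apply_chat_template(messages, tokenize=False)
>>> # Expected to look roughly like:
>>> # "<s>[INST] Write a hello world in Python. [/INST] print('hello world') </s>"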

mindnlp.transformers.models.llama.tokenization_code_llama.CodeLlamaTokenizer.eot_id property

This method 'eot_id' is a property in the 'CodeLlamaTokenizer' class.

PARAMETER DESCRIPTION
self

The instance of the 'CodeLlamaTokenizer' class.

RETURNS DESCRIPTION
None

If the '_eot_token' attribute is None, the method returns None.

int

If the '_eot_token' attribute is not None, the method returns the integer value obtained by converting the token to its corresponding ID using the 'convert_tokens_to_ids' method.

mindnlp.transformers.models.llama.tokenization_code_llama.CodeLlamaTokenizer.eot_token property

This method 'eot_token' in the class 'CodeLlamaTokenizer' retrieves the end-of-text token.

PARAMETER DESCRIPTION
self

The instance of the CodeLlamaTokenizer class.

TYPE: CodeLlamaTokenizer

RETURNS DESCRIPTION
None

This method returns the end-of-text token value stored in the instance.

mindnlp.transformers.models.llama.tokenization_code_llama.CodeLlamaTokenizer.middle_id property

Get the middle ID of the CodeLlamaTokenizer instance.

PARAMETER DESCRIPTION
self

The instance of the CodeLlamaTokenizer class.

TYPE: CodeLlamaTokenizer

RETURNS DESCRIPTION
None

If the middle token is None.

This method returns the middle ID of the CodeLlamaTokenizer instance. If the middle token is None, it returns None. The middle ID is obtained by converting the middle token to its corresponding ID using the 'convert_tokens_to_ids' method.

mindnlp.transformers.models.llama.tokenization_code_llama.CodeLlamaTokenizer.middle_token property

This method 'middle_token' is a property method defined in the class 'CodeLlamaTokenizer' that retrieves the middle token stored in the instance.

PARAMETER DESCRIPTION
self

The instance of the CodeLlamaTokenizer class. This parameter refers to the current instance of the class.

TYPE: CodeLlamaTokenizer

RETURNS DESCRIPTION
None

This method returns the middle token stored in the instance. If no middle token is set, it returns None.

mindnlp.transformers.models.llama.tokenization_code_llama.CodeLlamaTokenizer.prefix_id property

Method to retrieve the ID associated with the prefix token in the CodeLlamaTokenizer class.

PARAMETER DESCRIPTION
self

The instance of the CodeLlamaTokenizer class.

TYPE: CodeLlamaTokenizer

RETURNS DESCRIPTION
None

If the prefix token is None, the method returns None. Otherwise, it returns the ID associated with the prefix token.

mindnlp.transformers.models.llama.tokenization_code_llama.CodeLlamaTokenizer.prefix_token property

Returns the prefix token used for tokenizing code in the CodeLlamaTokenizer class.

PARAMETER DESCRIPTION
self

An instance of the CodeLlamaTokenizer class.

RETURNS DESCRIPTION

None.

This method retrieves the prefix token used by the CodeLlamaTokenizer for infilling (fill-in-the-middle) prompts. The prefix token marks the portion of code that precedes the span to be filled in, and it is inserted automatically by `tokenize` when a suffix (or the `fill_token`) is present.

Note that the prefix token is stored as an internal attribute of the CodeLlamaTokenizer instance and is not meant to be modified directly; it is configured through the `prefix_token` argument at initialization.

Example
>>> tokenizer = CodeLlamaTokenizer("path/to/tokenizer.model")  # any Llama-style SentencePiece vocab file
>>> prefix = tokenizer.prefix_token
>>> print(prefix)
▁<PRE>

mindnlp.transformers.models.llama.tokenization_code_llama.CodeLlamaTokenizer.suffix_id property

Returns the ID of the suffix token.

PARAMETER DESCRIPTION
self

The instance of the CodeLlamaTokenizer class.

TYPE: CodeLlamaTokenizer

RETURNS DESCRIPTION
None

If the suffix token is None.

This method retrieves the ID corresponding to the suffix token. If the suffix token is None, the method returns None. Otherwise, the ID is obtained by converting the suffix token with the convert_tokens_to_ids method.

mindnlp.transformers.models.llama.tokenization_code_llama.CodeLlamaTokenizer.suffix_token property

Method to retrieve the suffix token associated with the CodeLlamaTokenizer instance.

PARAMETER DESCRIPTION
self

The instance of CodeLlamaTokenizer. This parameter refers to the instance of the CodeLlamaTokenizer class on which the method is being called.

TYPE: CodeLlamaTokenizer

RETURNS DESCRIPTION
None

This method returns the suffix token corresponding to the CodeLlamaTokenizer instance. The suffix token is a property value associated with the instance.

mindnlp.transformers.models.llama.tokenization_code_llama.CodeLlamaTokenizer.unk_token_length property

Returns the length of the unknown token in the CodeLlamaTokenizer.

PARAMETER DESCRIPTION
self

An instance of the CodeLlamaTokenizer class.

TYPE: CodeLlamaTokenizer

RETURNS DESCRIPTION
int

The number of SentencePiece pieces that the unknown token string is encoded into. This length is used by `_tokenize` to strip the prepended unknown token from the encoded output.

mindnlp.transformers.models.llama.tokenization_code_llama.CodeLlamaTokenizer.vocab_size property

Returns vocab size

mindnlp.transformers.models.llama.tokenization_code_llama.CodeLlamaTokenizer.__getstate__()

Description

This method is used to retrieve the state of the CodeLlamaTokenizer object for serialization purposes. It returns a dictionary representing the current state of the object.

PARAMETER DESCRIPTION
self

The instance of the CodeLlamaTokenizer class.

RETURNS DESCRIPTION
dict

A copy of the instance's __dict__ in which the live sp_model is set to None and its serialized proto is stored under 'sp_model_proto'.

Source code in mindnlp\transformers\models\llama\tokenization_code_llama.py
def __getstate__(self):
    """
    Method: __getstate__

    Description:
        This method is used to retrieve the state of the CodeLlamaTokenizer object for serialization purposes.
        It returns a dictionary representing the current state of the object.

    Args:
        self: The instance of the CodeLlamaTokenizer class.

    Returns:
        dict: A copy of the instance's `__dict__` in which the live `sp_model` is set to `None` and its
            serialized proto is stored under `sp_model_proto`.

    Raises:
        None.
    """
    state = self.__dict__.copy()
    state["sp_model"] = None
    state["sp_model_proto"] = self.sp_model.serialized_model_proto()
    return state

mindnlp.transformers.models.llama.tokenization_code_llama.CodeLlamaTokenizer.__init__(vocab_file, unk_token='<unk>', bos_token='<s>', eos_token='</s>', prefix_token='▁<PRE>', middle_token='▁<MID>', suffix_token='▁<SUF>', eot_token='▁<EOT>', fill_token='<FILL_ME>', suffix_first=False, sp_model_kwargs=None, add_bos_token=True, add_eos_token=False, clean_up_tokenization_spaces=False, additional_special_tokens=None, use_default_system_prompt=False, **kwargs)

This method initializes an instance of the CodeLlamaTokenizer class.

PARAMETER DESCRIPTION
self

The instance of the class.

vocab_file

The path to the vocabulary file.

TYPE: str

unk_token

The unknown token, default is '<unk>'.

TYPE: str DEFAULT: '<unk>'

bos_token

The beginning of sequence token, default is '<s>'.

TYPE: str DEFAULT: '<s>'

eos_token

The end of sequence token, default is '</s>'.

TYPE: str DEFAULT: '</s>'

prefix_token

The prefix token, default is '▁<PRE>'.

TYPE: str DEFAULT: '▁<PRE>'

middle_token

The middle token, default is '▁<MID>'.

TYPE: str DEFAULT: '▁<MID>'

suffix_token

The suffix token, default is '▁<SUF>'.

TYPE: str DEFAULT: '▁<SUF>'

eot_token

The end of text token, default is '▁<EOT>'.

TYPE: str DEFAULT: '▁<EOT>'

fill_token

The fill token, default is '<FILL_ME>'.

TYPE: str DEFAULT: '<FILL_ME>'

suffix_first

Indicates whether suffix comes before prefix.

TYPE: bool DEFAULT: False

sp_model_kwargs

Additional arguments for the sentencepiece model.

TYPE: Optional[Dict[str, Any]] DEFAULT: None

add_bos_token

Whether to add the bos token, default is True.

TYPE: bool DEFAULT: True

add_eos_token

Whether to add the eos token, default is False.

TYPE: bool DEFAULT: False

clean_up_tokenization_spaces

Whether to clean up tokenization spaces, default is False.

TYPE: bool DEFAULT: False

additional_special_tokens

Additional special tokens to include.

TYPE: list DEFAULT: None

use_default_system_prompt

Whether to use the default system prompt.

TYPE: bool DEFAULT: False

RETURNS DESCRIPTION

None.

RAISES DESCRIPTION
MissingBackendError

If the required backend 'protobuf' is not available.

Source code in mindnlp\transformers\models\llama\tokenization_code_llama.py
def __init__(
    self,
    vocab_file,
    unk_token="<unk>",
    bos_token="<s>",
    eos_token="</s>",
    prefix_token="▁<PRE>",
    middle_token="▁<MID>",
    suffix_token="▁<SUF>",
    eot_token="▁<EOT>",
    fill_token="<FILL_ME>",
    suffix_first=False,
    sp_model_kwargs: Optional[Dict[str, Any]] = None,
    add_bos_token=True,
    add_eos_token=False,
    clean_up_tokenization_spaces=False,
    additional_special_tokens=None,
    use_default_system_prompt=False,
    **kwargs,
):
    """
    This method initializes an instance of the CodeLlamaTokenizer class.

    Args:
        self: The instance of the class.
        vocab_file (str): The path to the vocabulary file.
        unk_token (str, optional): The unknown token, default is '<unk>'.
        bos_token (str, optional): The beginning of sequence token, default is '<s>'.
        eos_token (str, optional): The end of sequence token, default is '</s>'.
        prefix_token (str, optional): The prefix token, default is '▁<PRE>'.
        middle_token (str, optional): The middle token, default is '▁<MID>'.
        suffix_token (str, optional): The suffix token, default is '▁<SUF>'.
        eot_token (str, optional): The end of text token, default is '▁<EOT>'.
        fill_token (str, optional): The fill token, default is '<FILL_ME>'.
        suffix_first (bool): Indicates whether suffix comes before prefix.
        sp_model_kwargs (Optional[Dict[str, Any]], optional): Additional arguments for the sentencepiece model.
        add_bos_token (bool, optional): Whether to add the bos token, default is True.
        add_eos_token (bool, optional): Whether to add the eos token, default is False.
        clean_up_tokenization_spaces (bool, optional): Whether to clean up tokenization spaces, default is False.
        additional_special_tokens (list, optional): Additional special tokens to include.
        use_default_system_prompt (bool, optional): Whether to use the default system prompt.

    Returns:
        None.

    Raises:
        MissingBackendError: If the required backend 'protobuf' is not available.
    """
    requires_backends(self, "protobuf")
    self.sp_model_kwargs = {} if sp_model_kwargs is None else sp_model_kwargs
    bos_token = AddedToken(bos_token, normalized=False, special=True) if isinstance(bos_token, str) else bos_token
    eos_token = AddedToken(eos_token, normalized=False, special=True) if isinstance(eos_token, str) else eos_token
    unk_token = AddedToken(unk_token, normalized=False, special=True) if isinstance(unk_token, str) else unk_token

    self.use_default_system_prompt = use_default_system_prompt
    # mark tokens special to skip them
    additional_special_tokens = additional_special_tokens or []
    for token in [prefix_token, middle_token, suffix_token, eot_token]:
        additional_special_tokens += [token] if token is not None else []

    self.vocab_file = vocab_file
    self.add_bos_token = add_bos_token
    self.add_eos_token = add_eos_token
    self._prefix_token = prefix_token
    self._middle_token = middle_token
    self._suffix_token = suffix_token
    self._eot_token = eot_token
    self.fill_token = fill_token
    self.suffix_first = suffix_first
    self.sp_model = self.get_spm_processor()

    super().__init__(
        bos_token=bos_token,
        eos_token=eos_token,
        unk_token=unk_token,
        add_bos_token=add_bos_token,
        add_eos_token=add_eos_token,
        prefix_token=prefix_token,
        middle_token=middle_token,
        suffix_token=suffix_token,
        eot_token=eot_token,
        fill_token=fill_token,
        sp_model_kwargs=self.sp_model_kwargs,
        suffix_first=suffix_first,
        clean_up_tokenization_spaces=clean_up_tokenization_spaces,
        additional_special_tokens=additional_special_tokens,
        use_default_system_prompt=use_default_system_prompt,
        **kwargs,
    )
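As a usage sketch (the vocabulary path below is hypothetical; any Llama-style SentencePiece `.model` file works), initialization registers the infilling markers as additional special tokens:

>>> from mindnlp.transformers import CodeLlamaTokenizer
>>> tokenizer = CodeLlamaTokenizer("path/to/tokenizer.model")  # hypothetical local SentencePiece file
>>> # the infilling markers set in __init__ are exposed as additional special tokens
>>> [t in tokenizer.additional_special_tokens for t in ("▁<PRE>", "▁<MID>", "▁<SUF>", "▁<EOT>")]
[True, True, True, True]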

mindnlp.transformers.models.llama.tokenization_code_llama.CodeLlamaTokenizer.__setstate__(d)

This method '__setstate__' is defined within the 'CodeLlamaTokenizer' class to set the internal state of the object based on the provided dictionary 'd'. It reconstructs the object's state, including the SentencePiece model, by loading it from a serialized proto.

PARAMETER DESCRIPTION
self

The instance of the CodeLlamaTokenizer class.

TYPE: CodeLlamaTokenizer

d

A dictionary containing the state information to be set. It should include the necessary attributes for reconstructing the object's state.

TYPE: dict

RETURNS DESCRIPTION
None

This method does not return any value explicitly. It operates by modifying the internal state of the object.

RAISES DESCRIPTION
None

However, potential exceptions that could be raised during the execution may include but are not limited to:

  • TypeError: If the input 'd' is not a dictionary.
  • ValueError: If the input 'd' does not contain the required state information.
  • Any exceptions related to the SentencePieceProcessor initialization or loading process.
Source code in mindnlp\transformers\models\llama\tokenization_code_llama.py
def __setstate__(self, d):
    """
    This method '__setstate__' is defined within the 'CodeLlamaTokenizer' class to set the internal state of the
    object based on the provided dictionary 'd'. It reconstructs the object's state, including the SentencePiece
    model, by loading it from a serialized proto.

    Args:
        self (CodeLlamaTokenizer): The instance of the CodeLlamaTokenizer class.
        d (dict): A dictionary containing the state information to be set.
            It should include the necessary attributes for reconstructing the object's state.

    Returns:
        None: This method does not return any value explicitly.
            It operates by modifying the internal state of the object.

    Raises:
        None:
            However, potential exceptions that could be raised during the execution may include but are not limited to:

            - TypeError: If the input 'd' is not a dictionary.
            - ValueError: If the input 'd' does not contain the required state information.
            - Any exceptions related to the SentencePieceProcessor initialization or loading process.
    """
    self.__dict__ = d
    self.sp_model = spm.SentencePieceProcessor(**self.sp_model_kwargs)
    self.sp_model.LoadFromSerializedProto(self.sp_model_proto)
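Together with `__getstate__`, this makes the tokenizer picklable. A minimal round-trip sketch (assuming `tokenizer` is an already-initialized CodeLlamaTokenizer):

>>> import pickle
>>> payload = pickle.dumps(tokenizer)   # __getstate__ drops the live sp_model but keeps its serialized proto
>>> restored = pickle.loads(payload)    # __setstate__ rebuilds the SentencePieceProcessor from that proto
>>> restored.tokenize("def add(a, b):") == tokenizer.tokenize("def add(a, b):")
True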

mindnlp.transformers.models.llama.tokenization_code_llama.CodeLlamaTokenizer.build_inputs_with_special_tokens(token_ids_0, token_ids_1=None)

Method to build inputs with special tokens in the CodeLlamaTokenizer class.

PARAMETER DESCRIPTION
self

Reference to the current instance of the class.

token_ids_0

List of token IDs for the first input sequence.

TYPE: list

token_ids_1

List of token IDs for the second input sequence. Defaults to None.

TYPE: list DEFAULT: None

RETURNS DESCRIPTION
list

A list representing the input sequences with special tokens added based on the configuration settings.

Source code in mindnlp\transformers\models\llama\tokenization_code_llama.py
def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None):
    """
    Method to build inputs with special tokens in the CodeLlamaTokenizer class.

    Args:
        self: Reference to the current instance of the class.
        token_ids_0 (list): List of token IDs for the first input sequence.
        token_ids_1 (list, optional): List of token IDs for the second input sequence. Defaults to None.

    Returns:
        list: A list representing the input sequences with special tokens added based on the configuration settings.

    Raises:
        None.
    """
    bos_token_id = [self.bos_token_id] if self.add_bos_token else []
    eos_token_id = [self.eos_token_id] if self.add_eos_token else []

    output = bos_token_id + token_ids_0 + eos_token_id

    if token_ids_1 is not None:
        output = output + bos_token_id + token_ids_1 + eos_token_id

    return output
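For illustration, with the default settings (`add_bos_token=True`, `add_eos_token=False`) and the standard Llama vocabulary where the BOS id is 1, the method simply prepends the BOS id to each sequence (the other token ids below are arbitrary placeholders):

>>> tokenizer.build_inputs_with_special_tokens([306, 4966])
[1, 306, 4966]
>>> tokenizer.build_inputs_with_special_tokens([306, 4966], [626, 29879])
[1, 306, 4966, 1, 626, 29879]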

mindnlp.transformers.models.llama.tokenization_code_llama.CodeLlamaTokenizer.convert_tokens_to_string(tokens)

Converts a sequence of tokens (string) in a single string.

Source code in mindnlp\transformers\models\llama\tokenization_code_llama.py
def convert_tokens_to_string(self, tokens):
    """Converts a sequence of tokens (string) in a single string."""
    # since we manually add the prefix space, we have to remove it when decoding
    if tokens[0].startswith(SPIECE_UNDERLINE):
        tokens[0] = tokens[0][1:]

    current_sub_tokens = []
    out_string = ""
    for _, token in enumerate(tokens):
        # make sure that special tokens are not decoded using sentencepiece model
        if token in self.all_special_tokens:
            out_string += self.sp_model.decode(current_sub_tokens) + token
            current_sub_tokens = []
        else:
            current_sub_tokens.append(token)
    out_string += self.sp_model.decode(current_sub_tokens)
    return out_string

mindnlp.transformers.models.llama.tokenization_code_llama.CodeLlamaTokenizer.create_token_type_ids_from_sequences(token_ids_0, token_ids_1=None)

Creates a mask from the two sequences passed to be used in a sequence-pair classification task. An ALBERT sequence pair mask has the following format:

0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1
| first sequence    | second sequence |

if token_ids_1 is None, only returns the first portion of the mask (0s).

PARAMETER DESCRIPTION
token_ids_0

List of ids.

TYPE: `List[int]`

token_ids_1

Optional second list of IDs for sequence pairs.

TYPE: `List[int]`, *optional* DEFAULT: None

RETURNS DESCRIPTION
List[int]

List[int]: List of token type IDs according to the given sequence(s).

Source code in mindnlp\transformers\models\llama\tokenization_code_llama.py
def create_token_type_ids_from_sequences(
    self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
) -> List[int]:
    """
    Creates a mask from the two sequences passed to be used in a sequence-pair classification task. An ALBERT
    sequence pair mask has the following format:
    ```
    0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1
    | first sequence    | second sequence |
    ```

    if token_ids_1 is None, only returns the first portion of the mask (0s).

    Args:
        token_ids_0 (`List[int]`):
            List of ids.
        token_ids_1 (`List[int]`, *optional*):
            Optional second list of IDs for sequence pairs.

    Returns:
        `List[int]`: List of [token type IDs](../glossary#token-type-ids) according to the given sequence(s).
    """
    bos_token_id = [self.bos_token_id] if self.add_bos_token else []
    eos_token_id = [self.eos_token_id] if self.add_eos_token else []

    output = [0] * len(bos_token_id + token_ids_0 + eos_token_id)

    if token_ids_1 is not None:
        output += [1] * len(bos_token_id + token_ids_1 + eos_token_id)

    return output
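A small illustration with the defaults (`add_bos_token=True`, `add_eos_token=False`); the token ids are placeholders and only the sequence lengths matter:

>>> tokenizer.create_token_type_ids_from_sequences([10, 11], [20, 21, 22])
[0, 0, 0, 1, 1, 1, 1]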

mindnlp.transformers.models.llama.tokenization_code_llama.CodeLlamaTokenizer.get_special_tokens_mask(token_ids_0, token_ids_1=None, already_has_special_tokens=False)

Retrieve sequence ids from a token list that has no special tokens added. This method is called when adding special tokens using the tokenizer prepare_for_model method.

PARAMETER DESCRIPTION
token_ids_0

List of IDs.

TYPE: `List[int]`

token_ids_1

Optional second list of IDs for sequence pairs.

TYPE: `List[int]`, *optional* DEFAULT: None

already_has_special_tokens

Whether or not the token list is already formatted with special tokens for the model.

TYPE: `bool`, *optional*, defaults to `False` DEFAULT: False

RETURNS DESCRIPTION
List[int]

List[int]: A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token.

Source code in mindnlp\transformers\models\llama\tokenization_code_llama.py
def get_special_tokens_mask(
    self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None, already_has_special_tokens: bool = False
) -> List[int]:
    """
    Retrieve sequence ids from a token list that has no special tokens added. This method is called when adding
    special tokens using the tokenizer `prepare_for_model` method.

    Args:
        token_ids_0 (`List[int]`):
            List of IDs.
        token_ids_1 (`List[int]`, *optional*):
            Optional second list of IDs for sequence pairs.
        already_has_special_tokens (`bool`, *optional*, defaults to `False`):
            Whether or not the token list is already formatted with special tokens for the model.

    Returns:
        `List[int]`: A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token.
    """
    if already_has_special_tokens:
        return super().get_special_tokens_mask(
            token_ids_0=token_ids_0, token_ids_1=token_ids_1, already_has_special_tokens=True
        )

    bos_token_id = [1] if self.add_bos_token else []
    eos_token_id = [1] if self.add_eos_token else []

    if token_ids_1 is None:
        return bos_token_id + ([0] * len(token_ids_0)) + eos_token_id
    return (
        bos_token_id
        + ([0] * len(token_ids_0))
        + eos_token_id
        + bos_token_id
        + ([0] * len(token_ids_1))
        + eos_token_id
    )
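For example, with `add_bos_token=True` and `add_eos_token=False` (placeholder ids), only the positions of the prepended BOS tokens are flagged:

>>> tokenizer.get_special_tokens_mask([10, 11], [20, 21])
[1, 0, 0, 1, 0, 0]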

mindnlp.transformers.models.llama.tokenization_code_llama.CodeLlamaTokenizer.get_spm_processor()

This method initializes and returns a SentencePieceProcessor object for tokenizing text using the SentencePiece library.

PARAMETER DESCRIPTION
self

The instance of the CodeLlamaTokenizer class.

RETURNS DESCRIPTION

spm.SentencePieceProcessor: A tokenizer object of type spm.SentencePieceProcessor.

RAISES DESCRIPTION
None

However, potential exceptions that may occur during the method execution include:

  • FileNotFoundError: If the specified vocab_file cannot be found.
  • IOError: If there are issues with reading the vocab_file.
  • ValueError: If the provided sp_model_kwargs are invalid or missing required information.
  • Any other relevant exceptions that may occur during the loading and initialization of the tokenizer.
Source code in mindnlp\transformers\models\llama\tokenization_code_llama.py
def get_spm_processor(self):
    """
    This method initializes and returns a SentencePieceProcessor object for tokenizing text using
    the SentencePiece library.

    Args:
        self: The instance of the CodeLlamaTokenizer class.

    Returns:
        spm.SentencePieceProcessor: A tokenizer object of type spm.SentencePieceProcessor.

    Raises:
        None:
            However, potential exceptions that may occur during the method execution include:

            - FileNotFoundError: If the specified vocab_file cannot be found.
            - IOError: If there are issues with reading the vocab_file.
            - ValueError: If the provided sp_model_kwargs are invalid or missing required information.
            - Any other relevant exceptions that may occur during the loading and initialization of the tokenizer.
    """
    tokenizer = spm.SentencePieceProcessor(**self.sp_model_kwargs)
    with open(self.vocab_file, "rb") as f:
        sp_model = f.read()
        model_pb2 = import_protobuf()
        model = model_pb2.ModelProto.FromString(sp_model)
        normalizer_spec = model_pb2.NormalizerSpec()
        normalizer_spec.add_dummy_prefix = False
        model.normalizer_spec.MergeFrom(normalizer_spec)
        sp_model = model.SerializeToString()
        tokenizer.LoadFromSerializedProto(sp_model)
    return tokenizer

mindnlp.transformers.models.llama.tokenization_code_llama.CodeLlamaTokenizer.get_vocab()

Returns vocab as a dict

Source code in mindnlp\transformers\models\llama\tokenization_code_llama.py
def get_vocab(self):
    """Returns vocab as a dict"""
    vocab = {self.convert_ids_to_tokens(i): i for i in range(self.vocab_size)}
    vocab.update(self.added_tokens_encoder)
    return vocab
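A quick usage sketch (the exact size and ids depend on the loaded SentencePiece model; for the standard Llama vocabulary, `<s>` maps to id 1):

>>> vocab = tokenizer.get_vocab()
>>> len(vocab) >= tokenizer.vocab_size
True
>>> vocab.get("<s>")
1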

mindnlp.transformers.models.llama.tokenization_code_llama.CodeLlamaTokenizer.save_vocabulary(save_directory, filename_prefix=None)

Save the vocabulary and special tokens file to a directory.

PARAMETER DESCRIPTION
save_directory

The directory in which to save the vocabulary.

TYPE: `str`

RETURNS DESCRIPTION
Tuple[str]

Tuple(str): Paths to the files saved.

Source code in mindnlp\transformers\models\llama\tokenization_code_llama.py
def save_vocabulary(self, save_directory, filename_prefix: Optional[str] = None) -> Tuple[str]:
    """
    Save the vocabulary and special tokens file to a directory.

    Args:
        save_directory (`str`):
            The directory in which to save the vocabulary.

    Returns:
        `Tuple(str)`: Paths to the files saved.
    """
    if not os.path.isdir(save_directory):
        logger.error(f"Vocabulary path ({save_directory}) should be a directory")
        return
    out_vocab_file = os.path.join(
        save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"]
    )

    if os.path.abspath(self.vocab_file) != os.path.abspath(out_vocab_file) and os.path.isfile(self.vocab_file):
        copyfile(self.vocab_file, out_vocab_file)
    elif not os.path.isfile(self.vocab_file):
        with open(out_vocab_file, "wb") as fi:
            content_spiece_model = self.sp_model.serialized_model_proto()
            fi.write(content_spiece_model)

    return (out_vocab_file,)
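Usage sketch (the vocabulary is written as `tokenizer.model`, or `<prefix>-tokenizer.model` when a `filename_prefix` is given, into an existing directory; the returned paths shown in the comments are illustrative):

>>> import tempfile
>>> out_dir = tempfile.mkdtemp()
>>> tokenizer.save_vocabulary(out_dir)                          # -> ('<out_dir>/tokenizer.model',)
>>> tokenizer.save_vocabulary(out_dir, filename_prefix="ckpt")  # -> ('<out_dir>/ckpt-tokenizer.model',)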

mindnlp.transformers.models.llama.tokenization_code_llama.CodeLlamaTokenizer.tokenize(prefix, suffix=None, suffix_first=False, **kwargs)

Tokenizes the given prefix and suffix to generate a list of integers representing tokens.

PARAMETER DESCRIPTION
self

An instance of the CodeLlamaTokenizer class.

TYPE: CodeLlamaTokenizer

prefix

The prefix string to tokenize.

TYPE: str

suffix

The suffix string to tokenize. Defaults to None.

TYPE: str DEFAULT: None

suffix_first

Flag indicating whether to place the suffix before the prefix. Defaults to False.

TYPE: bool DEFAULT: False

RETURNS DESCRIPTION
List[int]

List[int]: A list of integers representing the tokens generated from the prefix and suffix.

RAISES DESCRIPTION
ValueError

If the input includes a prefix and a suffix for the infilling task (or can be split on the fill_token), but the model does not support infilling.

Note
  • The prefix and suffix parameters are used to split the input on the fill_token token to create a suffix and prefix.
  • If only a prefix is provided, the method tokenizes the prefix and returns the resulting tokens.
  • If a prefix and suffix are provided, the method tokenizes both and returns the tokens in the specified order.
  • The suffix_first parameter takes precedence over the class attribute suffix_first if both are provided.
  • The method removes special tokens from the beginning of the tokens list if they match the specified conditions.
  • The method replaces occurrences of the SPIECE_UNDERLINE token in the prefix with a space.
Source code in mindnlp\transformers\models\llama\tokenization_code_llama.py
def tokenize(self, prefix, suffix=None, suffix_first=False, **kwargs) -> List[int]:
    """
    Tokenizes the given prefix and suffix to generate a list of integers representing tokens.

    Args:
        self (CodeLlamaTokenizer): An instance of the CodeLlamaTokenizer class.
        prefix (str): The prefix string to tokenize.
        suffix (str, optional): The suffix string to tokenize. Defaults to None.
        suffix_first (bool, optional): Flag indicating whether to place the suffix before the prefix.
            Defaults to False.

    Returns:
        List[int]: A list of integers representing the tokens generated from the prefix and suffix.

    Raises:
        ValueError: If the input includes a prefix and a suffix for the infilling task (or can be split
            on the `fill_token`) but the model does not support infilling.

    Note:
        - The `prefix` and `suffix` parameters are used to split the input on the `fill_token` token to
        create a suffix and prefix.
        - If only a prefix is provided, the method tokenizes the prefix and returns the resulting tokens.
        - If a prefix and suffix are provided, the method tokenizes both and returns the tokens in the
        specified order.
        - The `suffix_first` parameter takes precedence over the class attribute `suffix_first`
        if both are provided.
        - The method removes special tokens from the beginning of the tokens list if they match the
        specified conditions.
        - The method replaces occurrences of the `SPIECE_UNDERLINE` token in the prefix with a space.
    """
    # add a prefix space to `prefix`
    if self.fill_token is not None and self.fill_token in prefix and suffix is None:
        prefix, suffix = prefix.split(self.fill_token)

    if len(prefix) > 0:
        prefix = SPIECE_UNDERLINE + prefix.replace(SPIECE_UNDERLINE, " ")

    if suffix is None or len(suffix) < 1:
        tokens = super().tokenize(prefix, **kwargs)
        if len(tokens) > 1 and tokens[0] == SPIECE_UNDERLINE and tokens[1] in self.all_special_tokens:
            tokens = tokens[1:]
        return tokens

    prefix_tokens = self._tokenize(prefix)  # prefix has an extra `SPIECE_UNDERLINE`

    if None in (self.prefix_id, self.middle_id, self.suffix_id):
        raise ValueError(
            "The input either includes a `prefix` and a `suffix` used for the infilling task,"
            f"  or can be split on the {self.fill_token} token, creating a suffix and prefix,"
            " but the model does not support `infilling`."
        )
    suffix_tokens = self._tokenize(suffix)  # make sure CodeLlama sp model does not mess up

    suffix_first = suffix_first if suffix_first is not None else self.suffix_first
    if suffix_first:
        # format as " <PRE> <SUF>{suf} <MID> {pre}"
        return [self.prefix_token, self.suffix_token] + suffix_tokens + [self.middle_token] + prefix_tokens
    # format as " <PRE> {pre} <SUF>{suf} <MID>"
    return [self.prefix_token] + prefix_tokens + [self.suffix_token] + suffix_tokens + [self.middle_token]
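A hedged infilling sketch: when the input contains the `<FILL_ME>` token (or an explicit `suffix` is passed), the output is wrapped with the infilling markers; the middle tokens are elided below because they depend on the loaded SentencePiece model:

>>> code = "def remove_non_ascii(s: str) -> str:\n    <FILL_ME>\n    return result"
>>> tokens = tokenizer.tokenize(code)
>>> tokens[0], tokens[-1]
('▁<PRE>', '▁<MID>')
>>> '▁<SUF>' in tokens
True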

mindnlp.transformers.models.llama.tokenization_code_llama_fast

Fast Tokenization classes for Code LLaMA.

mindnlp.transformers.models.llama.tokenization_code_llama_fast.CodeLlamaTokenizerFast

Bases: PreTrainedTokenizerFast

Construct a Llama tokenizer based on byte-level Byte-Pair-Encoding. Notably, this uses ByteFallback and no normalization.

Example
>>> from transformers import CodeLlamaTokenizerFast
...
>>> tokenizer = CodeLlamaTokenizerFast.from_pretrained("hf-internal-testing/llama-tokenizer")
>>> tokenizer.encode("Hello this is a test")
[1, 15043, 445, 338, 263, 1243]

If you want to change the bos_token or the eos_token, make sure to specify them when initializing the model, or call tokenizer.update_post_processor() to make sure that the post-processing is correctly done (otherwise the values of the first token and final token of an encoded sequence will not be correct). For more details, check out the [post-processors](https://hf-mirror.com/docs/tokenizers/api/post-processors) documentation.

This tokenizer inherits from [PreTrainedTokenizerFast] which contains most of the main methods. Users should refer to this superclass for more information regarding those methods. The default configuration matches that of codellama/CodeLlama-7b-Instruct-hf, which supports prompt infilling.

PARAMETER DESCRIPTION
vocab_file

SentencePiece file (generally has a .model extension) that contains the vocabulary necessary to instantiate a tokenizer.

TYPE: `str`, *optional* DEFAULT: None

tokenizer_file

tokenizers file (generally has a .json extension) that contains everything needed to load the tokenizer.

TYPE: `str`, *optional* DEFAULT: None

clean_up_tokenization_spaces

Whether to clean up spaces after decoding; cleanup consists of removing potential artifacts like extra spaces.

TYPE: `bool`, *optional*, defaults to `False` DEFAULT: False

unk_token

The unknown token. A token that is not in the vocabulary cannot be converted to an ID and is set to be this token instead.

TYPE: `str`, *optional*, defaults to `"<unk>"` DEFAULT: '<unk>'

bos_token

The beginning of sequence token that was used during pretraining. Can be used as a sequence classifier token.

TYPE: `str`, *optional*, defaults to `"<s>"` DEFAULT: '<s>'

eos_token

The end of sequence token.

TYPE: `str`, *optional*, defaults to `"</s>"` DEFAULT: '</s>'

prefix_token

Prefix token used for infilling.

TYPE: `str`, *optional*, defaults to `"▁<PRE>"` DEFAULT: '▁<PRE>'

middle_token

Middle token used for infilling.

TYPE: `str`, *optional*, defaults to `"▁<MID>"` DEFAULT: '▁<MID>'

suffix_token

Suffix token used for infilling.

TYPE: `str`, *optional*, defaults to `"▁<SUF>"` DEFAULT: '▁<SUF>'

eot_token

End of text token used for infilling.

TYPE: `str`, *optional*, defaults to `"▁<EOT>"` DEFAULT: '▁<EOT>'

fill_token

The token used to split the input between the prefix and suffix.

TYPE: `str`, *optional*, defaults to `"<FILL_ME>"` DEFAULT: '<FILL_ME>'

additional_special_tokens

Additional special tokens used by the tokenizer.

TYPE: `List[str]`, *optional* DEFAULT: None

add_bos_token

Whether to add a beginning of sequence token at the start of sequences.

TYPE: `bool`, *optional*, defaults to `True` DEFAULT: True

add_eos_token

Whether to add an end of sequence token at the end of sequences.

TYPE: `bool`, *optional*, defaults to `False` DEFAULT: False

use_default_system_prompt

Whether or not the default system prompt for Llama should be used.

TYPE: `bool`, *optional*, defaults to `False` DEFAULT: False

Source code in mindnlp\transformers\models\llama\tokenization_code_llama_fast.py
class CodeLlamaTokenizerFast(PreTrainedTokenizerFast):
    """
    Construct a Llama tokenizer based on byte-level Byte-Pair-Encoding.
    Notably, this uses ByteFallback and no normalization.

    Example:
        ```python
        >>> from transformers import CodeLlamaTokenizerFast
        ...
        >>> tokenizer = CodeLlamaTokenizerFast.from_pretrained("hf-internal-testing/llama-tokenizer")
        >>> tokenizer.encode("Hello this is a test")
        [1, 15043, 445, 338, 263, 1243]
        ```

    If you want to change the `bos_token` or the `eos_token`, make sure to specify them when initializing the model, or
    call `tokenizer.update_post_processor()` to make sure that the post-processing is correctly done (otherwise the
    values of the first token and final token of an encoded sequence will not be correct). For more details, check out
    the [post-processors](https://hf-mirror.com/docs/tokenizers/api/post-processors) documentation.


    This tokenizer inherits from [`PreTrainedTokenizerFast`] which contains most of the main methods. Users should
    refer to this superclass for more information regarding those methods. The default configuration matches that of
    [codellama/CodeLlama-7b-Instruct-hf](https://hf-mirror.com/codellama/CodeLlama-7b-Instruct-hf/blob/main/tokenizer_config.json)
    which supports prompt infilling.

    Args:
        vocab_file (`str`, *optional*):
            [SentencePiece](https://github.com/google/sentencepiece) file (generally has a .model extension) that
            contains the vocabulary necessary to instantiate a tokenizer.
        tokenizer_file (`str`, *optional*):
            [tokenizers](https://github.com/huggingface/tokenizers) file (generally has a .json extension) that
            contains everything needed to load the tokenizer.
        clean_up_tokenization_spaces (`bool`, *optional*, defaults to `False`):
            Whether to clean up spaces after decoding; cleanup consists of removing potential artifacts like extra
            spaces.
        unk_token (`str`, *optional*, defaults to `"<unk>"`):
            The unknown token. A token that is not in the vocabulary cannot be converted to an ID and is set to be this
            token instead.
        bos_token (`str`, *optional*, defaults to `"<s>"`):
            The beginning of sequence token that was used during pretraining. Can be used as a sequence classifier token.
        eos_token (`str`, *optional*, defaults to `"</s>"`):
            The end of sequence token.
        prefix_token (`str`, *optional*, defaults to `"▁<PRE>"`):
            Prefix token used for infilling.
        middle_token (`str`, *optional*, defaults to `"▁<MID>"`):
            Middle token used for infilling.
        suffix_token (`str`, *optional*, defaults to `"▁<SUF>"`):
            Suffix token used for infilling.
        eot_token (`str`, *optional*, defaults to `"▁<EOT>"`):
            End of text token used for infilling.
        fill_token (`str`, *optional*, defaults to `"<FILL_ME>"`):
            The token used to split the input between the prefix and suffix.
        additional_special_tokens (`List[str]`, *optional*):
            Additional special tokens used by the tokenizer.
        add_bos_token (`bool`, *optional*, defaults to `True`):
            Whether to add a beginning of sequence token at the start of sequences.
        add_eos_token (`bool`, *optional*, defaults to `False`):
            Whether to add an end of sequence token at the end of sequences.
        use_default_system_prompt (`bool`, *optional*, defaults to `False`):
            Whether or not the default system prompt for Llama should be used.
    """
    vocab_files_names = VOCAB_FILES_NAMES
    slow_tokenizer_class = CodeLlamaTokenizer
    padding_side = "left"
    model_input_names = ["input_ids", "attention_mask"]

    def __init__(
        self,
        vocab_file=None,
        tokenizer_file=None,
        clean_up_tokenization_spaces=False,
        unk_token="<unk>",
        bos_token="<s>",
        eos_token="</s>",
        prefix_token="▁<PRE>",
        middle_token="▁<MID>",
        suffix_token="▁<SUF>",
        eot_token="▁<EOT>",
        fill_token="<FILL_ME>",
        additional_special_tokens=None,
        add_bos_token=True,
        add_eos_token=False,
        use_default_system_prompt=False,
        **kwargs,
    ):
        """
        Initializes an instance of the CodeLlamaTokenizerFast class.

        Args:
            self: The instance of the class.
            vocab_file (str, optional): Path to the vocabulary file. Defaults to None.
            tokenizer_file (str, optional): Path to the tokenizer file. Defaults to None.
            clean_up_tokenization_spaces (bool, optional): Whether to clean up tokenization spaces. Defaults to False.
            unk_token (str, optional): Unknown token. Defaults to '<unk>'.
            bos_token (str, optional): Beginning of sentence token. Defaults to '<s>'.
            eos_token (str, optional): End of sentence token. Defaults to '</s>'.
            prefix_token (str, optional): Prefix token. Defaults to '▁<PRE>'.
            middle_token (str, optional): Middle token. Defaults to '▁<MID>'.
            suffix_token (str, optional): Suffix token. Defaults to '▁<SUF>'.
            eot_token (str, optional): End of text token. Defaults to '▁<EOT>'.
            fill_token (str, optional): Fill token. Defaults to '<FILL_ME>'.
            additional_special_tokens (List[str], optional): Additional special tokens. Defaults to None.
            add_bos_token (bool, optional): Whether to add the beginning of sentence token. Defaults to True.
            add_eos_token (bool, optional): Whether to add the end of sentence token. Defaults to False.
            use_default_system_prompt (bool, optional): Whether to use the default system prompt. Defaults to False.
            **kwargs: Additional keyword arguments.

        Returns:
            None

        Raises:
            None
        """
        # mark tokens special to skip them
        additional_special_tokens = additional_special_tokens or []
        for token in [prefix_token, middle_token, suffix_token, eot_token]:
            additional_special_tokens += [token] if token is not None else []
        self.use_default_system_prompt = use_default_system_prompt

        super().__init__(
            vocab_file=vocab_file,
            tokenizer_file=tokenizer_file,
            clean_up_tokenization_spaces=clean_up_tokenization_spaces,
            additional_special_tokens=additional_special_tokens,
            unk_token=unk_token,
            bos_token=bos_token,
            eos_token=eos_token,
            add_bos_token=add_bos_token,
            add_eos_token=add_eos_token,
            prefix_token=prefix_token,
            middle_token=middle_token,
            suffix_token=suffix_token,
            eot_token=eot_token,
            fill_token=fill_token,
            use_default_system_prompt=use_default_system_prompt,
            **kwargs,
        )
        self._add_bos_token = add_bos_token
        self._add_eos_token = add_eos_token
        self.update_post_processor()

        self.vocab_file = vocab_file

        self._prefix_token = prefix_token
        self._middle_token = middle_token
        self._suffix_token = suffix_token
        self._eot_token = eot_token
        self.fill_token = fill_token

    @property
    def can_save_slow_tokenizer(self) -> bool:
        """
        Checks if the slow tokenizer can be saved.

        Args:
            self (CodeLlamaTokenizerFast): An instance of the CodeLlamaTokenizerFast class.

        Returns:
            bool: True if the slow tokenizer can be saved, False otherwise.

        Raises:
            None.

        This method checks if the slow tokenizer can be saved by verifying if the vocab_file attribute exists.
        If the vocab_file attribute is not None and it corresponds to an existing file, the method returns True.
        Otherwise, it returns False.
        """
        return os.path.isfile(self.vocab_file) if self.vocab_file else False

    # Copied from transformers.models.llama.tokenization_llama_fast.LlamaTokenizerFast.update_post_processor
    def update_post_processor(self):
        """
        Updates the underlying post processor with the current `bos_token` and `eos_token`.
        """
        bos = self.bos_token
        bos_token_id = self.bos_token_id
        if bos is None and self.add_bos_token:
            raise ValueError("add_bos_token = True but bos_token = None")

        eos = self.eos_token
        eos_token_id = self.eos_token_id
        if eos is None and self.add_eos_token:
            raise ValueError("add_eos_token = True but eos_token = None")

        single = f"{(bos+':0 ') if self.add_bos_token else ''}$A:0{(' '+eos+':0') if self.add_eos_token else ''}"
        pair = f"{single}{(' '+bos+':1') if self.add_bos_token else ''} $B:1{(' '+eos+':1') if self.add_eos_token else ''}"

        special_tokens = []
        if self.add_bos_token:
            special_tokens.append((bos, bos_token_id))
        if self.add_eos_token:
            special_tokens.append((eos, eos_token_id))
        self._tokenizer.post_processor = processors.TemplateProcessing(
            single=single, pair=pair, special_tokens=special_tokens
        )

    @property
    def prefix_token(self):
        '''
        Returns the prefix token for the CodeLlamaTokenizerFast class.

        Args:
            self (CodeLlamaTokenizerFast): The instance of the CodeLlamaTokenizerFast class.

        Returns:
            str or None: The prefix token, or `None` if not set.

        Raises:
            None.
        '''
        return self._prefix_token

    @property
    def prefix_id(self):
        """
        Returns the prefix token converted to its corresponding ID.

        Args:
            self (CodeLlamaTokenizerFast): An instance of the CodeLlamaTokenizerFast class.

        Returns:
            None: If the prefix token is None.

        Raises:
            None.

        """
        if self._prefix_token is None:
            return None
        return self.convert_tokens_to_ids(self.prefix_token)

    @property
    def middle_token(self):
        """
        Returns the middle token used for infilling.

        Args:
            self: The instance of the class.

        Returns:
            str or None: The middle token, or `None` if not set.

        Raises:
            None.
        """
        return self._middle_token

    @property
    def middle_id(self):
        """
        Returns the middle token ID of the CodeLlamaTokenizerFast instance.

        Args:
            self (CodeLlamaTokenizerFast): The instance of the CodeLlamaTokenizerFast class.

        Returns:
            None: If the middle token is not set or is set to None.
            int: The ID of the middle token.

        Raises:
            None.

        This method retrieves the ID of the middle token in the CodeLlamaTokenizerFast instance.
        If the middle token is not set or is set to None, None is returned. Otherwise, the method calls the
        'convert_tokens_to_ids' function to convert the middle token into its corresponding ID and returns the ID value.
        """
        if self._middle_token is None:
            return None
        return self.convert_tokens_to_ids(self.middle_token)

    @property
    def suffix_token(self):
        """
        Returns the suffix token used for infilling.

        Args:
            self: An instance of the 'CodeLlamaTokenizerFast' class.
                It is used to access the attributes and methods of the class within this method.

        Returns:
            str or None: The suffix token, or `None` if not set.

        Raises:
            None.

        """
        return self._suffix_token

    @property
    def suffix_id(self):
        """
        Returns the suffix token converted to its corresponding ID.

        Args:
            self: An instance of the `CodeLlamaTokenizerFast` class.

        Returns:
            None: If the `_suffix_token` attribute is `None`, the method returns `None`.

        Raises:
            None.

        Description:
            This method retrieves the suffix ID associated with the `_suffix_token` attribute.
            If the `_suffix_token` is `None`, indicating the absence of a suffix token, the method returns `None`.
            Otherwise, it calls the `convert_tokens_to_ids` method to convert the `_suffix_token` to its
            corresponding ID and returns the result.

        Note:
            - The `_suffix_token` attribute should be set before calling this method to ensure accurate results.
            - The return value is an `int` token ID, or `None` when no suffix token is set.
        """
        if self._suffix_token is None:
            return None
        return self.convert_tokens_to_ids(self.suffix_token)

    @property
    def eot_id(self):
        """
        Returns the ID representation of the end-of-text (EOT) token in the CodeLlamaTokenizerFast class.

        Args:
            self: An instance of the CodeLlamaTokenizerFast class.

        Returns:
            None: If the EOT token is not set.
            int: The ID representation of the EOT token.

        Raises:
            None.

        This method retrieves the ID representation of the EOT token.
        If the EOT token is not set (None), it returns None.
        Otherwise, it uses the 'convert_tokens_to_ids' method to convert the EOT token to its corresponding ID
        representation and returns it.
        """
        if self._eot_token is None:
            return None
        return self.convert_tokens_to_ids(self.eot_token)

    @property
    def eot_token(self):
        """
        Returns the end-of-text (EOT) token used for infilling.

        Args:
            self: The instance of the CodeLlamaTokenizerFast class.

        Returns:
            The value of the _eot_token attribute.

        Raises:
            None.
        """
        return self._eot_token

    @property
    def add_eos_token(self):
        """
        Returns whether an end-of-sequence (EOS) token is appended to encoded sequences.

        Args:
            self: The instance of the CodeLlamaTokenizerFast class.

        Returns:
            bool: `True` if an EOS token is appended, `False` otherwise.

        Raises:
            None.
        """
        return self._add_eos_token

    @property
    def add_bos_token(self):
        """
        Returns whether a beginning-of-sequence (BOS) token is prepended to encoded sequences.

        Args:
            self: An instance of the CodeLlamaTokenizerFast class.

        Returns:
            bool: `True` if a BOS token is prepended, `False` otherwise.

        Raises:
            None
        """
        return self._add_bos_token

    @add_eos_token.setter
    def add_eos_token(self, value):
        """
        This method 'add_eos_token' is a setter method for the 'add_eos_token' property in the
        'CodeLlamaTokenizerFast' class.

        Args:
            self (CodeLlamaTokenizerFast): The instance of the CodeLlamaTokenizerFast class.
            value (bool): A boolean value indicating whether to add an end-of-sequence token.

        Returns:
            None.

        Raises:
            None.
        """
        self._add_eos_token = value
        self.update_post_processor()

    @add_bos_token.setter
    def add_bos_token(self, value):
        """
        Sets the value of the 'add_bos_token' attribute in the CodeLlamaTokenizerFast class.

        Args:
            self (CodeLlamaTokenizerFast): An instance of the CodeLlamaTokenizerFast class.
            value: The value to be assigned to the 'add_bos_token' attribute. It can be of any type.

        Returns:
            None.

        Raises:
            None.

        This method updates the 'add_bos_token' attribute with the provided value and triggers the
        'update_post_processor' method.
        """
        self._add_bos_token = value
        self.update_post_processor()

    def set_infilling_processor(self, reset, suffix_first=False, add_special_tokens=True):
        r"""
        Updates the normalizer to make sure the prompt format for `infilling` is respected. The infilling format is the
        following:

        - if suffix_first

            - `" <PRE> <SUF>{suf} <MID> {pre}"`

        - else:

            - `" <PRE> {pre} <SUF>{suf} <MID>"`

        If `reset` is set to `True`, the `normalizer` and `post_processor` are reset to their "normal" behaviour, which
        is to add a prefix space for the normalizer, and add a `bos_token` to the input text for the `post_processor`.
        """
        if reset:
            self._tokenizer.normalizer = normalizers.Sequence(
                [
                    normalizers.Prepend(prepend="▁"),
                    normalizers.Replace(pattern=" ", content="▁"),
                ]
            )
            self.update_post_processor()
            return

        self._tokenizer.normalizer = normalizers.Replace(pattern=" ", content="▁")
        pair = [self.bos_token] if self.add_bos_token and add_special_tokens else []
        special_tokens = [(self.bos_token, self.bos_token_id)] if self.add_bos_token and add_special_tokens else []
        if suffix_first:
            # format as " <PRE> <SUF>{suf} <MID> {pre}"
            pair += [self.prefix_token, self.suffix_token, "$B", self.middle_token, "$A"]
            special_tokens += [
                (self.prefix_token, self.prefix_id),
                (self.suffix_token, self.suffix_id),
                (self.middle_token, self.middle_id),
            ]
        else:
            # format as " <PRE> {pre} <SUF>{suf} <MID>"
            pair += [self.prefix_token, "$A", self.suffix_token, "$B", self.middle_token]
            special_tokens += [
                (self.prefix_token, self.prefix_id),
                (self.suffix_token, self.suffix_id),
                (self.middle_token, self.middle_id),
            ]

        if self.add_eos_token and add_special_tokens:
            pair += [self.eos_token]
            special_tokens += [(self.eos_token, self.eos_token_id)]
        self._tokenizer.post_processor = processors.TemplateProcessing(
            single="$A", pair=pair, special_tokens=special_tokens
        )

    def encode_plus(self, text, text_pair=None, suffix_first=False, add_special_tokens=True, **kwargs):
        """
        Encodes the given text and text pair into tokens using the CodeLlamaTokenizerFast class.

        Args:
            self (CodeLlamaTokenizerFast): An instance of the CodeLlamaTokenizerFast class.
            text (str): The input text to be encoded.
            text_pair (str, optional): The optional second input text to be encoded. Defaults to None.
            suffix_first (bool, optional): Specifies whether the suffix should be placed first. Defaults to False.
            add_special_tokens (bool, optional): Specifies whether to add special tokens. Defaults to True.

        Returns:
            tokens: The encoded tokens. This is an instance of a class defined in the CodeLlamaTokenizerFast class.

        Raises:
            ValueError: If the input includes a `prefix` and a `suffix` used for the infilling task,
                the `prefix_id, middle_id, suffix_id` must all be initialized.
                Current values: (self.prefix_id, self.middle_id, self.suffix_id)
        """
        # hack to make sure the input is pre-processed, but outside the Rust tokenizer
        text_pair = kwargs.pop("suffix", text_pair)
        if self.fill_token is not None and self.fill_token in text and text_pair is None:
            text, text_pair = text.split(self.fill_token)

        if text_pair is None or len(text_pair) < 1:
            return super().encode_plus(text, text_pair, add_special_tokens=add_special_tokens, **kwargs)

        if None in (self.prefix_id, self.middle_id, self.suffix_id):
            raise ValueError(
                "Then input includes a `prefix` and a `suffix` used for the infilling task,"
                " the `prefix_id, middle_id, suffix_id` must all be initialized. Current"
                f" values : {self.prefix_id, self.middle_id, self.suffix_id}"
            )

        self.set_infilling_processor(False, suffix_first=suffix_first, add_special_tokens=add_special_tokens)
        tokens = super().encode_plus(" " + text, text_pair=text_pair, add_special_tokens=True, **kwargs)
        self.set_infilling_processor(True)
        return tokens

    # Copied from transformers.models.llama.tokenization_llama_fast.LlamaTokenizerFast.save_vocabulary
    def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]:
        """
        Save the vocabulary for a fast tokenizer.

        Args:
            self (CodeLlamaTokenizerFast): An instance of the CodeLlamaTokenizerFast class.
            save_directory (str): The directory path where the vocabulary will be saved.
            filename_prefix (Optional[str], optional): A prefix to be added to the filename. Defaults to None.

        Returns:
            Tuple[str]: A tuple containing the path to the saved vocabulary file.

        Raises:
            ValueError: If the fast tokenizer does not have the necessary information to save the vocabulary
                for a slow tokenizer.

        Note:
            If `save_directory` is not an existing directory, the method logs an error and returns `None`
            instead of raising.

        """
        if not self.can_save_slow_tokenizer:
            raise ValueError(
                "Your fast tokenizer does not have the necessary information to save the vocabulary for a slow "
                "tokenizer."
            )

        if not os.path.isdir(save_directory):
            logger.error(f"Vocabulary path ({save_directory}) should be a directory")
            return
        out_vocab_file = os.path.join(
            save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"]
        )

        if os.path.abspath(self.vocab_file) != os.path.abspath(out_vocab_file):
            copyfile(self.vocab_file, out_vocab_file)

        return (out_vocab_file,)

    @property
    # Copied from transformers.models.llama.tokenization_llama.LlamaTokenizer.default_chat_template
    def default_chat_template(self):
        """
        LLaMA uses [INST] and [/INST] to indicate user messages, and <<SYS>> and <</SYS>> to indicate system messages.
        Assistant messages do not have special tokens, because LLaMA chat models are generally trained with strict
        user/assistant/user/assistant message ordering, and so assistant messages can be identified from the ordering
        rather than needing special tokens. The system message is partly 'embedded' in the first user message, which
        results in an unusual token ordering when it is present. This template should definitely be changed if you wish
        to fine-tune a model with more flexible role ordering!

        The output should look something like:

        <bos>[INST] B_SYS SystemPrompt E_SYS Prompt [/INST] Answer <eos><bos>[INST] Prompt [/INST] Answer <eos>
        <bos>[INST] Prompt [/INST]

        The reference for this chat template is [this code
        snippet](https://github.com/facebookresearch/llama/blob/556949fdfb72da27c2f4a40b7f0e4cf0b8153a28/llama/generation.py#L320-L362)
        in the original repository.
        """
        logger.warning_once(
            "\nNo chat template is defined for this tokenizer - using the default template "
            f"for the {self.__class__.__name__} class. If the default is not appropriate for "
            "your model, please set `tokenizer.chat_template` to an appropriate template. "
            "See https://hf-mirror.com/docs/transformers/main/chat_templating for more information.\n"
        )
        template = (
            "{% if messages[0]['role'] == 'system' %}"
            "{% set loop_messages = messages[1:] %}"  # Extract system message if it's present
            "{% set system_message = messages[0]['content'] %}"
            "{% elif USE_DEFAULT_PROMPT == true and not '<<SYS>>' in messages[0]['content'] %}"
            "{% set loop_messages = messages %}"  # Or use the default system message if the flag is set
            "{% set system_message = 'DEFAULT_SYSTEM_MESSAGE' %}"
            "{% else %}"
            "{% set loop_messages = messages %}"
            "{% set system_message = false %}"
            "{% endif %}"
            "{% for message in loop_messages %}"  # Loop over all non-system messages
            "{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}"
            "{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}"
            "{% endif %}"
            "{% if loop.index0 == 0 and system_message != false %}"  # Embed system message in first message
            "{% set content = '<<SYS>>\\n' + system_message + '\\n<</SYS>>\\n\\n' + message['content'] %}"
            "{% else %}"
            "{% set content = message['content'] %}"
            "{% endif %}"
            "{% if message['role'] == 'user' %}"  # After all of that, handle messages/roles in a fairly normal way
            "{{ bos_token + '[INST] ' + content.strip() + ' [/INST]' }}"
            "{% elif message['role'] == 'system' %}"
            "{{ '<<SYS>>\\n' + content.strip() + '\\n<</SYS>>\\n\\n' }}"
            "{% elif message['role'] == 'assistant' %}"
            "{{ ' '  + content.strip() + ' ' + eos_token }}"
            "{% endif %}"
            "{% endfor %}"
        )
        template = template.replace("USE_DEFAULT_PROMPT", "true" if self.use_default_system_prompt else "false")
        default_message = DEFAULT_SYSTEM_PROMPT.replace("\n", "\\n").replace("'", "\\'")
        template = template.replace("DEFAULT_SYSTEM_MESSAGE", default_message)

        return template

    def build_inputs_with_special_tokens(
        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
    ) -> List[int]:
        """
        Build model inputs from a sequence or a pair of sequences by concatenating and adding special tokens.

        A CodeLlama sequence has the following format, where `X` (and optionally `Y`) represents the sequence:

        - single sequence: `<bos> X <eos>`
        - pair of sequences: `<bos> X Y <eos>`

        Pairs of sequences are not the expected use case, but they will be handled without a
        separator.

        Args:
            token_ids_0 (`List[int]`):
                List of IDs to which the special tokens will be added.
            token_ids_1 (`List[int]`, *optional*):
                Optional second list of IDs for sequence pairs.

        Returns:
            `List[int]`: list of [input IDs](../glossary#input-ids) with the appropriate special tokens.
        """
        if token_ids_1 is None:
            return [self.bos_token_id] + token_ids_0 + [self.eos_token_id]
        return [self.bos_token_id] + token_ids_0 + token_ids_1 + [self.eos_token_id]

mindnlp.transformers.models.llama.tokenization_code_llama_fast.CodeLlamaTokenizerFast.add_bos_token property writable

Whether a beginning-of-sequence (BOS) token is prepended to encoded sequences.

PARAMETER DESCRIPTION
self

An instance of the CodeLlamaTokenizerFast class. It is used to access the tokenizer object.

RETURNS DESCRIPTION

bool: True if a BOS token is prepended, False otherwise.

mindnlp.transformers.models.llama.tokenization_code_llama_fast.CodeLlamaTokenizerFast.add_eos_token property writable

Whether an end-of-sequence (EOS) token is appended to encoded sequences.

PARAMETER DESCRIPTION
self

The instance of the CodeLlamaTokenizerFast class.

RETURNS DESCRIPTION

bool: True if an EOS token is appended, False otherwise.

mindnlp.transformers.models.llama.tokenization_code_llama_fast.CodeLlamaTokenizerFast.can_save_slow_tokenizer: bool property

Checks if the slow tokenizer can be saved.

PARAMETER DESCRIPTION
self

An instance of the CodeLlamaTokenizerFast class.

TYPE: CodeLlamaTokenizerFast

RETURNS DESCRIPTION
bool

True if the slow tokenizer can be saved, False otherwise.

TYPE: bool

This method checks if the slow tokenizer can be saved by verifying if the vocab_file attribute exists. If the vocab_file attribute is not None and it corresponds to an existing file, the method returns True. Otherwise, it returns False.

mindnlp.transformers.models.llama.tokenization_code_llama_fast.CodeLlamaTokenizerFast.default_chat_template property

LLaMA uses [INST] and [/INST] to indicate user messages, and <<SYS>> and <</SYS>> to indicate system messages. Assistant messages do not have special tokens, because LLaMA chat models are generally trained with strict user/assistant/user/assistant message ordering, and so assistant messages can be identified from the ordering rather than needing special tokens. The system message is partly 'embedded' in the first user message, which results in an unusual token ordering when it is present. This template should definitely be changed if you wish to fine-tune a model with more flexible role ordering!

The output should look something like:

<bos>[INST] B_SYS SystemPrompt E_SYS Prompt [/INST] Answer <eos><bos>[INST] Prompt [/INST] Answer <eos> <bos>[INST] Prompt [/INST]

The reference for this chat template is this code snippet (https://github.com/facebookresearch/llama/blob/556949fdfb72da27c2f4a40b7f0e4cf0b8153a28/llama/generation.py#L320-L362) in the original repository.
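
Example: a minimal sketch of rendering a conversation with this default template. It assumes CodeLlamaTokenizerFast is importable from mindnlp.transformers, that the base tokenizer class exposes the standard apply_chat_template helper (as in Hugging Face transformers), and that the checkpoint name below is only a placeholder.

from mindnlp.transformers import CodeLlamaTokenizerFast

# hypothetical checkpoint name; substitute the instruct checkpoint you actually use
tokenizer = CodeLlamaTokenizerFast.from_pretrained("codellama/CodeLlama-7b-Instruct-hf")

messages = [
    {"role": "system", "content": "You are a helpful coding assistant."},
    {"role": "user", "content": "Write a function that reverses a string."},
]
# tokenize=False returns the rendered prompt string instead of input ids
prompt = tokenizer.apply_chat_template(messages, tokenize=False)
print(prompt)  # "<s>[INST] <<SYS>>\n...\n<</SYS>>\n\nWrite a function ... [/INST]"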

mindnlp.transformers.models.llama.tokenization_code_llama_fast.CodeLlamaTokenizerFast.eot_id property

Returns the ID representation of the end-of-text (EOT) token in the CodeLlamaTokenizerFast class.

PARAMETER DESCRIPTION
self

An instance of the CodeLlamaTokenizerFast class.

RETURNS DESCRIPTION
None

If the EOT token is not set.

int

The ID representation of the EOT token.

This method retrieves the ID representation of the EOT token. If the EOT token is not set (None), it returns None. Otherwise, it uses the 'convert_tokens_to_ids' method to convert the EOT token to its corresponding ID representation and returns it.

mindnlp.transformers.models.llama.tokenization_code_llama_fast.CodeLlamaTokenizerFast.eot_token property

Returns the end-of-text (EOT) token used for infilling.

PARAMETER DESCRIPTION
self

The instance of the CodeLlamaTokenizerFast class.

RETURNS DESCRIPTION

The value of the _eot_token attribute.

mindnlp.transformers.models.llama.tokenization_code_llama_fast.CodeLlamaTokenizerFast.middle_id property

Returns the middle token ID of the CodeLlamaTokenizerFast instance.

PARAMETER DESCRIPTION
self

The instance of the CodeLlamaTokenizerFast class.

TYPE: CodeLlamaTokenizerFast

RETURNS DESCRIPTION
None

If the middle token is not set or is set to None.

int

The ID of the middle token.

This method retrieves the ID of the middle token in the CodeLlamaTokenizerFast instance. If the middle token is not set or is set to None, None is returned. Otherwise, the method calls the 'convert_tokens_to_ids' function to convert the middle token into its corresponding ID and returns the ID value.

mindnlp.transformers.models.llama.tokenization_code_llama_fast.CodeLlamaTokenizerFast.middle_token property

Returns the middle token used for infilling.

PARAMETER DESCRIPTION
self

The instance of the class.

RETURNS DESCRIPTION
str or None

The middle token, or None if not set.

mindnlp.transformers.models.llama.tokenization_code_llama_fast.CodeLlamaTokenizerFast.prefix_id property

Returns the prefix token converted to its corresponding ID.

PARAMETER DESCRIPTION
self

An instance of the CodeLlamaTokenizerFast class.

TYPE: CodeLlamaTokenizerFast

RETURNS DESCRIPTION
None

If the prefix token is None.

mindnlp.transformers.models.llama.tokenization_code_llama_fast.CodeLlamaTokenizerFast.prefix_token property

Returns the prefix token for the CodeLlamaTokenizerFast class.

PARAMETER DESCRIPTION
self

The instance of the CodeLlamaTokenizerFast class.

TYPE: CodeLlamaTokenizerFast

RETURNS DESCRIPTION

The prefix token, or None if not set.

mindnlp.transformers.models.llama.tokenization_code_llama_fast.CodeLlamaTokenizerFast.suffix_id property

Returns the suffix token converted to its corresponding ID.

PARAMETER DESCRIPTION
self

An instance of the CodeLlamaTokenizerFast class.

RETURNS DESCRIPTION
None

If the _suffix_token attribute is None, the method returns None.

Description

This method retrieves the suffix ID associated with the _suffix_token attribute. If the _suffix_token is None, indicating the absence of a suffix token, the method returns None. Otherwise, it calls the convert_tokens_to_ids method to convert the _suffix_token to its corresponding ID and returns the result.

Note
  • The _suffix_token attribute should be set before calling this method to ensure accurate results.
  • The return value is an int token ID, or None when no suffix token is set.

mindnlp.transformers.models.llama.tokenization_code_llama_fast.CodeLlamaTokenizerFast.suffix_token property

Returns the suffix token used for infilling.

PARAMETER DESCRIPTION
self

An instance of the 'CodeLlamaTokenizerFast' class. It is used to access the attributes and methods of the class within this method.

RETURNS DESCRIPTION

The suffix token, or None if not set.
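
Example: a small sketch tying the infilling-token properties together. It assumes tok is an already-loaded CodeLlamaTokenizerFast instance; the printed IDs depend on the vocabulary and are illustrative only.

# tok is an already-loaded CodeLlamaTokenizerFast instance
for name in ("prefix", "middle", "suffix", "eot"):
    token = getattr(tok, f"{name}_token")    # e.g. "▁<PRE>" for the prefix
    token_id = getattr(tok, f"{name}_id")    # convert_tokens_to_ids(token), or None if the token is unset
    print(name, token, token_id)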

mindnlp.transformers.models.llama.tokenization_code_llama_fast.CodeLlamaTokenizerFast.__init__(vocab_file=None, tokenizer_file=None, clean_up_tokenization_spaces=False, unk_token='<unk>', bos_token='<s>', eos_token='</s>', prefix_token='▁<PRE>', middle_token='▁<MID>', suffix_token='▁<SUF>', eot_token='▁<EOT>', fill_token='<FILL_ME>', additional_special_tokens=None, add_bos_token=True, add_eos_token=False, use_default_system_prompt=False, **kwargs)

Initializes an instance of the CodeLlamaTokenizerFast class.

PARAMETER DESCRIPTION
self

The instance of the class.

vocab_file

Path to the vocabulary file. Defaults to None.

TYPE: str DEFAULT: None

tokenizer_file

Path to the tokenizer file. Defaults to None.

TYPE: str DEFAULT: None

clean_up_tokenization_spaces

Whether to clean up tokenization spaces. Defaults to False.

TYPE: bool DEFAULT: False

unk_token

Unknown token. Defaults to '<unk>'.

TYPE: str DEFAULT: '<unk>'

bos_token

Beginning of sentence token. Defaults to '<s>'.

TYPE: str DEFAULT: '<s>'

eos_token

End of sentence token. Defaults to '</s>'.

TYPE: str DEFAULT: '</s>'

prefix_token

Prefix token. Defaults to '▁<PRE>'.

TYPE: str DEFAULT: '▁<PRE>'

middle_token

Middle token. Defaults to '▁<MID>'.

TYPE: str DEFAULT: '▁<MID>'

suffix_token

Suffix token. Defaults to '▁<SUF>'.

TYPE: str DEFAULT: '▁<SUF>'

eot_token

End of text token. Defaults to '▁<EOT>'.

TYPE: str DEFAULT: '▁<EOT>'

fill_token

Fill token. Defaults to '<FILL_ME>'.

TYPE: str DEFAULT: '<FILL_ME>'

additional_special_tokens

Additional special tokens. Defaults to None.

TYPE: List[str] DEFAULT: None

add_bos_token

Whether to add the beginning of sentence token. Defaults to True.

TYPE: bool DEFAULT: True

add_eos_token

Whether to add the end of sentence token. Defaults to False.

TYPE: bool DEFAULT: False

use_default_system_prompt

Whether to use the default system prompt. Defaults to False.

TYPE: bool DEFAULT: False

**kwargs

Additional keyword arguments.

DEFAULT: {}

RETURNS DESCRIPTION

None

Source code in mindnlp\transformers\models\llama\tokenization_code_llama_fast.py
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
def __init__(
    self,
    vocab_file=None,
    tokenizer_file=None,
    clean_up_tokenization_spaces=False,
    unk_token="<unk>",
    bos_token="<s>",
    eos_token="</s>",
    prefix_token="▁<PRE>",
    middle_token="▁<MID>",
    suffix_token="▁<SUF>",
    eot_token="▁<EOT>",
    fill_token="<FILL_ME>",
    additional_special_tokens=None,
    add_bos_token=True,
    add_eos_token=False,
    use_default_system_prompt=False,
    **kwargs,
):
    """
    Initializes an instance of the CodeLlamaTokenizerFast class.

    Args:
        self: The instance of the class.
        vocab_file (str, optional): Path to the vocabulary file. Defaults to None.
        tokenizer_file (str, optional): Path to the tokenizer file. Defaults to None.
        clean_up_tokenization_spaces (bool, optional): Whether to clean up tokenization spaces. Defaults to False.
        unk_token (str, optional): Unknown token. Defaults to '<unk>'.
        bos_token (str, optional): Beginning of sentence token. Defaults to '<s>'.
        eos_token (str, optional): End of sentence token. Defaults to '</s>'.
        prefix_token (str, optional): Prefix token. Defaults to '▁<PRE>'.
        middle_token (str, optional): Middle token. Defaults to '▁<MID>'.
        suffix_token (str, optional): Suffix token. Defaults to '▁<SUF>'.
        eot_token (str, optional): End of text token. Defaults to '▁<EOT>'.
        fill_token (str, optional): Fill token. Defaults to '<FILL_ME>'.
        additional_special_tokens (List[str], optional): Additional special tokens. Defaults to None.
        add_bos_token (bool, optional): Whether to add the beginning of sentence token. Defaults to True.
        add_eos_token (bool, optional): Whether to add the end of sentence token. Defaults to False.
        use_default_system_prompt (bool, optional): Whether to use the default system prompt. Defaults to False.
        **kwargs: Additional keyword arguments.

    Returns:
        None

    Raises:
        None
    """
    # mark tokens special to skip them
    additional_special_tokens = additional_special_tokens or []
    for token in [prefix_token, middle_token, suffix_token, eot_token]:
        additional_special_tokens += [token] if token is not None else []
    self.use_default_system_prompt = use_default_system_prompt

    super().__init__(
        vocab_file=vocab_file,
        tokenizer_file=tokenizer_file,
        clean_up_tokenization_spaces=clean_up_tokenization_spaces,
        additional_special_tokens=additional_special_tokens,
        unk_token=unk_token,
        bos_token=bos_token,
        eos_token=eos_token,
        add_bos_token=add_bos_token,
        add_eos_token=add_eos_token,
        prefix_token=prefix_token,
        middle_token=middle_token,
        suffix_token=suffix_token,
        eot_token=eot_token,
        fill_token=fill_token,
        use_default_system_prompt=use_default_system_prompt,
        **kwargs,
    )
    self._add_bos_token = add_bos_token
    self._add_eos_token = add_eos_token
    self.update_post_processor()

    self.vocab_file = vocab_file

    self._prefix_token = prefix_token
    self._middle_token = middle_token
    self._suffix_token = suffix_token
    self._eot_token = eot_token
    self.fill_token = fill_token

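Example: a minimal construction sketch. The local file paths are hypothetical, and vocab_file is only needed if you later want save_vocabulary() (and therefore can_save_slow_tokenizer) to work.

from mindnlp.transformers import CodeLlamaTokenizerFast

tok = CodeLlamaTokenizerFast(
    vocab_file="./code_llama/tokenizer.model",     # hypothetical SentencePiece model file
    tokenizer_file="./code_llama/tokenizer.json",  # hypothetical fast-tokenizer file
    add_bos_token=True,
    add_eos_token=False,
)
print(tok.prefix_token, tok.fill_token)  # ▁<PRE> <FILL_ME>
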
mindnlp.transformers.models.llama.tokenization_code_llama_fast.CodeLlamaTokenizerFast.build_inputs_with_special_tokens(token_ids_0, token_ids_1=None)

Build model inputs from a sequence or a pair of sequences by concatenating and adding special tokens.

A CodeLlama sequence has the following format, where X (and optionally Y) represents the sequence:

  • single sequence: <bos> X <eos>
  • pair of sequences: <bos> X Y <eos>

Pairs of sequences are not the expected use case, but they will be handled without a separator.

PARAMETER DESCRIPTION
token_ids_0

List of IDs to which the special tokens will be added.

TYPE: `List[int]`

token_ids_1

Optional second list of IDs for sequence pairs.

TYPE: `List[int]`, *optional* DEFAULT: None

RETURNS DESCRIPTION
List[int]

List[int]: list of input IDs with the appropriate special tokens.

Source code in mindnlp\transformers\models\llama\tokenization_code_llama_fast.py
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
def build_inputs_with_special_tokens(
    self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
) -> List[int]:
    """
    Build model inputs from a sequence or a pair of sequences by concatenating and adding special tokens.

    A CodeLlama sequence has the following format, where `X` (and optionally `Y`) represents the sequence:

    - single sequence: `<bos> X <eos>`
    - pair of sequences: `<bos> X Y <eos>`

    Pairs of sequences are not the expected use case, but they will be handled without a
    separator.

    Args:
        token_ids_0 (`List[int]`):
            List of IDs to which the special tokens will be added.
        token_ids_1 (`List[int]`, *optional*):
            Optional second list of IDs for sequence pairs.

    Returns:
        `List[int]`: list of [input IDs](../glossary#input-ids) with the appropriate special tokens.
    """
    if token_ids_1 is None:
        return [self.bos_token_id] + token_ids_0 + [self.eos_token_id]
    return [self.bos_token_id] + token_ids_0 + token_ids_1 + [self.eos_token_id]
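
Example: a usage sketch, assuming tok is a loaded instance. The inner IDs are placeholders; bos_token_id and eos_token_id are typically 1 and 2 for the Llama vocabularies.

ids = tok.build_inputs_with_special_tokens([319, 263])        # placeholder token ids
# -> [tok.bos_token_id, 319, 263, tok.eos_token_id]
pair_ids = tok.build_inputs_with_special_tokens([319], [263])
# -> [tok.bos_token_id, 319, 263, tok.eos_token_id]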

mindnlp.transformers.models.llama.tokenization_code_llama_fast.CodeLlamaTokenizerFast.encode_plus(text, text_pair=None, suffix_first=False, add_special_tokens=True, **kwargs)

Encodes the given text and text pair into tokens using the CodeLlamaTokenizerFast class.

PARAMETER DESCRIPTION
self

An instance of the CodeLlamaTokenizerFast class.

TYPE: CodeLlamaTokenizerFast

text

The input text to be encoded.

TYPE: str

text_pair

The optional second input text to be encoded. Defaults to None.

TYPE: str DEFAULT: None

suffix_first

Specifies whether the suffix should be placed first. Defaults to False.

TYPE: bool DEFAULT: False

add_special_tokens

Specifies whether to add special tokens. Defaults to True.

TYPE: bool DEFAULT: True

RETURNS DESCRIPTION
tokens

The encoded tokens. This is an instance of a class defined in the CodeLlamaTokenizerFast class.

RAISES DESCRIPTION
ValueError

If the input includes a prefix and a suffix used for the infilling task, the prefix_id, middle_id, suffix_id must all be initialized. Current values: (self.prefix_id, self.middle_id, self.suffix_id)

Source code in mindnlp\transformers\models\llama\tokenization_code_llama_fast.py
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
def encode_plus(self, text, text_pair=None, suffix_first=False, add_special_tokens=True, **kwargs):
    """
    Encodes the given text and text pair into tokens using the CodeLlamaTokenizerFast class.

    Args:
        self (CodeLlamaTokenizerFast): An instance of the CodeLlamaTokenizerFast class.
        text (str): The input text to be encoded.
        text_pair (str, optional): The optional second input text to be encoded. Defaults to None.
        suffix_first (bool, optional): Specifies whether the suffix should be placed first. Defaults to False.
        add_special_tokens (bool, optional): Specifies whether to add special tokens. Defaults to True.

    Returns:
        tokens: The encoded tokens. This is an instance of a class defined in the CodeLlamaTokenizerFast class.

    Raises:
        ValueError: If the input includes a `prefix` and a `suffix` used for the infilling task,
            the `prefix_id, middle_id, suffix_id` must all be initialized.
            Current values: (self.prefix_id, self.middle_id, self.suffix_id)
    """
    # hack to make sure the input is pre-processed, but outside the Rust tokenizer
    text_pair = kwargs.pop("suffix", text_pair)
    if self.fill_token is not None and self.fill_token in text and text_pair is None:
        text, text_pair = text.split(self.fill_token)

    if text_pair is None or len(text_pair) < 1:
        return super().encode_plus(text, text_pair, add_special_tokens=add_special_tokens, **kwargs)

    if None in (self.prefix_id, self.middle_id, self.suffix_id):
        raise ValueError(
            "Then input includes a `prefix` and a `suffix` used for the infilling task,"
            " the `prefix_id, middle_id, suffix_id` must all be initialized. Current"
            f" values : {self.prefix_id, self.middle_id, self.suffix_id}"
        )

    self.set_infilling_processor(False, suffix_first=suffix_first, add_special_tokens=add_special_tokens)
    tokens = super().encode_plus(" " + text, text_pair=text_pair, add_special_tokens=True, **kwargs)
    self.set_infilling_processor(True)
    return tokens
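
Example: a sketch of the infilling path, assuming tok is a loaded instance. A prompt containing the <FILL_ME> marker, or an explicit suffix= keyword, is routed through the infilling post-processor installed by set_infilling_processor.

prompt = 'def remove_non_ascii(s: str) -> str:\n    """ <FILL_ME>\n    return result'
enc = tok.encode_plus(prompt)   # the text is split on tok.fill_token into prefix / suffix internally
print(tok.convert_ids_to_tokens(enc["input_ids"])[:3])
# expected to start with the FIM control tokens, e.g. ['<s>', '▁<PRE>', ...]

# equivalent explicit form: prefix as `text`, suffix via the `suffix` keyword
enc2 = tok.encode_plus('def remove_non_ascii(s: str) -> str:\n    """ ', suffix="\n    return result")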

mindnlp.transformers.models.llama.tokenization_code_llama_fast.CodeLlamaTokenizerFast.save_vocabulary(save_directory, filename_prefix=None)

Save the vocabulary for a fast tokenizer.

PARAMETER DESCRIPTION
self

An instance of the CodeLlamaTokenizerFast class.

TYPE: CodeLlamaTokenizerFast

save_directory

The directory path where the vocabulary will be saved.

TYPE: str

filename_prefix

A prefix to be added to the filename. Defaults to None.

TYPE: Optional[str] DEFAULT: None

RETURNS DESCRIPTION
Tuple[str]

Tuple[str]: A tuple containing the path to the saved vocabulary file.

RAISES DESCRIPTION
ValueError

If the fast tokenizer does not have the necessary information to save the vocabulary for a slow tokenizer.

If save_directory is not an existing directory, the method logs an error and returns None instead of raising.

Source code in mindnlp\transformers\models\llama\tokenization_code_llama_fast.py
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]:
    """
    Save the vocabulary for a fast tokenizer.

    Args:
        self (CodeLlamaTokenizerFast): An instance of the CodeLlamaTokenizerFast class.
        save_directory (str): The directory path where the vocabulary will be saved.
        filename_prefix (Optional[str], optional): A prefix to be added to the filename. Defaults to None.

    Returns:
        Tuple[str]: A tuple containing the path to the saved vocabulary file.

    Raises:
        ValueError: If the fast tokenizer does not have the necessary information to save the vocabulary
            for a slow tokenizer.

    Note:
        If `save_directory` is not an existing directory, the method logs an error and returns `None`
        instead of raising.

    """
    if not self.can_save_slow_tokenizer:
        raise ValueError(
            "Your fast tokenizer does not have the necessary information to save the vocabulary for a slow "
            "tokenizer."
        )

    if not os.path.isdir(save_directory):
        logger.error(f"Vocabulary path ({save_directory}) should be a directory")
        return
    out_vocab_file = os.path.join(
        save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"]
    )

    if os.path.abspath(self.vocab_file) != os.path.abspath(out_vocab_file):
        copyfile(self.vocab_file, out_vocab_file)

    return (out_vocab_file,)
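
Example: a usage sketch with hypothetical paths, assuming tok was created with a vocab_file so that can_save_slow_tokenizer is True.

import os

os.makedirs("./exported_tokenizer", exist_ok=True)
if tok.can_save_slow_tokenizer:
    (vocab_path,) = tok.save_vocabulary("./exported_tokenizer", filename_prefix="code_llama")
    print(vocab_path)  # e.g. ./exported_tokenizer/code_llama-tokenizer.model
else:
    print("No vocab_file available; only the fast tokenizer files can be saved.")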

mindnlp.transformers.models.llama.tokenization_code_llama_fast.CodeLlamaTokenizerFast.set_infilling_processor(reset, suffix_first=False, add_special_tokens=True)

Updates the normalizer to make sure the prompt format for infilling is respected. The infilling format is the following:

  • if suffix_first

    • " <PRE> <SUF>{suf} <MID> {pre}"
  • else:

    • " <PRE> {pre} <SUF>{suf} <MID>"

If reset is set to True, the normalizer and post_processor are reset to their "normal" behaviour, which is to add a prefix space for the normalizer, and add a bos_token to the input text for the post_processor.

Source code in mindnlp\transformers\models\llama\tokenization_code_llama_fast.py
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
def set_infilling_processor(self, reset, suffix_first=False, add_special_tokens=True):
    r"""
    Updates the normalizer to make sure the prompt format for `infilling` is respected. The infilling format is the
    following:

    - if suffix_first

        - `" <PRE> <SUF>{suf} <MID> {pre}"`

    - else:

        - `" <PRE> {pre} <SUF>{suf} <MID>"`

    If `reset` is set to `True`, the `normalizer` and `post_processor` are reset to their "normal" behaviour, which
    is to add a prefix space for the normalizer, and add a `bos_token` to the input text for the `post_processor`.
    """
    if reset:
        self._tokenizer.normalizer = normalizers.Sequence(
            [
                normalizers.Prepend(prepend="▁"),
                normalizers.Replace(pattern=" ", content="▁"),
            ]
        )
        self.update_post_processor()
        return

    self._tokenizer.normalizer = normalizers.Replace(pattern=" ", content="▁")
    pair = [self.bos_token] if self.add_bos_token and add_special_tokens else []
    special_tokens = [(self.bos_token, self.bos_token_id)] if self.add_bos_token and add_special_tokens else []
    if suffix_first:
        # format as " <PRE> <SUF>{suf} <MID> {pre}"
        pair += [self.prefix_token, self.suffix_token, "$B", self.middle_token, "$A"]
        special_tokens += [
            (self.prefix_token, self.prefix_id),
            (self.suffix_token, self.suffix_id),
            (self.middle_token, self.middle_id),
        ]
    else:
        # format as " <PRE> {pre} <SUF>{suf} <MID>"
        pair += [self.prefix_token, "$A", self.suffix_token, "$B", self.middle_token]
        special_tokens += [
            (self.prefix_token, self.prefix_id),
            (self.suffix_token, self.suffix_id),
            (self.middle_token, self.middle_id),
        ]

    if self.add_eos_token and add_special_tokens:
        pair += [self.eos_token]
        special_tokens += [(self.eos_token, self.eos_token_id)]
    self._tokenizer.post_processor = processors.TemplateProcessing(
        single="$A", pair=pair, special_tokens=special_tokens
    )
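
Example: encode_plus normally toggles this for you; the sketch below (with tok as above) only illustrates the manual round trip, using the backend Rust tokenizer directly to show the installed template. The exact leading tokens depend on add_bos_token.

tok.set_infilling_processor(False, suffix_first=False)   # install the infilling template
enc = tok._tokenizer.encode(" def add(a, b):", "\n    return a + b")  # backend tokenizers.Encoding
print(enc.tokens[:3])  # expected to start with ['<s>', '▁<PRE>', ...] when add_bos_token is True
tok.set_infilling_processor(True)                         # restore the normal bos/eos template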

mindnlp.transformers.models.llama.tokenization_code_llama_fast.CodeLlamaTokenizerFast.update_post_processor()

Updates the underlying post processor with the current bos_token and eos_token.

Source code in mindnlp\transformers\models\llama\tokenization_code_llama_fast.py
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
def update_post_processor(self):
    """
    Updates the underlying post processor with the current `bos_token` and `eos_token`.
    """
    bos = self.bos_token
    bos_token_id = self.bos_token_id
    if bos is None and self.add_bos_token:
        raise ValueError("add_bos_token = True but bos_token = None")

    eos = self.eos_token
    eos_token_id = self.eos_token_id
    if eos is None and self.add_eos_token:
        raise ValueError("add_eos_token = True but eos_token = None")

    single = f"{(bos+':0 ') if self.add_bos_token else ''}$A:0{(' '+eos+':0') if self.add_eos_token else ''}"
    pair = f"{single}{(' '+bos+':1') if self.add_bos_token else ''} $B:1{(' '+eos+':1') if self.add_eos_token else ''}"

    special_tokens = []
    if self.add_bos_token:
        special_tokens.append((bos, bos_token_id))
    if self.add_eos_token:
        special_tokens.append((eos, eos_token_id))
    self._tokenizer.post_processor = processors.TemplateProcessing(
        single=single, pair=pair, special_tokens=special_tokens
    )
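
Example: because the add_bos_token / add_eos_token setters call update_post_processor, toggling them changes how subsequently encoded sequences are wrapped. A short sketch with tok as above:

print(tok("hello world")["input_ids"])   # [bos_id, ...] - no eos appended by default
tok.add_eos_token = True                 # setter re-runs update_post_processor
print(tok("hello world")["input_ids"])   # [bos_id, ..., eos_id]
tok.add_eos_token = False                # restore the default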