mindnlp.transformers.models.llama.modeling_llama

mindnlp.transformers.models.llama.modeling_llama.LlamaAttention

Bases: Module

Multi-headed attention from 'Attention Is All You Need' paper

Source code in mindnlp\transformers\models\llama\modeling_llama.py (lines 327-457)
class LlamaAttention(nn.Module):
    """Multi-headed attention from 'Attention Is All You Need' paper"""

    def __init__(self, config: LlamaConfig, layer_idx: Optional[int] = None):
        super().__init__()
        self.config = config
        self.layer_idx = layer_idx
        if layer_idx is None:
            logger.warning_once(
                f"Instantiating {self.__class__.__name__} without passing a `layer_idx` is not recommended and will "
                "lead to errors during the forward call if caching is used. Please make sure to provide a `layer_idx` "
                "when creating this class."
            )

        self.attention_dropout = config.attention_dropout
        self.hidden_size = config.hidden_size
        self.num_heads = config.num_attention_heads
        self.head_dim = self.hidden_size // self.num_heads
        self.num_key_value_heads = config.num_key_value_heads
        self.num_key_value_groups = self.num_heads // self.num_key_value_heads
        self.max_position_embeddings = config.max_position_embeddings
        self.rope_theta = config.rope_theta
        self.is_causal = True

        if (self.head_dim * self.num_heads) != self.hidden_size:
            raise ValueError(
                f"hidden_size must be divisible by num_heads (got `hidden_size`: {self.hidden_size}"
                f" and `num_heads`: {self.num_heads})."
            )

        self.q_proj = nn.Linear(self.hidden_size, self.num_heads * self.head_dim, bias=config.attention_bias)
        self.k_proj = nn.Linear(self.hidden_size, self.num_key_value_heads * self.head_dim, bias=config.attention_bias)
        self.v_proj = nn.Linear(self.hidden_size, self.num_key_value_heads * self.head_dim, bias=config.attention_bias)
        self.o_proj = nn.Linear(self.hidden_size, self.hidden_size, bias=config.attention_bias)

        # TODO (joao): remove in v4.45 (RoPE is computed in the model, not in the decoder layers)
        self.rotary_emb = LlamaRotaryEmbedding(config=self.config)

    def forward(
        self,
        hidden_states: mindspore.Tensor,
        attention_mask: Optional[mindspore.Tensor] = None,
        position_ids: Optional[mindspore.Tensor] = None,
        past_key_value: Optional[Cache] = None,
        output_attentions: bool = False,
        use_cache: bool = False,
        cache_position: Optional[mindspore.Tensor] = None,
        position_embeddings: Optional[Tuple[mindspore.Tensor, mindspore.Tensor]] = None,  # will become mandatory in v4.45
        **kwargs,
    ) -> Tuple[mindspore.Tensor, Optional[mindspore.Tensor], Optional[Tuple[mindspore.Tensor]]]:
        bsz, q_len, _ = hidden_states.shape

        if self.config.pretraining_tp > 1:
            key_value_slicing = (self.num_key_value_heads * self.head_dim) // self.config.pretraining_tp
            query_slices = self.q_proj.weight.split(
                (self.num_heads * self.head_dim) // self.config.pretraining_tp, dim=0
            )
            key_slices = self.k_proj.weight.split(key_value_slicing, dim=0)
            value_slices = self.v_proj.weight.split(key_value_slicing, dim=0)

            query_states = [F.linear(hidden_states, query_slices[i]) for i in range(self.config.pretraining_tp)]
            query_states = ops.cat(query_states, dim=-1)

            key_states = [F.linear(hidden_states, key_slices[i]) for i in range(self.config.pretraining_tp)]
            key_states = ops.cat(key_states, dim=-1)

            value_states = [F.linear(hidden_states, value_slices[i]) for i in range(self.config.pretraining_tp)]
            value_states = ops.cat(value_states, dim=-1)

        else:
            query_states = self.q_proj(hidden_states)
            key_states = self.k_proj(hidden_states)
            value_states = self.v_proj(hidden_states)

        query_states = ops.transpose(query_states.view(bsz, q_len, self.num_heads, self.head_dim), 1, 2)
        key_states = ops.transpose(key_states.view(bsz, q_len, self.num_key_value_heads, self.head_dim), 1, 2)
        value_states = ops.transpose(value_states.view(bsz, q_len, self.num_key_value_heads, self.head_dim), 1, 2)

        if position_embeddings is None:
            logger.warning_once(
                "The attention layers in this model are transitioning from computing the RoPE embeddings internally "
                "through `position_ids` (2D tensor with the indexes of the tokens), to using externally computed "
                "`position_embeddings` (Tuple of tensors, containing cos and sin). In v4.45 `position_ids` will be "
                "removed and `position_embeddings` will be mandatory."
            )
            cos, sin = self.rotary_emb(value_states, position_ids)
        else:
            cos, sin = position_embeddings
        query_states, key_states = apply_rotary_pos_emb(query_states, key_states, cos, sin)

        if past_key_value is not None:
            # sin and cos are specific to RoPE models; cache_position needed for the static cache
            cache_kwargs = {"sin": sin, "cos": cos, "cache_position": cache_position}
            key_states, value_states = past_key_value.update(key_states, value_states, self.layer_idx, cache_kwargs)

        key_states = repeat_kv(key_states, self.num_key_value_groups)
        value_states = repeat_kv(value_states, self.num_key_value_groups)

        attn_weights = ops.matmul(query_states, ops.transpose(key_states, 2, 3)) / math.sqrt(self.head_dim)

        if attention_mask is not None:  # no matter the length, we just slice it
            # causal_mask = attention_mask[:, :, :, : key_states.shape[-2]]
            causal_mask = ops.narrow(attention_mask, 3, 0, key_states.shape[-2])
            attn_weights = attn_weights + causal_mask

        # upcast attention to fp32
        attn_weights = nn.functional.softmax(attn_weights, dim=-1, dtype=mindspore.float32).to(query_states.dtype)
        attn_weights = nn.functional.dropout(attn_weights, p=self.attention_dropout, training=self.training)
        attn_output = ops.matmul(attn_weights, value_states)

        if attn_output.shape != (bsz, self.num_heads, q_len, self.head_dim):
            raise ValueError(
                f"`attn_output` should be of size {(bsz, self.num_heads, q_len, self.head_dim)}, but is"
                f" {attn_output.shape}"
            )

        attn_output = ops.transpose(attn_output, 1, 2)

        attn_output = attn_output.reshape(bsz, q_len, -1)

        if self.config.pretraining_tp > 1:
            attn_output = attn_output.split(self.hidden_size // self.config.pretraining_tp, dim=2)
            o_proj_slices = self.o_proj.weight.split(self.hidden_size // self.config.pretraining_tp, dim=1)
            attn_output = sum(F.linear(attn_output[i], o_proj_slices[i]) for i in range(self.config.pretraining_tp))
        else:
            attn_output = self.o_proj(attn_output)

        if not output_attentions:
            attn_weights = None

        return attn_output, attn_weights, past_key_value
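
The forward pass above is standard scaled dot-product attention with one Llama-specific wrinkle: when `num_key_value_heads` is smaller than `num_attention_heads`, each key/value head serves `num_key_value_groups` query heads and is tiled by `repeat_kv` before the matmul. The following NumPy sketch (illustrative only, with made-up toy sizes) walks through that shape bookkeeping:

import numpy as np

# Toy sizes (assumed for illustration): 32 query heads sharing 8 KV heads.
bsz, q_len, num_heads, num_kv_heads, head_dim = 1, 4, 32, 8, 16
n_groups = num_heads // num_kv_heads                  # num_key_value_groups = 4

q = np.random.randn(bsz, num_heads, q_len, head_dim)
k = np.random.randn(bsz, num_kv_heads, q_len, head_dim)
v = np.random.randn(bsz, num_kv_heads, q_len, head_dim)

# repeat_kv: tile each KV head n_groups times along the head axis
k = np.repeat(k, n_groups, axis=1)                    # (1, 32, 4, 16)
v = np.repeat(v, n_groups, axis=1)

scores = q @ k.transpose(0, 1, 3, 2) / np.sqrt(head_dim)
causal = np.triu(np.full((q_len, q_len), -np.inf), k=1)   # additive causal mask
scores = scores + causal
weights = np.exp(scores - scores.max(-1, keepdims=True))
weights = weights / weights.sum(-1, keepdims=True)    # softmax over key positions
out = weights @ v                                     # (1, 32, 4, 16)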

mindnlp.transformers.models.llama.modeling_llama.LlamaDecoderLayer

Bases: Module

Source code in mindnlp\transformers\models\llama\modeling_llama.py (lines 465-542)
class LlamaDecoderLayer(nn.Module):
    def __init__(self, config: LlamaConfig, layer_idx: int):
        super().__init__()
        self.hidden_size = config.hidden_size

        self.self_attn = LLAMA_ATTENTION_CLASSES[config._attn_implementation](config=config, layer_idx=layer_idx)

        self.mlp = LlamaMLP(config)
        self.input_layernorm = LlamaRMSNorm(config.hidden_size, eps=config.rms_norm_eps)
        self.post_attention_layernorm = LlamaRMSNorm(config.hidden_size, eps=config.rms_norm_eps)

    def forward(
        self,
        hidden_states: mindspore.Tensor,
        attention_mask: Optional[mindspore.Tensor] = None,
        position_ids: Optional[mindspore.Tensor] = None,
        past_key_value: Optional[Cache] = None,
        output_attentions: Optional[bool] = False,
        use_cache: Optional[bool] = False,
        cache_position: Optional[mindspore.Tensor] = None,
        position_embeddings: Optional[Tuple[mindspore.Tensor, mindspore.Tensor]] = None,  # will become mandatory in v4.45
        **kwargs,
    ) -> Tuple[mindspore.Tensor, Optional[Tuple[mindspore.Tensor, mindspore.Tensor]]]:
        """
        Args:
            hidden_states (`mindspore.Tensor`): input to the layer of shape `(batch, seq_len, embed_dim)`
            attention_mask (`mindspore.Tensor`, *optional*):
                attention mask of size `(batch_size, sequence_length)` if flash attention is used or `(batch_size, 1,
                query_sequence_length, key_sequence_length)` if default attention is used.
            output_attentions (`bool`, *optional*):
                Whether or not to return the attentions tensors of all attention layers. See `attentions` under
                returned tensors for more detail.
            use_cache (`bool`, *optional*):
                If set to `True`, `past_key_values` key value states are returned and can be used to speed up decoding
                (see `past_key_values`).
            past_key_value (`Tuple(mindspore.Tensor)`, *optional*): cached past key and value projection states
            cache_position (`mindspore.Tensor` of shape `(sequence_length)`, *optional*):
                Indices depicting the position of the input sequence tokens in the sequence
            position_embeddings (`Tuple[mindspore.Tensor, mindspore.Tensor]`, *optional*):
                Tuple containing the cosine and sine positional embeddings of shape `(batch_size, seq_len, head_dim)`,
                with `head_dim` being the embedding dimension of each attention head.
            kwargs (`dict`, *optional*):
                Arbitrary kwargs to be ignored, used for FSDP and other methods that injects code
                into the model
        """
        residual = hidden_states

        hidden_states = self.input_layernorm(hidden_states)

        # Self Attention
        hidden_states, self_attn_weights, present_key_value = self.self_attn(
            hidden_states=hidden_states,
            attention_mask=attention_mask,
            position_ids=position_ids,
            past_key_value=past_key_value,
            output_attentions=output_attentions,
            use_cache=use_cache,
            cache_position=cache_position,
            position_embeddings=position_embeddings,
            **kwargs,
        )
        hidden_states = residual + hidden_states

        # Fully Connected
        residual = hidden_states
        hidden_states = self.post_attention_layernorm(hidden_states)
        hidden_states = self.mlp(hidden_states)
        hidden_states = residual + hidden_states

        outputs = (hidden_states,)

        if output_attentions:
            outputs += (self_attn_weights,)

        if use_cache:
            outputs += (present_key_value,)

        return outputs
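
The layer follows the pre-norm residual pattern: each sublayer sees RMS-normalized input, and the un-normalized input is added back afterwards. A minimal sketch of that control flow, using stand-in callables rather than the real modules:

# Minimal sketch of the pre-norm residual pattern used above.
# `norm1`, `attn`, `norm2`, `mlp` are stand-ins for input_layernorm,
# self_attn, post_attention_layernorm and mlp (the stand-in attn returns
# only the hidden states, not the (hidden, weights, cache) tuple).
def decoder_layer(x, norm1, attn, norm2, mlp):
    residual = x
    x = norm1(x)          # normalize first (pre-norm) ...
    x = attn(x)           # ... then attend ...
    x = residual + x      # ... then add the residual back

    residual = x
    x = norm2(x)
    x = mlp(x)
    x = residual + x
    return x

# Quick smoke test with identity stand-ins:
out = decoder_layer(1.0, norm1=lambda t: t, attn=lambda t: t,
                    norm2=lambda t: t, mlp=lambda t: t)
# out == 4.0: each of the two residual additions doubles the value.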

mindnlp.transformers.models.llama.modeling_llama.LlamaDecoderLayer.forward(hidden_states, attention_mask=None, position_ids=None, past_key_value=None, output_attentions=False, use_cache=False, cache_position=None, position_embeddings=None, **kwargs)

PARAMETER DESCRIPTION
hidden_states

input to the layer of shape (batch, seq_len, embed_dim)

TYPE: `mindspore.Tensor`

attention_mask

attention mask of size (batch_size, sequence_length) if flash attention is used or (batch_size, 1, query_sequence_length, key_sequence_length) if default attention is used.

TYPE: `mindspore.Tensor`, *optional* DEFAULT: None

output_attentions

Whether or not to return the attentions tensors of all attention layers. See attentions under returned tensors for more detail.

TYPE: `bool`, *optional* DEFAULT: False

use_cache

If set to True, past_key_values key value states are returned and can be used to speed up decoding (see past_key_values).

TYPE: `bool`, *optional* DEFAULT: False

past_key_value

cached past key and value projection states

TYPE: `Tuple(mindspore.Tensor)`, *optional* DEFAULT: None

cache_position

Indices depicting the position of the input sequence tokens in the sequence

TYPE: `mindspore.Tensor` of shape `(sequence_length)`, *optional* DEFAULT: None

position_embeddings

Tuple containing the cosine and sine positional embeddings of shape (batch_size, seq_len, head_dim), with head_dim being the embedding dimension of each attention head.

TYPE: `Tuple[mindspore.Tensor, mindspore.Tensor]`, *optional* DEFAULT: None

kwargs

Arbitrary kwargs to be ignored, used for FSDP and other methods that inject code into the model

TYPE: `dict`, *optional* DEFAULT: {}

Source code in mindnlp\transformers\models\llama\modeling_llama.py (lines 476-542)
def forward(
    self,
    hidden_states: mindspore.Tensor,
    attention_mask: Optional[mindspore.Tensor] = None,
    position_ids: Optional[mindspore.Tensor] = None,
    past_key_value: Optional[Cache] = None,
    output_attentions: Optional[bool] = False,
    use_cache: Optional[bool] = False,
    cache_position: Optional[mindspore.Tensor] = None,
    position_embeddings: Optional[Tuple[mindspore.Tensor, mindspore.Tensor]] = None,  # will become mandatory in v4.45
    **kwargs,
) -> Tuple[mindspore.Tensor, Optional[Tuple[mindspore.Tensor, mindspore.Tensor]]]:
    """
    Args:
        hidden_states (`mindspore.Tensor`): input to the layer of shape `(batch, seq_len, embed_dim)`
        attention_mask (`mindspore.Tensor`, *optional*):
            attention mask of size `(batch_size, sequence_length)` if flash attention is used or `(batch_size, 1,
            query_sequence_length, key_sequence_length)` if default attention is used.
        output_attentions (`bool`, *optional*):
            Whether or not to return the attentions tensors of all attention layers. See `attentions` under
            returned tensors for more detail.
        use_cache (`bool`, *optional*):
            If set to `True`, `past_key_values` key value states are returned and can be used to speed up decoding
            (see `past_key_values`).
        past_key_value (`Tuple(mindspore.Tensor)`, *optional*): cached past key and value projection states
        cache_position (`mindspore.Tensor` of shape `(sequence_length)`, *optional*):
            Indices depicting the position of the input sequence tokens in the sequence
        position_embeddings (`Tuple[mindspore.Tensor, mindspore.Tensor]`, *optional*):
            Tuple containing the cosine and sine positional embeddings of shape `(batch_size, seq_len, head_dim)`,
            with `head_dim` being the embedding dimension of each attention head.
        kwargs (`dict`, *optional*):
            Arbitrary kwargs to be ignored, used for FSDP and other methods that injects code
            into the model
    """
    residual = hidden_states

    hidden_states = self.input_layernorm(hidden_states)

    # Self Attention
    hidden_states, self_attn_weights, present_key_value = self.self_attn(
        hidden_states=hidden_states,
        attention_mask=attention_mask,
        position_ids=position_ids,
        past_key_value=past_key_value,
        output_attentions=output_attentions,
        use_cache=use_cache,
        cache_position=cache_position,
        position_embeddings=position_embeddings,
        **kwargs,
    )
    hidden_states = residual + hidden_states

    # Fully Connected
    residual = hidden_states
    hidden_states = self.post_attention_layernorm(hidden_states)
    hidden_states = self.mlp(hidden_states)
    hidden_states = residual + hidden_states

    outputs = (hidden_states,)

    if output_attentions:
        outputs += (self_attn_weights,)

    if use_cache:
        outputs += (present_key_value,)

    return outputs

mindnlp.transformers.models.llama.modeling_llama.LlamaDynamicNTKScalingRotaryEmbedding

Bases: LlamaRotaryEmbedding

LlamaRotaryEmbedding extended with Dynamic NTK scaling. Credits to the Reddit users /u/bloc97 and /u/emozilla

Source code in mindnlp\transformers\models\llama\modeling_llama.py (lines 232-242)
class LlamaDynamicNTKScalingRotaryEmbedding(LlamaRotaryEmbedding):
    """LlamaRotaryEmbedding extended with Dynamic NTK scaling. Credits to the Reddit users /u/bloc97 and /u/emozilla"""

    def __init__(self, *args, **kwargs):
        logger.warning_once(
            "`LlamaDynamicNTKScalingRotaryEmbedding` is deprecated an will be removed in v4.45. Please use "
            "`LlamaRotaryEmbedding`, which now also does dynamic ntk scaling (simply pass the model config to "
            "__init__)."
        )
        kwargs["rope_type"] = "dynamic"
        super().__init__(*args, **kwargs)
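
The idea behind dynamic NTK scaling is to enlarge the RoPE base once the running sequence length exceeds the training context, so the rotary frequencies stretch instead of repeating. A rough NumPy sketch of that rescaling; the exact formula follows the widely used upstream transformers implementation and is an assumption here, not taken from this file:

import numpy as np

# Illustrative only: rescale the RoPE base when seq_len exceeds the trained
# context length, then derive the per-pair inverse frequencies from it.
def dynamic_ntk_base(base, head_dim, seq_len, max_pos, factor):
    if seq_len <= max_pos:
        return base
    scale = (factor * seq_len / max_pos) - (factor - 1)
    return base * scale ** (head_dim / (head_dim - 2))

base = dynamic_ntk_base(10000.0, head_dim=128, seq_len=8192, max_pos=4096, factor=2.0)
inv_freq = 1.0 / base ** (np.arange(0, 128, 2) / 128)   # rotation frequency per dim pair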

mindnlp.transformers.models.llama.modeling_llama.LlamaForCausalLM

Bases: LlamaPreTrainedModel

Source code in mindnlp\transformers\models\llama\modeling_llama.py (lines 760-951)
class LlamaForCausalLM(LlamaPreTrainedModel):
    _tied_weights_keys = ["lm_head.weight"]

    def __init__(self, config):
        super().__init__(config)
        self.model = LlamaModel(config)
        self.vocab_size = config.vocab_size
        self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False)

        # Initialize weights and apply final processing
        self.post_init()

    def get_input_embeddings(self):
        return self.model.embed_tokens

    def set_input_embeddings(self, value):
        self.model.embed_tokens = value

    def get_output_embeddings(self):
        return self.lm_head

    def set_output_embeddings(self, new_embeddings):
        self.lm_head = new_embeddings

    def set_decoder(self, decoder):
        self.model = decoder

    def get_decoder(self):
        return self.model

    def forward(
        self,
        input_ids: mindspore.Tensor = None,
        attention_mask: Optional[mindspore.Tensor] = None,
        position_ids: Optional[mindspore.Tensor] = None,
        past_key_values: Optional[Union[Cache, List[mindspore.Tensor]]] = None,
        inputs_embeds: Optional[mindspore.Tensor] = None,
        labels: Optional[mindspore.Tensor] = None,
        use_cache: Optional[bool] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
        cache_position: Optional[mindspore.Tensor] = None,
        num_logits_to_keep: int = 0,
    ) -> Union[Tuple, CausalLMOutputWithPast]:
        r"""
        Args:
            labels (`mindspore.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
                Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
                config.vocab_size]` or -100 (see `input_ids` docstring). Tokens with indices set to `-100` are ignored
                (masked), the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`.

        Returns:

        Example:

        ```python
        >>> from transformers import AutoTokenizer, LlamaForCausalLM

        >>> model = LlamaForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf")
        >>> tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")

        >>> prompt = "Hey, are you conscious? Can you talk to me?"
        >>> inputs = tokenizer(prompt, return_tensors="ms")

        >>> # Generate
        >>> generate_ids = model.generate(inputs.input_ids, max_length=30)
        >>> tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
        "Hey, are you conscious? Can you talk to me?\nI'm not conscious, but I can talk to you."
        ```"""
        output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
        output_hidden_states = (
            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
        )
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        # decoder outputs consists of (dec_features, layer_state, dec_hidden, dec_attn)
        outputs = self.model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            position_ids=position_ids,
            past_key_values=past_key_values,
            inputs_embeds=inputs_embeds,
            use_cache=use_cache,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
            cache_position=cache_position,
        )

        hidden_states = outputs[0]
        if self.config.pretraining_tp > 1:
            lm_head_slices = self.lm_head.weight.split(self.vocab_size // self.config.pretraining_tp, dim=0)
            logits = [F.linear(hidden_states, lm_head_slices[i]) for i in range(self.config.pretraining_tp)]
            logits = ops.cat(logits, dim=-1)
        else:
            logits = self.lm_head(hidden_states[:, -num_logits_to_keep:, :]).float()
        logits = logits.float()

        loss = None
        if labels is not None:
            # Shift so that tokens < n predict n
            shift_logits = logits[..., :-1, :]
            shift_labels = labels[..., 1:]
            # Flatten the tokens
            loss_fct = CrossEntropyLoss()
            shift_logits = shift_logits.view(-1, self.config.vocab_size)
            shift_labels = shift_labels.view(-1)
            # Enable model parallelism
            loss = loss_fct(shift_logits, shift_labels)

        if not return_dict:
            output = (logits,) + outputs[1:]
            return (loss,) + output if loss is not None else output

        return CausalLMOutputWithPast(
            loss=loss,
            logits=logits,
            past_key_values=outputs.past_key_values,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )

    def prepare_inputs_for_generation(
        self,
        input_ids,
        past_key_values=None,
        attention_mask=None,
        inputs_embeds=None,
        cache_position=None,
        position_ids=None,
        use_cache=True,
        num_logits_to_keep=None,
        **kwargs,
    ):
        # If we have cache: let's slice `input_ids` through `cache_position`, to keep only the unprocessed tokens
        # Exception 1: when passing input_embeds, input_ids may be missing entries
        # Exception 2: some generation methods do special slicing of input_ids, so we don't need to do it here
        if past_key_values is not None:
            if inputs_embeds is not None:  # Exception 1
                if 0 not in input_ids.shape:
                    # input_ids = input_ids[:, -cache_position.shape[0] :]
                    input_ids = ops.narrow(input_ids, 1, input_ids.shape[1] - cache_position.shape[0], cache_position.shape[0])
            elif input_ids.shape[1] != cache_position.shape[0]:  # Default case (the "else", a no op, is Exception 2)
                # input_ids = input_ids[:, cache_position]
                input_ids = ops.index_select(input_ids, -1, cache_position)

        if attention_mask is not None and position_ids is None:
            # create position_ids on the fly for batch generation
            position_ids = ops.cumsum(attention_mask.int(), -1) - 1
            position_ids = ops.masked_fill(position_ids, attention_mask == 0, 1)
            if past_key_values:
                # position_ids = position_ids[:, -input_ids.shape[1] :]
                position_ids = ops.narrow(position_ids, 1, position_ids.shape[1] - input_ids.shape[1], input_ids.shape[1])
        # if `inputs_embeds` are passed, we only want to use them in the 1st generation step
        if inputs_embeds is not None and cache_position[0] == 0:
            model_inputs = {"inputs_embeds": inputs_embeds}
        else:
            model_inputs = {"input_ids": input_ids}

        if isinstance(past_key_values, StaticCache) and attention_mask.ndim == 2:
            if inputs_embeds is not None:
                batch_size, sequence_length = inputs_embeds.shape
            else:
                batch_size, sequence_length = input_ids.shape

            dtype = self.lm_head.weight.dtype
            min_dtype = float(ops.finfo(dtype).min)

            attention_mask = _prepare_4d_causal_attention_mask_with_cache_position(
                attention_mask,
                sequence_length=sequence_length,
                target_length=past_key_values.get_max_length(),
                dtype=dtype,
                min_dtype=min_dtype,
                cache_position=cache_position,
                batch_size=batch_size,
            )

        if num_logits_to_keep is not None:
            model_inputs["num_logits_to_keep"] = num_logits_to_keep

        model_inputs.update(
            {
                "position_ids": position_ids,
                "cache_position": cache_position,
                "past_key_values": past_key_values,
                "use_cache": use_cache,
                "attention_mask": attention_mask,
            }
        )
        return model_inputs
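
`prepare_inputs_for_generation` derives `position_ids` from the attention mask when none are given: a cumulative sum over the mask yields 0-based positions for real tokens, and padded slots are filled with a dummy value of 1. A small worked example of that trick (plain NumPy, illustrative only):

import numpy as np

# Left-padded batch: 0 marks padding, 1 marks real tokens.
attention_mask = np.array([[0, 0, 1, 1, 1],
                           [1, 1, 1, 1, 1]])

position_ids = attention_mask.cumsum(axis=-1) - 1      # [[-1,-1,0,1,2], [0,1,2,3,4]]
position_ids[attention_mask == 0] = 1                  # padded slots get a dummy 1
print(position_ids)
# [[1 1 0 1 2]
#  [0 1 2 3 4]]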

mindnlp.transformers.models.llama.modeling_llama.LlamaForCausalLM.forward(input_ids=None, attention_mask=None, position_ids=None, past_key_values=None, inputs_embeds=None, labels=None, use_cache=None, output_attentions=None, output_hidden_states=None, return_dict=None, cache_position=None, num_logits_to_keep=0)

PARAMETER DESCRIPTION
labels

Labels for computing the masked language modeling loss. Indices should either be in [0, ..., config.vocab_size] or -100 (see input_ids docstring). Tokens with indices set to -100 are ignored (masked), the loss is only computed for the tokens with labels in [0, ..., config.vocab_size].

TYPE: `mindspore.Tensor` of shape `(batch_size, sequence_length)`, *optional* DEFAULT: None

Example:

>>> from transformers import AutoTokenizer, LlamaForCausalLM

>>> model = LlamaForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf")
>>> tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")

>>> prompt = "Hey, are you conscious? Can you talk to me?"
>>> inputs = tokenizer(prompt, return_tensors="ms")

>>> # Generate
>>> generate_ids = model.generate(inputs.input_ids, max_length=30)
>>> tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
"Hey, are you conscious? Can you talk to me?\nI'm not conscious, but I can talk to you."
Source code in mindnlp\transformers\models\llama\modeling_llama.py (lines 790-881)
def forward(
    self,
    input_ids: mindspore.Tensor = None,
    attention_mask: Optional[mindspore.Tensor] = None,
    position_ids: Optional[mindspore.Tensor] = None,
    past_key_values: Optional[Union[Cache, List[mindspore.Tensor]]] = None,
    inputs_embeds: Optional[mindspore.Tensor] = None,
    labels: Optional[mindspore.Tensor] = None,
    use_cache: Optional[bool] = None,
    output_attentions: Optional[bool] = None,
    output_hidden_states: Optional[bool] = None,
    return_dict: Optional[bool] = None,
    cache_position: Optional[mindspore.Tensor] = None,
    num_logits_to_keep: int = 0,
) -> Union[Tuple, CausalLMOutputWithPast]:
    r"""
    Args:
        labels (`mindspore.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
            config.vocab_size]` or -100 (see `input_ids` docstring). Tokens with indices set to `-100` are ignored
            (masked), the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`.

    Returns:

    Example:

    ```python
    >>> from transformers import AutoTokenizer, LlamaForCausalLM

    >>> model = LlamaForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf")
    >>> tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")

    >>> prompt = "Hey, are you conscious? Can you talk to me?"
    >>> inputs = tokenizer(prompt, return_tensors="ms")

    >>> # Generate
    >>> generate_ids = model.generate(inputs.input_ids, max_length=30)
    >>> tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
    "Hey, are you conscious? Can you talk to me?\nI'm not conscious, but I can talk to you."
    ```"""
    output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
    output_hidden_states = (
        output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
    )
    return_dict = return_dict if return_dict is not None else self.config.use_return_dict

    # decoder outputs consists of (dec_features, layer_state, dec_hidden, dec_attn)
    outputs = self.model(
        input_ids=input_ids,
        attention_mask=attention_mask,
        position_ids=position_ids,
        past_key_values=past_key_values,
        inputs_embeds=inputs_embeds,
        use_cache=use_cache,
        output_attentions=output_attentions,
        output_hidden_states=output_hidden_states,
        return_dict=return_dict,
        cache_position=cache_position,
    )

    hidden_states = outputs[0]
    if self.config.pretraining_tp > 1:
        lm_head_slices = self.lm_head.weight.split(self.vocab_size // self.config.pretraining_tp, dim=0)
        logits = [F.linear(hidden_states, lm_head_slices[i]) for i in range(self.config.pretraining_tp)]
        logits = ops.cat(logits, dim=-1)
    else:
        logits = self.lm_head(hidden_states[:, -num_logits_to_keep:, :]).float()
    logits = logits.float()

    loss = None
    if labels is not None:
        # Shift so that tokens < n predict n
        shift_logits = logits[..., :-1, :]
        shift_labels = labels[..., 1:]
        # Flatten the tokens
        loss_fct = CrossEntropyLoss()
        shift_logits = shift_logits.view(-1, self.config.vocab_size)
        shift_labels = shift_labels.view(-1)
        # Enable model parallelism
        loss = loss_fct(shift_logits, shift_labels)

    if not return_dict:
        output = (logits,) + outputs[1:]
        return (loss,) + output if loss is not None else output

    return CausalLMOutputWithPast(
        loss=loss,
        logits=logits,
        past_key_values=outputs.past_key_values,
        hidden_states=outputs.hidden_states,
        attentions=outputs.attentions,
    )
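
When `labels` are passed, the loss aligns each position's logits with the next token: the last logit row is dropped and the labels are shifted left by one before the flattened cross-entropy. A tiny NumPy illustration of that alignment (labels equal to `-100` would additionally be ignored in the real loss):

import numpy as np

vocab_size, seq_len = 7, 5
logits = np.random.randn(1, seq_len, vocab_size)
labels = np.array([[3, 1, 4, 1, 5]])

# Position i predicts token i + 1, so drop the last logit row and the first label.
shift_logits = logits[:, :-1, :].reshape(-1, vocab_size)   # (4, 7)
shift_labels = labels[:, 1:].reshape(-1)                   # [1, 4, 1, 5]

# Flattened cross-entropy over the four (logit, next-token) pairs.
log_probs = shift_logits - np.log(np.exp(shift_logits).sum(-1, keepdims=True))
loss = -log_probs[np.arange(shift_labels.size), shift_labels].mean()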

mindnlp.transformers.models.llama.modeling_llama.LlamaForQuestionAnswering

Bases: LlamaPreTrainedModel

Source code in mindnlp\transformers\models\llama\modeling_llama.py (lines 1064-1152)
class LlamaForQuestionAnswering(LlamaPreTrainedModel):
    base_model_prefix = "transformer"

    # Copied from transformers.models.bloom.modeling_bloom.BloomForQuestionAnswering.__init__ with Bloom->Llama
    def __init__(self, config):
        super().__init__(config)
        self.transformer = LlamaModel(config)
        self.qa_outputs = nn.Linear(config.hidden_size, 2)

        # Initialize weights and apply final processing
        self.post_init()

    def get_input_embeddings(self):
        return self.transformer.embed_tokens

    def set_input_embeddings(self, value):
        self.transformer.embed_tokens = value

    def forward(
        self,
        input_ids: Optional[mindspore.Tensor] = None,
        attention_mask: Optional[mindspore.Tensor] = None,
        position_ids: Optional[mindspore.Tensor] = None,
        past_key_values: Optional[Union[Cache, List[mindspore.Tensor]]] = None,
        inputs_embeds: Optional[mindspore.Tensor] = None,
        start_positions: Optional[mindspore.Tensor] = None,
        end_positions: Optional[mindspore.Tensor] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[Tuple, QuestionAnsweringModelOutput]:
        r"""
        start_positions (`mindspore.Tensor` of shape `(batch_size,)`, *optional*):
            Labels for position (index) of the start of the labelled span for computing the token classification loss.
            Positions are clamped to the length of the sequence (`sequence_length`). Position outside of the sequence
            are not taken into account for computing the loss.
        end_positions (`mindspore.Tensor` of shape `(batch_size,)`, *optional*):
            Labels for position (index) of the end of the labelled span for computing the token classification loss.
            Positions are clamped to the length of the sequence (`sequence_length`). Position outside of the sequence
            are not taken into account for computing the loss.
        """
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        outputs = self.transformer(
            input_ids,
            attention_mask=attention_mask,
            position_ids=position_ids,
            past_key_values=past_key_values,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )

        sequence_output = outputs[0]

        logits = self.qa_outputs(sequence_output)
        start_logits, end_logits = ops.split(logits, 1, dim=-1)
        start_logits = start_logits.squeeze(-1)
        end_logits = end_logits.squeeze(-1)

        total_loss = None
        if start_positions is not None and end_positions is not None:
            # If we are on multi-GPU, split add a dimension
            if len(start_positions.shape) > 1:
                start_positions = start_positions.squeeze(-1)
            if len(end_positions.shape) > 1:
                end_positions = end_positions.squeeze(-1)
            # sometimes the start/end positions are outside our model inputs, we ignore these terms
            ignored_index = start_logits.shape[1]
            start_positions = start_positions.clamp(0, ignored_index)
            end_positions = end_positions.clamp(0, ignored_index)

            loss_fct = CrossEntropyLoss(ignore_index=ignored_index)
            start_loss = loss_fct(start_logits, start_positions)
            end_loss = loss_fct(end_logits, end_positions)
            total_loss = (start_loss + end_loss) / 2

        if not return_dict:
            output = (start_logits, end_logits) + outputs[2:]
            return ((total_loss,) + output) if total_loss is not None else output

        return QuestionAnsweringModelOutput(
            loss=total_loss,
            start_logits=start_logits,
            end_logits=end_logits,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )
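
`qa_outputs` produces two scores per token, which are split into start and end logits; a simple greedy decoder then takes the argmax of each (real QA pipelines additionally enforce `start <= end` and a maximum span length). A minimal NumPy sketch of the split:

import numpy as np

batch, seq_len = 1, 6
logits = np.random.randn(batch, seq_len, 2)       # output of the qa_outputs head

start_logits = logits[..., 0]                     # (1, 6)
end_logits = logits[..., 1]                       # (1, 6)

# Greedy span prediction for the first example in the batch.
start = int(start_logits.argmax(-1)[0])
end = int(end_logits.argmax(-1)[0])
print(start, end)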

mindnlp.transformers.models.llama.modeling_llama.LlamaForQuestionAnswering.forward(input_ids=None, attention_mask=None, position_ids=None, past_key_values=None, inputs_embeds=None, start_positions=None, end_positions=None, output_attentions=None, output_hidden_states=None, return_dict=None)

start_positions (`mindspore.Tensor` of shape `(batch_size,)`, *optional*): Labels for position (index) of the start of the labelled span for computing the token classification loss. Positions are clamped to the length of the sequence (`sequence_length`). Positions outside of the sequence are not taken into account for computing the loss.

end_positions (`mindspore.Tensor` of shape `(batch_size,)`, *optional*): Labels for position (index) of the end of the labelled span for computing the token classification loss. Positions are clamped to the length of the sequence (`sequence_length`). Positions outside of the sequence are not taken into account for computing the loss.

Source code in mindnlp\transformers\models\llama\modeling_llama.py (lines 1082-1152)
def forward(
    self,
    input_ids: Optional[mindspore.Tensor] = None,
    attention_mask: Optional[mindspore.Tensor] = None,
    position_ids: Optional[mindspore.Tensor] = None,
    past_key_values: Optional[Union[Cache, List[mindspore.Tensor]]] = None,
    inputs_embeds: Optional[mindspore.Tensor] = None,
    start_positions: Optional[mindspore.Tensor] = None,
    end_positions: Optional[mindspore.Tensor] = None,
    output_attentions: Optional[bool] = None,
    output_hidden_states: Optional[bool] = None,
    return_dict: Optional[bool] = None,
) -> Union[Tuple, QuestionAnsweringModelOutput]:
    r"""
    start_positions (`mindspore.Tensor` of shape `(batch_size,)`, *optional*):
        Labels for position (index) of the start of the labelled span for computing the token classification loss.
        Positions are clamped to the length of the sequence (`sequence_length`). Position outside of the sequence
        are not taken into account for computing the loss.
    end_positions (`mindspore.Tensor` of shape `(batch_size,)`, *optional*):
        Labels for position (index) of the end of the labelled span for computing the token classification loss.
        Positions are clamped to the length of the sequence (`sequence_length`). Position outside of the sequence
        are not taken into account for computing the loss.
    """
    return_dict = return_dict if return_dict is not None else self.config.use_return_dict

    outputs = self.transformer(
        input_ids,
        attention_mask=attention_mask,
        position_ids=position_ids,
        past_key_values=past_key_values,
        inputs_embeds=inputs_embeds,
        output_attentions=output_attentions,
        output_hidden_states=output_hidden_states,
        return_dict=return_dict,
    )

    sequence_output = outputs[0]

    logits = self.qa_outputs(sequence_output)
    start_logits, end_logits = ops.split(logits, 1, dim=-1)
    start_logits = start_logits.squeeze(-1)
    end_logits = end_logits.squeeze(-1)

    total_loss = None
    if start_positions is not None and end_positions is not None:
        # If we are on multi-GPU, split add a dimension
        if len(start_positions.shape) > 1:
            start_positions = start_positions.squeeze(-1)
        if len(end_positions.shape) > 1:
            end_positions = end_positions.squeeze(-1)
        # sometimes the start/end positions are outside our model inputs, we ignore these terms
        ignored_index = start_logits.shape[1]
        start_positions = start_positions.clamp(0, ignored_index)
        end_positions = end_positions.clamp(0, ignored_index)

        loss_fct = CrossEntropyLoss(ignore_index=ignored_index)
        start_loss = loss_fct(start_logits, start_positions)
        end_loss = loss_fct(end_logits, end_positions)
        total_loss = (start_loss + end_loss) / 2

    if not return_dict:
        output = (start_logits, end_logits) + outputs[2:]
        return ((total_loss,) + output) if total_loss is not None else output

    return QuestionAnsweringModelOutput(
        loss=total_loss,
        start_logits=start_logits,
        end_logits=end_logits,
        hidden_states=outputs.hidden_states,
        attentions=outputs.attentions,
    )

mindnlp.transformers.models.llama.modeling_llama.LlamaForSequenceClassification

Bases: LlamaPreTrainedModel

Source code in mindnlp\transformers\models\llama\modeling_llama.py (lines 954-1061)
class LlamaForSequenceClassification(LlamaPreTrainedModel):
    def __init__(self, config):
        super().__init__(config)
        self.num_labels = config.num_labels
        self.model = LlamaModel(config)
        self.score = nn.Linear(config.hidden_size, self.num_labels, bias=False)

        # Initialize weights and apply final processing
        self.post_init()

    def get_input_embeddings(self):
        return self.model.embed_tokens

    def set_input_embeddings(self, value):
        self.model.embed_tokens = value

    def forward(
        self,
        input_ids: Optional[mindspore.Tensor] = None,
        attention_mask: Optional[mindspore.Tensor] = None,
        position_ids: Optional[mindspore.Tensor] = None,
        past_key_values: Optional[Union[Cache, List[mindspore.Tensor]]] = None,
        inputs_embeds: Optional[mindspore.Tensor] = None,
        labels: Optional[mindspore.Tensor] = None,
        use_cache: Optional[bool] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[Tuple, SequenceClassifierOutputWithPast]:
        r"""
        labels (`mindspore.Tensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        """
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        transformer_outputs = self.model(
            input_ids,
            attention_mask=attention_mask,
            position_ids=position_ids,
            past_key_values=past_key_values,
            inputs_embeds=inputs_embeds,
            use_cache=use_cache,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )
        hidden_states = transformer_outputs[0]
        logits = self.score(hidden_states)

        if input_ids is not None:
            batch_size = input_ids.shape[0]
        else:
            batch_size = inputs_embeds.shape[0]

        if self.config.pad_token_id is None and batch_size != 1:
            raise ValueError("Cannot handle batch sizes > 1 if no padding token is defined.")
        if self.config.pad_token_id is None:
            sequence_lengths = -1
        else:
            if input_ids is not None:
                # if no pad token found, use modulo instead of reverse indexing for ONNX compatibility
                sequence_lengths = ops.eq(input_ids, self.config.pad_token_id).int().argmax(-1) - 1
                sequence_lengths = sequence_lengths % input_ids.shape[-1]
            else:
                sequence_lengths = -1

        if ON_ORANGE_PI:
            if isinstance(sequence_lengths, mindspore.Tensor):
                sequence_lengths = sequence_lengths.to(mindspore.int32)
            pooled_logits = ops.getitem(logits, (ops.arange(batch_size), sequence_lengths))
        else:
            pooled_logits = logits[ops.arange(batch_size), sequence_lengths]

        loss = None
        if labels is not None:
            if self.config.problem_type is None:
                if self.num_labels == 1:
                    self.config.problem_type = "regression"
                elif self.num_labels > 1 and labels.dtype in (mindspore.int64, mindspore.int32):
                    self.config.problem_type = "single_label_classification"
                else:
                    self.config.problem_type = "multi_label_classification"

            if self.config.problem_type == "regression":
                loss_fct = MSELoss()
                if self.num_labels == 1:
                    loss = loss_fct(pooled_logits.squeeze(), labels.squeeze())
                else:
                    loss = loss_fct(pooled_logits, labels)
            elif self.config.problem_type == "single_label_classification":
                loss_fct = CrossEntropyLoss()
                loss = loss_fct(pooled_logits.view(-1, self.num_labels), labels.view(-1))
            elif self.config.problem_type == "multi_label_classification":
                loss_fct = BCEWithLogitsLoss()
                loss = loss_fct(pooled_logits, labels)
        if not return_dict:
            output = (pooled_logits,) + transformer_outputs[1:]
            return ((loss,) + output) if loss is not None else output

        return SequenceClassifierOutputWithPast(
            loss=loss,
            logits=pooled_logits,
            past_key_values=transformer_outputs.past_key_values,
            hidden_states=transformer_outputs.hidden_states,
            attentions=transformer_outputs.attentions,
        )
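
Since there is no `[CLS]` token, classification pools the hidden state of each sequence's last non-padding token. The index arithmetic above finds the first pad token, steps back one position, and uses a modulo so that a row with no padding wraps around to the final position (this also keeps the expression ONNX-friendly). A worked NumPy example:

import numpy as np

pad_token_id = 0
input_ids = np.array([[5, 6, 7, 0, 0],    # 3 real tokens, right-padded
                      [5, 6, 7, 8, 9]])   # no padding at all

first_pad = (input_ids == pad_token_id).astype(int).argmax(-1)   # [3, 0]
sequence_lengths = (first_pad - 1) % input_ids.shape[-1]         # [2, 4]

# Row 0: last real token is at index 2.
# Row 1: argmax over an all-zero row is 0, and (0 - 1) % 5 = 4 -> last position.
print(sequence_lengths)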

mindnlp.transformers.models.llama.modeling_llama.LlamaForSequenceClassification.forward(input_ids=None, attention_mask=None, position_ids=None, past_key_values=None, inputs_embeds=None, labels=None, use_cache=None, output_attentions=None, output_hidden_states=None, return_dict=None)

labels (`mindspore.Tensor` of shape `(batch_size,)`, *optional*): Labels for computing the sequence classification/regression loss. Indices should be in `[0, ..., config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss); if `config.num_labels > 1` a classification loss is computed (Cross-Entropy).

Source code in mindnlp\transformers\models\llama\modeling_llama.py (lines 970-1061)
def forward(
    self,
    input_ids: Optional[mindspore.Tensor] = None,
    attention_mask: Optional[mindspore.Tensor] = None,
    position_ids: Optional[mindspore.Tensor] = None,
    past_key_values: Optional[Union[Cache, List[mindspore.Tensor]]] = None,
    inputs_embeds: Optional[mindspore.Tensor] = None,
    labels: Optional[mindspore.Tensor] = None,
    use_cache: Optional[bool] = None,
    output_attentions: Optional[bool] = None,
    output_hidden_states: Optional[bool] = None,
    return_dict: Optional[bool] = None,
) -> Union[Tuple, SequenceClassifierOutputWithPast]:
    r"""
    labels (`mindspore.Tensor` of shape `(batch_size,)`, *optional*):
        Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
        config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
        `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
    """
    return_dict = return_dict if return_dict is not None else self.config.use_return_dict

    transformer_outputs = self.model(
        input_ids,
        attention_mask=attention_mask,
        position_ids=position_ids,
        past_key_values=past_key_values,
        inputs_embeds=inputs_embeds,
        use_cache=use_cache,
        output_attentions=output_attentions,
        output_hidden_states=output_hidden_states,
        return_dict=return_dict,
    )
    hidden_states = transformer_outputs[0]
    logits = self.score(hidden_states)

    if input_ids is not None:
        batch_size = input_ids.shape[0]
    else:
        batch_size = inputs_embeds.shape[0]

    if self.config.pad_token_id is None and batch_size != 1:
        raise ValueError("Cannot handle batch sizes > 1 if no padding token is defined.")
    if self.config.pad_token_id is None:
        sequence_lengths = -1
    else:
        if input_ids is not None:
            # if no pad token found, use modulo instead of reverse indexing for ONNX compatibility
            sequence_lengths = ops.eq(input_ids, self.config.pad_token_id).int().argmax(-1) - 1
            sequence_lengths = sequence_lengths % input_ids.shape[-1]
        else:
            sequence_lengths = -1

    if ON_ORANGE_PI:
        if isinstance(sequence_lengths, mindspore.Tensor):
            sequence_lengths = sequence_lengths.to(mindspore.int32)
        pooled_logits = ops.getitem(logits, (ops.arange(batch_size), sequence_lengths))
    else:
        pooled_logits = logits[ops.arange(batch_size), sequence_lengths]

    loss = None
    if labels is not None:
        if self.config.problem_type is None:
            if self.num_labels == 1:
                self.config.problem_type = "regression"
            elif self.num_labels > 1 and labels.dtype in (mindspore.int64, mindspore.int32):
                self.config.problem_type = "single_label_classification"
            else:
                self.config.problem_type = "multi_label_classification"

        if self.config.problem_type == "regression":
            loss_fct = MSELoss()
            if self.num_labels == 1:
                loss = loss_fct(pooled_logits.squeeze(), labels.squeeze())
            else:
                loss = loss_fct(pooled_logits, labels)
        elif self.config.problem_type == "single_label_classification":
            loss_fct = CrossEntropyLoss()
            loss = loss_fct(pooled_logits.view(-1, self.num_labels), labels.view(-1))
        elif self.config.problem_type == "multi_label_classification":
            loss_fct = BCEWithLogitsLoss()
            loss = loss_fct(pooled_logits, labels)
    if not return_dict:
        output = (pooled_logits,) + transformer_outputs[1:]
        return ((loss,) + output) if loss is not None else output

    return SequenceClassifierOutputWithPast(
        loss=loss,
        logits=pooled_logits,
        past_key_values=transformer_outputs.past_key_values,
        hidden_states=transformer_outputs.hidden_states,
        attentions=transformer_outputs.attentions,
    )

mindnlp.transformers.models.llama.modeling_llama.LlamaForTokenClassification

Bases: LlamaPreTrainedModel

Source code in mindnlp\transformers\models\llama\modeling_llama.py (lines 1155-1228)
class LlamaForTokenClassification(LlamaPreTrainedModel):
    def __init__(self, config):
        super().__init__(config)
        self.num_labels = config.num_labels
        self.model = LlamaModel(config)
        if getattr(config, "classifier_dropout", None) is not None:
            classifier_dropout = config.classifier_dropout
        elif getattr(config, "hidden_dropout", None) is not None:
            classifier_dropout = config.hidden_dropout
        else:
            classifier_dropout = 0.1
        self.dropout = nn.Dropout(classifier_dropout)
        self.score = nn.Linear(config.hidden_size, config.num_labels)

        # Initialize weights and apply final processing
        self.post_init()

    def get_input_embeddings(self):
        return self.model.embed_tokens

    def set_input_embeddings(self, value):
        self.model.embed_tokens = value

    def forward(
        self,
        input_ids: Optional[mindspore.Tensor] = None,
        attention_mask: Optional[mindspore.Tensor] = None,
        position_ids: Optional[mindspore.Tensor] = None,
        past_key_values: Optional[List[mindspore.Tensor]] = None,
        inputs_embeds: Optional[mindspore.Tensor] = None,
        labels: Optional[mindspore.Tensor] = None,
        use_cache: Optional[bool] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[Tuple, TokenClassifierOutput]:
        r"""
        labels (`mindspore.Tensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        """
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        outputs = self.model(
            input_ids,
            attention_mask=attention_mask,
            position_ids=position_ids,
            past_key_values=past_key_values,
            inputs_embeds=inputs_embeds,
            use_cache=use_cache,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )
        sequence_output = outputs[0]
        sequence_output = self.dropout(sequence_output)
        logits = self.score(sequence_output)

        loss = None
        if labels is not None:
            loss_fct = CrossEntropyLoss()
            loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))

        if not return_dict:
            output = (logits,) + outputs[2:]
            return ((loss,) + output) if loss is not None else output

        return TokenClassifierOutput(
            loss=loss,
            logits=logits,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )
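
Token classification scores every position, so the loss flattens the `(batch, seq_len, num_labels)` logits and the `(batch, seq_len)` labels into one long list of independent per-token classification problems. A small NumPy shape sketch of that flattening and the resulting cross-entropy:

import numpy as np

batch, seq_len, num_labels = 2, 4, 3
logits = np.random.randn(batch, seq_len, num_labels)
labels = np.random.randint(0, num_labels, size=(batch, seq_len))

flat_logits = logits.reshape(-1, num_labels)   # (8, 3): one row per token
flat_labels = labels.reshape(-1)               # (8,)

log_probs = flat_logits - np.log(np.exp(flat_logits).sum(-1, keepdims=True))
loss = -log_probs[np.arange(flat_labels.size), flat_labels].mean()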

mindnlp.transformers.models.llama.modeling_llama.LlamaForTokenClassification.forward(input_ids=None, attention_mask=None, position_ids=None, past_key_values=None, inputs_embeds=None, labels=None, use_cache=None, output_attentions=None, output_hidden_states=None, return_dict=None)

labels (`mindspore.Tensor` of shape `(batch_size,)`, *optional*): Labels for computing the sequence classification/regression loss. Indices should be in `[0, ..., config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss); if `config.num_labels > 1` a classification loss is computed (Cross-Entropy).

Source code in mindnlp\transformers\models\llama\modeling_llama.py
def forward(
    self,
    input_ids: Optional[mindspore.Tensor] = None,
    attention_mask: Optional[mindspore.Tensor] = None,
    position_ids: Optional[mindspore.Tensor] = None,
    past_key_values: Optional[List[mindspore.Tensor]] = None,
    inputs_embeds: Optional[mindspore.Tensor] = None,
    labels: Optional[mindspore.Tensor] = None,
    use_cache: Optional[bool] = None,
    output_attentions: Optional[bool] = None,
    output_hidden_states: Optional[bool] = None,
    return_dict: Optional[bool] = None,
) -> Union[Tuple, TokenClassifierOutput]:
    r"""
    labels (`mindspore.Tensor` of shape `(batch_size,)`, *optional*):
        Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
        config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
        `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
    """
    return_dict = return_dict if return_dict is not None else self.config.use_return_dict

    outputs = self.model(
        input_ids,
        attention_mask=attention_mask,
        position_ids=position_ids,
        past_key_values=past_key_values,
        inputs_embeds=inputs_embeds,
        use_cache=use_cache,
        output_attentions=output_attentions,
        output_hidden_states=output_hidden_states,
        return_dict=return_dict,
    )
    sequence_output = outputs[0]
    sequence_output = self.dropout(sequence_output)
    logits = self.score(sequence_output)

    loss = None
    if labels is not None:
        loss_fct = CrossEntropyLoss()
        loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))

    if not return_dict:
        output = (logits,) + outputs[2:]
        return ((loss,) + output) if loss is not None else output

    return TokenClassifierOutput(
        loss=loss,
        logits=logits,
        hidden_states=outputs.hidden_states,
        attentions=outputs.attentions,
    )
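
The head flattens the per-token logits to `(batch_size * sequence_length, num_labels)` before the cross-entropy call above. Below is a minimal usage sketch (not part of the mindnlp sources); the tiny configuration values are made up so it runs quickly, and a real checkpoint would instead be loaded with `from_pretrained`:

```python
from mindspore import ops
from mindnlp.transformers.models.llama.configuration_llama import LlamaConfig
from mindnlp.transformers.models.llama.modeling_llama import LlamaForTokenClassification

# Hypothetical tiny configuration, chosen only so the sketch stays cheap to run.
config = LlamaConfig(
    vocab_size=128, hidden_size=64, intermediate_size=128,
    num_hidden_layers=2, num_attention_heads=4, num_labels=5,
)
model = LlamaForTokenClassification(config)

input_ids = ops.randint(0, config.vocab_size, (2, 16))   # (batch_size, sequence_length)
labels = ops.randint(0, config.num_labels, (2, 16))      # one label id per token

outputs = model(input_ids, labels=labels)
print(outputs.logits.shape)  # (2, 16, 5): per-token class scores
print(outputs.loss)          # scalar cross-entropy over all token positions
```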

mindnlp.transformers.models.llama.modeling_llama.LlamaLinearScalingRotaryEmbedding

Bases: LlamaRotaryEmbedding

LlamaRotaryEmbedding extended with linear scaling. Credits to the Reddit user /u/kaiokendev

Source code in mindnlp\transformers\models\llama\modeling_llama.py
class LlamaLinearScalingRotaryEmbedding(LlamaRotaryEmbedding):
    """LlamaRotaryEmbedding extended with linear scaling. Credits to the Reddit user /u/kaiokendev"""

    def __init__(self, *args, **kwargs):
        logger.warning_once(
            "`LlamaLinearScalingRotaryEmbedding` is deprecated an will be removed in v4.45. Please use "
            "`LlamaRotaryEmbedding`, which now also does linear scaling (simply pass the model config to __init__)."
        )
        kwargs["rope_type"] = "linear"
        super().__init__(*args, **kwargs)

mindnlp.transformers.models.llama.modeling_llama.LlamaModel

Bases: LlamaPreTrainedModel

Transformer decoder consisting of config.num_hidden_layers layers. Each layer is a [LlamaDecoderLayer]

PARAMETER DESCRIPTION
config

LlamaConfig

TYPE: LlamaConfig

Source code in mindnlp\transformers\models\llama\modeling_llama.py
class LlamaModel(LlamaPreTrainedModel):
    """
    Transformer decoder consisting of *config.num_hidden_layers* layers. Each layer is a [`LlamaDecoderLayer`]

    Args:
        config: LlamaConfig
    """

    def __init__(self, config: LlamaConfig):
        super().__init__(config)
        self.padding_idx = config.pad_token_id
        self.vocab_size = config.vocab_size

        self.embed_tokens = nn.Embedding(config.vocab_size, config.hidden_size, self.padding_idx)
        self.layers = nn.ModuleList(
            [LlamaDecoderLayer(config, layer_idx) for layer_idx in range(config.num_hidden_layers)]
        )
        self.norm = LlamaRMSNorm(config.hidden_size, eps=config.rms_norm_eps)
        self.rotary_emb = LlamaRotaryEmbedding(config=config)
        self.gradient_checkpointing = False

        # Initialize weights and apply final processing
        self.post_init()

    def get_input_embeddings(self):
        return self.embed_tokens

    def set_input_embeddings(self, value):
        self.embed_tokens = value

    def forward(
        self,
        input_ids: mindspore.Tensor = None,
        attention_mask: Optional[mindspore.Tensor] = None,
        position_ids: Optional[mindspore.Tensor] = None,
        past_key_values: Optional[Union[Cache, List[mindspore.Tensor]]] = None,
        inputs_embeds: Optional[mindspore.Tensor] = None,
        use_cache: Optional[bool] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
        cache_position: Optional[mindspore.Tensor] = None,
    ) -> Union[Tuple, BaseModelOutputWithPast]:
        output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
        output_hidden_states = (
            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
        )
        use_cache = use_cache if use_cache is not None else self.config.use_cache
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        if (input_ids is None) ^ (inputs_embeds is not None):
            raise ValueError(
                "You cannot specify both input_ids and inputs_embeds at the same time, and must specify either one"
            )

        if self.gradient_checkpointing and self.training and use_cache:
            logger.warning_once(
                "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`."
            )
            use_cache = False

        if inputs_embeds is None:
            inputs_embeds = self.embed_tokens(input_ids)

        return_legacy_cache = False
        if (
            use_cache and not isinstance(past_key_values, Cache) and not self.training
        ):  # kept for BC (non `Cache` `past_key_values` inputs)
            return_legacy_cache = True
            past_key_values = DynamicCache.from_legacy_cache(past_key_values)
            logger.warning_once(
                "We detected that you are passing `past_key_values` as a tuple and this is deprecated. "
                "Please use an appropriate `Cache` class"
            )

        if cache_position is None:
            past_seen_tokens = past_key_values.get_seq_length() if past_key_values is not None else 0
            cache_position = ops.arange(
                past_seen_tokens, past_seen_tokens + inputs_embeds.shape[1]
            )
        if position_ids is None:
            position_ids = cache_position.unsqueeze(0)

        causal_mask = self._update_causal_mask(
            attention_mask, inputs_embeds, cache_position, past_key_values, output_attentions
        )
        hidden_states = inputs_embeds

        # create position embeddings to be shared across the decoder layers
        position_embeddings = self.rotary_emb(hidden_states, position_ids)

        # decoder layers
        all_hidden_states = () if output_hidden_states else None
        all_self_attns = () if output_attentions else None
        next_decoder_cache = None

        for decoder_layer in self.layers:
            if output_hidden_states:
                all_hidden_states += (hidden_states,)

            if self.gradient_checkpointing and self.training:
                layer_outputs = self._gradient_checkpointing_func(
                    decoder_layer.__call__,
                    hidden_states,
                    causal_mask,
                    position_ids,
                    past_key_values,
                    output_attentions,
                    use_cache,
                    cache_position,
                    position_embeddings,
                )
            else:
                layer_outputs = decoder_layer(
                    hidden_states,
                    attention_mask=causal_mask,
                    position_ids=position_ids,
                    past_key_value=past_key_values,
                    output_attentions=output_attentions,
                    use_cache=use_cache,
                    cache_position=cache_position,
                    position_embeddings=position_embeddings,
                )

            hidden_states = layer_outputs[0]

            if use_cache:
                next_decoder_cache = layer_outputs[2 if output_attentions else 1]

            if output_attentions:
                all_self_attns += (layer_outputs[1],)

        hidden_states = self.norm(hidden_states)

        # add hidden states from the last decoder layer
        if output_hidden_states:
            all_hidden_states += (hidden_states,)

        next_cache = next_decoder_cache if use_cache else None
        if return_legacy_cache:
            next_cache = next_cache.to_legacy_cache()

        if not return_dict:
            return tuple(v for v in [hidden_states, next_cache, all_hidden_states, all_self_attns] if v is not None)
        return BaseModelOutputWithPast(
            last_hidden_state=hidden_states,
            past_key_values=next_cache,
            hidden_states=all_hidden_states,
            attentions=all_self_attns,
        )

    def _update_causal_mask(
        self,
        attention_mask: mindspore.Tensor,
        input_tensor: mindspore.Tensor,
        cache_position: mindspore.Tensor,
        past_key_values: Cache,
        output_attentions: bool,
    ):
        # For SDPA, when possible, we will rely on its `is_causal` argument instead of its `attn_mask` argument, in
        # order to dispatch on Flash Attention 2. This feature is not compatible with static cache, as SDPA will fail
        # to infer the attention mask.
        past_seen_tokens = past_key_values.get_seq_length() if past_key_values is not None else 0
        using_static_cache = isinstance(past_key_values, StaticCache)

        dtype = input_tensor.dtype
        min_dtype = float(ops.finfo(dtype).min)
        sequence_length = input_tensor.shape[1]
        if using_static_cache:
            target_length = past_key_values.get_max_length()
        else:
            target_length = (
                attention_mask.shape[-1]
                if isinstance(attention_mask, mindspore.Tensor)
                else past_seen_tokens + sequence_length + 1
            )

        # In case the provided `attention` mask is 2D, we generate a causal mask here (4D).
        causal_mask = _prepare_4d_causal_attention_mask_with_cache_position(
            attention_mask,
            sequence_length=sequence_length,
            target_length=target_length,
            dtype=dtype,
            min_dtype=min_dtype,
            cache_position=cache_position,
            batch_size=input_tensor.shape[0],
        )

        return causal_mask
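
A minimal sketch of running the bare decoder stack end to end (the small configuration values below are made up; a real model would be loaded from a checkpoint). It shows the `BaseModelOutputWithPast` fields produced by the `forward` above:

```python
from mindspore import ops
from mindnlp.transformers.models.llama.configuration_llama import LlamaConfig
from mindnlp.transformers.models.llama.modeling_llama import LlamaModel

# Hypothetical tiny configuration so the sketch is cheap to run.
config = LlamaConfig(
    vocab_size=128, hidden_size=64, intermediate_size=128,
    num_hidden_layers=2, num_attention_heads=4, num_key_value_heads=2,
)
model = LlamaModel(config)

input_ids = ops.randint(0, config.vocab_size, (1, 8))
outputs = model(input_ids, output_hidden_states=True)

print(outputs.last_hidden_state.shape)  # (1, 8, 64): final hidden state after LlamaRMSNorm
print(len(outputs.hidden_states))       # 3: embedding output + one entry per decoder layer
```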

mindnlp.transformers.models.llama.modeling_llama.LlamaRMSNorm

Bases: Module

Source code in mindnlp\transformers\models\llama\modeling_llama.py
class LlamaRMSNorm(nn.Module):
    def __init__(self, hidden_size, eps=1e-6):
        """
        LlamaRMSNorm is equivalent to T5LayerNorm
        """
        super().__init__()
        self.weight = nn.Parameter(ops.ones(hidden_size))
        self.variance_epsilon = eps

    def forward(self, hidden_states):
        if not self.training and use_pyboost() and not ON_ORANGE_PI:
            return F.rms_norm(hidden_states, self.weight, self.variance_epsilon)
        input_dtype = hidden_states.dtype
        hidden_states = hidden_states.to(mindspore.float32)
        variance = ops.mean(hidden_states.pow(2), -1, keepdim=True)
        hidden_states = hidden_states * ops.rsqrt(variance + self.variance_epsilon)
        return self.weight * hidden_states.to(input_dtype)

    def extra_repr(self):
        return f"{tuple(self.weight.shape)}, eps={self.variance_epsilon}"

mindnlp.transformers.models.llama.modeling_llama.LlamaRMSNorm.__init__(hidden_size, eps=1e-06)

LlamaRMSNorm is equivalent to T5LayerNorm

Source code in mindnlp\transformers\models\llama\modeling_llama.py
def __init__(self, hidden_size, eps=1e-6):
    """
    LlamaRMSNorm is equivalent to T5LayerNorm
    """
    super().__init__()
    self.weight = nn.Parameter(ops.ones(hidden_size))
    self.variance_epsilon = eps

mindnlp.transformers.models.llama.modeling_llama.LlamaRotaryEmbedding

Bases: Module

Source code in mindnlp\transformers\models\llama\modeling_llama.py
class LlamaRotaryEmbedding(nn.Module):
    def __init__(
        self,
        dim=None,
        max_position_embeddings=2048,
        base=10000,
        scaling_factor=1.0,
        rope_type="default",
        config: Optional[LlamaConfig] = None,
    ):
        super().__init__()
        # TODO (joao): remove the `if` below, only used for BC
        self.rope_kwargs = {}
        if config is None:
            logger.warning_once(
                "`LlamaRotaryEmbedding` can now be fully parameterized by passing the model config through the "
                "`config` argument. All other arguments will be removed in v4.45"
            )
            self.rope_kwargs = {
                "rope_type": rope_type,
                "factor": scaling_factor,
                "dim": dim,
                "base": base,
                "max_position_embeddings": max_position_embeddings,
            }
            self.rope_type = rope_type
            self.max_seq_len_cached = max_position_embeddings
            self.original_max_seq_len = max_position_embeddings
        else:
            # BC: "rope_type" was originally "type"
            if config.rope_scaling is not None:
                self.rope_type = config.rope_scaling.get("rope_type", config.rope_scaling.get("type"))
            else:
                self.rope_type = "default"
            self.max_seq_len_cached = config.max_position_embeddings
            self.original_max_seq_len = config.max_position_embeddings

        self.config = config
        self.rope_init_fn = ROPE_INIT_FUNCTIONS[self.rope_type]

        inv_freq, self.attention_scaling = self.rope_init_fn(self.config, **self.rope_kwargs)
        self.register_buffer("inv_freq", inv_freq, persistent=False)
        self.original_inv_freq = self.inv_freq

    def _dynamic_frequency_update(self, position_ids):
        """
        dynamic RoPE layers should recompute `inv_freq` in the following situations:
        1 - growing beyond the cached sequence length (allow scaling)
        2 - the current sequence length is in the original scale (avoid losing precision with small sequences)
        """
        seq_len = ops.max(position_ids) + 1
        if seq_len > self.max_seq_len_cached:  # growth
            inv_freq, self.attention_scaling = self.rope_init_fn(
                self.config, seq_len=seq_len, **self.rope_kwargs
            )
            self.register_buffer("inv_freq", inv_freq, persistent=False)  # TODO joao: may break with compilation
            self.max_seq_len_cached = seq_len

        if seq_len < self.original_max_seq_len and self.max_seq_len_cached > self.original_max_seq_len:  # reset
            self.register_buffer("inv_freq", self.original_inv_freq, persistent=False)
            self.max_seq_len_cached = self.original_max_seq_len

    @no_grad()
    def forward(self, x, position_ids):
        if "dynamic" in self.rope_type:
            self._dynamic_frequency_update(position_ids)

        # Core RoPE block
        inv_freq_expanded = self.inv_freq.view(1, -1, 1).float().broadcast_to((position_ids.shape[0], -1, 1))
        position_ids_expanded = ops.unsqueeze(position_ids, 1).float()
        # Force float32 (see https://github.com/huggingface/transformers/pull/29285)
        freqs = ops.transpose(ops.matmul(inv_freq_expanded.float(), position_ids_expanded.float()), 1, 2)
        emb = ops.cat((freqs, freqs), dim=-1)
        cos = emb.cos()
        sin = emb.sin()

        # Advanced RoPE types (e.g. yarn) apply a post-processing scaling factor, equivalent to scaling attention
        cos = cos * self.attention_scaling
        sin = sin * self.attention_scaling

        return cos.to(dtype=x.dtype), sin.to(dtype=x.dtype)
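
A short sketch of calling the embedding directly (made-up small sizes): the returned `cos`/`sin` carry one angle per position and per rotary dimension (`head_dim`), and are later consumed by `apply_rotary_pos_emb`:

```python
from mindspore import ops
from mindnlp.transformers.models.llama.configuration_llama import LlamaConfig
from mindnlp.transformers.models.llama.modeling_llama import LlamaRotaryEmbedding

# Hypothetical small sizes: hidden_size=64 with 4 heads gives head_dim=16.
config = LlamaConfig(hidden_size=64, num_attention_heads=4)
rope = LlamaRotaryEmbedding(config=config)

batch, seq_len = 2, 10
hidden_states = ops.randn(batch, seq_len, config.hidden_size)
position_ids = ops.arange(seq_len).reshape(1, -1).broadcast_to((batch, seq_len))

cos, sin = rope(hidden_states, position_ids)
print(cos.shape, sin.shape)  # (2, 10, 16) each
```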

mindnlp.transformers.models.llama.modeling_llama.apply_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_dim=1)

Applies Rotary Position Embedding to the query and key tensors.

PARAMETER DESCRIPTION
q

The query tensor.

TYPE: `mindspore.Tensor`

k

The key tensor.

TYPE: `mindspore.Tensor`

cos

The cosine part of the rotary embedding.

TYPE: `mindspore.Tensor`

sin

The sine part of the rotary embedding.

TYPE: `mindspore.Tensor`

position_ids

Deprecated and unused.

TYPE: `mindspore.Tensor`, *optional* DEFAULT: None

unsqueeze_dim

The 'unsqueeze_dim' argument specifies the dimension along which to unsqueeze cos[position_ids] and sin[position_ids] so that they can be properly broadcasted to the dimensions of q and k. For example, note that cos[position_ids] and sin[position_ids] have the shape [batch_size, seq_len, head_dim]. Then, if q and k have the shape [batch_size, heads, seq_len, head_dim], then setting unsqueeze_dim=1 makes cos[position_ids] and sin[position_ids] broadcastable to the shapes of q and k. Similarly, if q and k have the shape [batch_size, seq_len, heads, head_dim], then set unsqueeze_dim=2.

TYPE: `int`, *optional*, defaults to 1 DEFAULT: 1

Source code in mindnlp\transformers\models\llama\modeling_llama.py
def apply_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_dim=1):
    """Applies Rotary Position Embedding to the query and key tensors.

    Args:
        q (`mindspore.Tensor`): The query tensor.
        k (`mindspore.Tensor`): The key tensor.
        cos (`mindspore.Tensor`): The cosine part of the rotary embedding.
        sin (`mindspore.Tensor`): The sine part of the rotary embedding.
        position_ids (`mindspore.Tensor`, *optional*):
            Deprecated and unused.
        unsqueeze_dim (`int`, *optional*, defaults to 1):
            The 'unsqueeze_dim' argument specifies the dimension along which to unsqueeze cos[position_ids] and
            sin[position_ids] so that they can be properly broadcasted to the dimensions of q and k. For example, note
            that cos[position_ids] and sin[position_ids] have the shape [batch_size, seq_len, head_dim]. Then, if q and
            k have the shape [batch_size, heads, seq_len, head_dim], then setting unsqueeze_dim=1 makes
            cos[position_ids] and sin[position_ids] broadcastable to the shapes of q and k. Similarly, if q and k have
            the shape [batch_size, seq_len, heads, head_dim], then set unsqueeze_dim=2.
    Returns:
        `tuple(mindspore.Tensor)` comprising of the query and key tensors rotated using the Rotary Position Embedding.
    """
    cos = cos.unsqueeze(unsqueeze_dim)
    sin = sin.unsqueeze(unsqueeze_dim)
    q_embed = (q * cos) + (rotate_half(q) * sin)
    k_embed = (k * cos) + (rotate_half(k) * sin)
    return q_embed, k_embed
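
A minimal sketch combining `LlamaRotaryEmbedding` with `apply_rotary_pos_emb` for query/key tensors in the `(batch, heads, seq_len, head_dim)` layout, where the default `unsqueeze_dim=1` supplies the missing heads axis (sizes below are made up):

```python
from mindspore import ops
from mindnlp.transformers.models.llama.configuration_llama import LlamaConfig
from mindnlp.transformers.models.llama.modeling_llama import (
    LlamaRotaryEmbedding, apply_rotary_pos_emb,
)

config = LlamaConfig(hidden_size=64, num_attention_heads=4)  # head_dim = 16
rope = LlamaRotaryEmbedding(config=config)

batch, heads, seq_len, head_dim = 2, 4, 10, 16
q = ops.randn(batch, heads, seq_len, head_dim)
k = ops.randn(batch, heads, seq_len, head_dim)

position_ids = ops.arange(seq_len).reshape(1, -1).broadcast_to((batch, seq_len))
cos, sin = rope(q, position_ids)         # (2, 10, 16); only q's dtype is used here

# unsqueeze_dim=1 (the default) inserts the heads axis so cos/sin broadcast over it.
q_rot, k_rot = apply_rotary_pos_emb(q, k, cos, sin)
print(q_rot.shape, k_rot.shape)          # unchanged: (2, 4, 10, 16)
```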

mindnlp.transformers.models.llama.modeling_llama.repeat_kv(hidden_states, n_rep)

This is the equivalent of ops.repeat_interleave(x, dim=1, repeats=n_rep). The hidden states go from (batch, num_key_value_heads, seqlen, head_dim) to (batch, num_attention_heads, seqlen, head_dim)

Source code in mindnlp\transformers\models\llama\modeling_llama.py
def repeat_kv(hidden_states: mindspore.Tensor, n_rep: int) -> mindspore.Tensor:
    """
    This is the equivalent of ops.repeat_interleave(x, dim=1, repeats=n_rep). The hidden states go from (batch,
    num_key_value_heads, seqlen, head_dim) to (batch, num_attention_heads, seqlen, head_dim)
    """
    batch, num_key_value_heads, slen, head_dim = hidden_states.shape
    if n_rep == 1:
        return hidden_states
    # hidden_states = hidden_states[:, :, None, :, :].broadcast_to((batch, num_key_value_heads, n_rep, slen, head_dim))
    hidden_states = ops.unsqueeze(hidden_states, 2).broadcast_to((batch, num_key_value_heads, n_rep, slen, head_dim))
    return hidden_states.reshape(batch, num_key_value_heads * n_rep, slen, head_dim)
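
A small sketch of the grouped-query-attention expansion this helper performs (made-up sizes):

```python
from mindspore import ops
from mindnlp.transformers.models.llama.modeling_llama import repeat_kv

# 8 query heads served by 2 key/value heads -> each KV head is repeated n_rep = 8 // 2 = 4 times.
batch, num_kv_heads, seq_len, head_dim = 2, 2, 10, 16
key_states = ops.randn(batch, num_kv_heads, seq_len, head_dim)

expanded = repeat_kv(key_states, n_rep=4)
print(expanded.shape)  # (2, 8, 10, 16)
```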

mindnlp.transformers.models.llama.modeling_llama.rotate_half(x)

Rotates half the hidden dims of the input.

Source code in mindnlp\transformers\models\llama\modeling_llama.py
def rotate_half(x):
    """Rotates half the hidden dims of the input."""
    # x1 = x[..., : x.shape[-1] // 2]
    # x2 = x[..., x.shape[-1] // 2 :]
    x1, x2 = ops.split(x, x.shape[-1] // 2, dim=-1)
    return ops.cat((-x2, x1), dim=-1)

mindnlp.transformers.models.llama.configuration_llama

LLaMA model configuration

mindnlp.transformers.models.llama.configuration_llama.LlamaConfig

Bases: PretrainedConfig

This is the configuration class to store the configuration of a [LlamaModel]. It is used to instantiate an LLaMA model according to the specified arguments, defining the model architecture. Instantiating a configuration with the defaults will yield a similar configuration to that of the LLaMA-7B.

Configuration objects inherit from [PretrainedConfig] and can be used to control the model outputs. Read the documentation from [PretrainedConfig] for more information.

PARAMETER DESCRIPTION
vocab_size

Vocabulary size of the LLaMA model. Defines the number of different tokens that can be represented by the inputs_ids passed when calling [LlamaModel]

TYPE: `int`, *optional*, defaults to 32000 DEFAULT: 32000

hidden_size

Dimension of the hidden representations.

TYPE: `int`, *optional*, defaults to 4096 DEFAULT: 4096

intermediate_size

Dimension of the MLP representations.

TYPE: `int`, *optional*, defaults to 11008 DEFAULT: 11008

num_hidden_layers

Number of hidden layers in the Transformer decoder.

TYPE: `int`, *optional*, defaults to 32 DEFAULT: 32

num_attention_heads

Number of attention heads for each attention layer in the Transformer decoder.

TYPE: `int`, *optional*, defaults to 32 DEFAULT: 32

num_key_value_heads

This is the number of key_value heads that should be used to implement Grouped Query Attention. If num_key_value_heads=num_attention_heads, the model will use Multi Head Attention (MHA), if num_key_value_heads=1 the model will use Multi Query Attention (MQA) otherwise GQA is used. When converting a multi-head checkpoint to a GQA checkpoint, each group key and value head should be constructed by meanpooling all the original heads within that group. For more details checkout this paper. If it is not specified, will default to num_attention_heads.

TYPE: `int`, *optional* DEFAULT: None

hidden_act

The non-linear activation function (function or string) in the decoder.

TYPE: `str` or `function`, *optional*, defaults to `"silu"` DEFAULT: 'silu'

max_position_embeddings

The maximum sequence length that this model might ever be used with. Llama 1 supports up to 2048 tokens, Llama 2 up to 4096, CodeLlama up to 16384.

TYPE: `int`, *optional*, defaults to 2048 DEFAULT: 2048

initializer_range

The standard deviation of the truncated_normal_initializer for initializing all weight matrices.

TYPE: `float`, *optional*, defaults to 0.02 DEFAULT: 0.02

rms_norm_eps

The epsilon used by the rms normalization layers.

TYPE: `float`, *optional*, defaults to 1e-06 DEFAULT: 1e-06

use_cache

Whether or not the model should return the last key/values attentions (not used by all models). Only relevant if config.is_decoder=True.

TYPE: `bool`, *optional*, defaults to `True` DEFAULT: True

pad_token_id

Padding token id.

TYPE: `int`, *optional* DEFAULT: None

bos_token_id

Beginning of stream token id.

TYPE: `int`, *optional*, defaults to 1 DEFAULT: 1

eos_token_id

End of stream token id.

TYPE: `int`, *optional*, defaults to 2 DEFAULT: 2

pretraining_tp

Experimental feature. Tensor parallelism rank used during pretraining. Please refer to this document to understand more about it. This value is necessary to ensure exact reproducibility of the pretraining results. Please refer to this issue.

TYPE: `int`, *optional*, defaults to 1 DEFAULT: 1

tie_word_embeddings

Whether to tie weight embeddings

TYPE: `bool`, *optional*, defaults to `False` DEFAULT: False

rope_theta

The base period of the RoPE embeddings.

TYPE: `float`, *optional*, defaults to 10000.0 DEFAULT: 10000.0

rope_scaling

Dictionary containing the scaling configuration for the RoPE embeddings. NOTE: if you apply a new rope type and you expect the model to work on longer max_position_embeddings, we recommend you update this value accordingly. Expected contents:

  • rope_type (str): The sub-variant of RoPE to use. Can be one of ['default', 'linear', 'dynamic', 'yarn', 'longrope', 'llama3'], with 'default' being the original RoPE implementation.
  • factor (float, optional): Used with all rope types except 'default'. The scaling factor to apply to the RoPE embeddings. In most scaling types, a factor of x will enable the model to handle sequences of length x * original maximum pre-trained length.
  • original_max_position_embeddings (int, optional): Used with 'dynamic', 'longrope' and 'llama3'. The original max position embeddings used during pretraining.
  • attention_factor (float, optional): Used with 'yarn' and 'longrope'. The scaling factor to be applied on the attention computation. If unspecified, it defaults to the value recommended by the implementation, using the factor field to infer the suggested value.
  • beta_fast (float, optional): Only used with 'yarn'. Parameter to set the boundary for extrapolation (only) in the linear ramp function. If unspecified, it defaults to 32.
  • beta_slow (float, optional): Only used with 'yarn'. Parameter to set the boundary for interpolation (only) in the linear ramp function. If unspecified, it defaults to 1.
  • short_factor (List[float], optional): Only used with 'longrope'. The scaling factor to be applied to short contexts (< original_max_position_embeddings). Must be a list of numbers with the same length as the hidden size divided by the number of attention heads divided by 2.
  • long_factor (List[float], optional): Only used with 'longrope'. The scaling factor to be applied to long contexts (> original_max_position_embeddings). Must be a list of numbers with the same length as the hidden size divided by the number of attention heads divided by 2.
  • low_freq_factor (float, optional): Only used with 'llama3'. Scaling factor applied to low frequency components of the RoPE.
  • high_freq_factor (float, optional): Only used with 'llama3'. Scaling factor applied to high frequency components of the RoPE.

TYPE: `Dict`, *optional* DEFAULT: None

attention_bias

Whether to use a bias in the query, key, value and output projection layers during self-attention.

TYPE: `bool`, *optional*, defaults to `False` DEFAULT: False

attention_dropout

The dropout ratio for the attention probabilities.

TYPE: `float`, *optional*, defaults to 0.0 DEFAULT: 0.0

mlp_bias

Whether to use a bias in up_proj, down_proj and gate_proj layers in the MLP layers.

TYPE: `bool`, *optional*, defaults to `False` DEFAULT: False

>>> from transformers import LlamaModel, LlamaConfig

>>> # Initializing a LLaMA llama-7b style configuration
>>> configuration = LlamaConfig()

>>> # Initializing a model from the llama-7b style configuration
>>> model = LlamaModel(configuration)

>>> # Accessing the model configuration
>>> configuration = model.config
Source code in mindnlp\transformers\models\llama\configuration_llama.py
class LlamaConfig(PretrainedConfig):
    r"""
    This is the configuration class to store the configuration of a [`LlamaModel`]. It is used to instantiate an LLaMA
    model according to the specified arguments, defining the model architecture. Instantiating a configuration with the
    defaults will yield a similar configuration to that of the LLaMA-7B.

    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
    documentation from [`PretrainedConfig`] for more information.


    Args:
        vocab_size (`int`, *optional*, defaults to 32000):
            Vocabulary size of the LLaMA model. Defines the number of different tokens that can be represented by the
            `inputs_ids` passed when calling [`LlamaModel`]
        hidden_size (`int`, *optional*, defaults to 4096):
            Dimension of the hidden representations.
        intermediate_size (`int`, *optional*, defaults to 11008):
            Dimension of the MLP representations.
        num_hidden_layers (`int`, *optional*, defaults to 32):
            Number of hidden layers in the Transformer decoder.
        num_attention_heads (`int`, *optional*, defaults to 32):
            Number of attention heads for each attention layer in the Transformer decoder.
        num_key_value_heads (`int`, *optional*):
            This is the number of key_value heads that should be used to implement Grouped Query Attention. If
            `num_key_value_heads=num_attention_heads`, the model will use Multi Head Attention (MHA), if
            `num_key_value_heads=1` the model will use Multi Query Attention (MQA) otherwise GQA is used. When
            converting a multi-head checkpoint to a GQA checkpoint, each group key and value head should be constructed
            by meanpooling all the original heads within that group. For more details checkout [this
            paper](https://arxiv.org/pdf/2305.13245.pdf). If it is not specified, will default to
            `num_attention_heads`.
        hidden_act (`str` or `function`, *optional*, defaults to `"silu"`):
            The non-linear activation function (function or string) in the decoder.
        max_position_embeddings (`int`, *optional*, defaults to 2048):
            The maximum sequence length that this model might ever be used with. Llama 1 supports up to 2048 tokens,
            Llama 2 up to 4096, CodeLlama up to 16384.
        initializer_range (`float`, *optional*, defaults to 0.02):
            The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
        rms_norm_eps (`float`, *optional*, defaults to 1e-06):
            The epsilon used by the rms normalization layers.
        use_cache (`bool`, *optional*, defaults to `True`):
            Whether or not the model should return the last key/values attentions (not used by all models). Only
            relevant if `config.is_decoder=True`.
        pad_token_id (`int`, *optional*):
            Padding token id.
        bos_token_id (`int`, *optional*, defaults to 1):
            Beginning of stream token id.
        eos_token_id (`int`, *optional*, defaults to 2):
            End of stream token id.
        pretraining_tp (`int`, *optional*, defaults to 1):
            Experimental feature. Tensor parallelism rank used during pretraining. Please refer to [this
            document](https://huggingface.co/docs/transformers/main/perf_train_gpu_many#tensor-parallelism) to
            understand more about it. This value is necessary to ensure exact reproducibility of the pretraining
            results. Please refer to [this issue](https://github.com/pytorch/pytorch/issues/76232).
        tie_word_embeddings (`bool`, *optional*, defaults to `False`):
            Whether to tie weight embeddings
        rope_theta (`float`, *optional*, defaults to 10000.0):
            The base period of the RoPE embeddings.
        rope_scaling (`Dict`, *optional*):
            Dictionary containing the scaling configuration for the RoPE embeddings. NOTE: if you apply new rope type
            and you expect the model to work on longer `max_position_embeddings`, we recommend you to update this value
            accordingly.
            Expected contents:
                `rope_type` (`str`):
                    The sub-variant of RoPE to use. Can be one of ['default', 'linear', 'dynamic', 'yarn', 'longrope',
                    'llama3'], with 'default' being the original RoPE implementation.
                `factor` (`float`, *optional*):
                    Used with all rope types except 'default'. The scaling factor to apply to the RoPE embeddings. In
                    most scaling types, a `factor` of x will enable the model to handle sequences of length x *
                    original maximum pre-trained length.
                `original_max_position_embeddings` (`int`, *optional*):
                    Used with 'dynamic', 'longrope' and 'llama3'. The original max position embeddings used during
                    pretraining.
                `attention_factor` (`float`, *optional*):
                    Used with 'yarn' and 'longrope'. The scaling factor to be applied on the attention
                    computation. If unspecified, it defaults to value recommended by the implementation, using the
                    `factor` field to infer the suggested value.
                `beta_fast` (`float`, *optional*):
                    Only used with 'yarn'. Parameter to set the boundary for extrapolation (only) in the linear
                    ramp function. If unspecified, it defaults to 32.
                `beta_slow` (`float`, *optional*):
                    Only used with 'yarn'. Parameter to set the boundary for interpolation (only) in the linear
                    ramp function. If unspecified, it defaults to 1.
                `short_factor` (`List[float]`, *optional*):
                    Only used with 'longrope'. The scaling factor to be applied to short contexts (<
                    `original_max_position_embeddings`). Must be a list of numbers with the same length as the hidden
                    size divided by the number of attention heads divided by 2
                `long_factor` (`List[float]`, *optional*):
                    Only used with 'longrope'. The scaling factor to be applied to long contexts (<
                    `original_max_position_embeddings`). Must be a list of numbers with the same length as the hidden
                    size divided by the number of attention heads divided by 2
                `low_freq_factor` (`float`, *optional*):
                    Only used with 'llama3'. Scaling factor applied to low frequency components of the RoPE
                `high_freq_factor` (`float`, *optional*):
                    Only used with 'llama3'. Scaling factor applied to high frequency components of the RoPE
        attention_bias (`bool`, *optional*, defaults to `False`):
            Whether to use a bias in the query, key, value and output projection layers during self-attention.
        attention_dropout (`float`, *optional*, defaults to 0.0):
            The dropout ratio for the attention probabilities.
        mlp_bias (`bool`, *optional*, defaults to `False`):
            Whether to use a bias in up_proj, down_proj and gate_proj layers in the MLP layers.

    ```python
    >>> from transformers import LlamaModel, LlamaConfig

    >>> # Initializing a LLaMA llama-7b style configuration
    >>> configuration = LlamaConfig()

    >>> # Initializing a model from the llama-7b style configuration
    >>> model = LlamaModel(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```"""

    model_type = "llama"
    keys_to_ignore_at_inference = ["past_key_values"]

    def __init__(
        self,
        vocab_size=32000,
        hidden_size=4096,
        intermediate_size=11008,
        num_hidden_layers=32,
        num_attention_heads=32,
        num_key_value_heads=None,
        hidden_act="silu",
        max_position_embeddings=2048,
        initializer_range=0.02,
        rms_norm_eps=1e-6,
        use_cache=True,
        pad_token_id=None,
        bos_token_id=1,
        eos_token_id=2,
        pretraining_tp=1,
        tie_word_embeddings=False,
        rope_theta=10000.0,
        rope_scaling=None,
        attention_bias=False,
        attention_dropout=0.0,
        mlp_bias=False,
        **kwargs,
    ):
        self.vocab_size = vocab_size
        self.max_position_embeddings = max_position_embeddings
        self.hidden_size = hidden_size
        self.intermediate_size = intermediate_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads

        # for backward compatibility
        if num_key_value_heads is None:
            num_key_value_heads = num_attention_heads

        self.num_key_value_heads = num_key_value_heads
        self.hidden_act = hidden_act
        self.initializer_range = initializer_range
        self.rms_norm_eps = rms_norm_eps
        self.pretraining_tp = pretraining_tp
        self.use_cache = use_cache
        self.rope_theta = rope_theta
        self.rope_scaling = rope_scaling
        self.attention_bias = attention_bias
        self.attention_dropout = attention_dropout
        self.mlp_bias = mlp_bias

        # Validate the correctness of rotary position embeddings parameters
        # BC: if there is a 'type' field, move it to 'rope_type'.
        if self.rope_scaling is not None and "type" in self.rope_scaling:
            self.rope_scaling["rope_type"] = self.rope_scaling["type"]
        rope_config_validation(self)

        super().__init__(
            pad_token_id=pad_token_id,
            bos_token_id=bos_token_id,
            eos_token_id=eos_token_id,
            tie_word_embeddings=tie_word_embeddings,
            **kwargs,
        )

mindnlp.transformers.models.llama.tokenization_llama

Tokenization classes for LLaMA.

mindnlp.transformers.models.llama.tokenization_llama.LlamaTokenizer

Bases: PreTrainedTokenizer

Construct a Llama tokenizer. Based on byte-level Byte-Pair-Encoding. The default padding token is unset as there is no padding token in the original model.

PARAMETER DESCRIPTION
vocab_file

Path to the vocabulary file.

TYPE: `str`

unk_token

The unknown token. A token that is not in the vocabulary cannot be converted to an ID and is set to be this token instead.

TYPE: `str` or `tokenizers.AddedToken`, *optional*, defaults to `"<unk>"` DEFAULT: '<unk>'

bos_token

The beginning of sequence token that was used during pretraining. Can be used as a sequence classifier token.

TYPE: `str` or `tokenizers.AddedToken`, *optional*, defaults to `"<s>"` DEFAULT: '<s>'

eos_token

The end of sequence token.

TYPE: `str` or `tokenizers.AddedToken`, *optional*, defaults to `"</s>"` DEFAULT: '</s>'

pad_token

A special token used to make arrays of tokens the same size for batching purpose. Will then be ignored by attention mechanisms or loss computation.

TYPE: `str` or `tokenizers.AddedToken`, *optional* DEFAULT: None

sp_model_kwargs

Will be passed to the SentencePieceProcessor.__init__() method. The Python wrapper for SentencePiece can be used, among other things, to set:

  • enable_sampling: Enable subword regularization.
  • nbest_size: Sampling parameters for unigram. Invalid for BPE-Dropout.

  • nbest_size = {0,1}: No sampling is performed.

  • nbest_size > 1: samples from the nbest_size results.
  • nbest_size < 0: assuming that nbest_size is infinite and samples from the all hypothesis (lattice) using forward-filtering-and-backward-sampling algorithm.

  • alpha: Smoothing parameter for unigram sampling, and dropout probability of merge operations for BPE-dropout.

TYPE: `Dict[str, Any]`, `Optional`, *optional* DEFAULT: None

add_bos_token

Whether or not to add a bos_token at the start of sequences.

TYPE: `bool`, *optional*, defaults to `True` DEFAULT: True

add_eos_token

Whether or not to add an eos_token at the end of sequences.

TYPE: `bool`, *optional*, defaults to `False` DEFAULT: False

clean_up_tokenization_spaces

Whether or not to cleanup spaces after decoding, cleanup consists in removing potential artifacts like extra spaces.

TYPE: `bool`, *optional*, defaults to `False` DEFAULT: False

use_default_system_prompt

Whether or not the default system prompt for Llama should be used.

TYPE: `bool`, *optional*, defaults to `False` DEFAULT: False

spaces_between_special_tokens

Whether or not to add spaces between special tokens.

TYPE: `bool`, *optional*, defaults to `False` DEFAULT: False

legacy

Whether or not the legacy behavior of the tokenizer should be used. Legacy is before the merge of #24622 and #25224 which includes fixes to properly handle tokens that appear after special tokens. Make sure to also set from_slow to True. A simple example:

  • legacy=True:
    >>> from transformers import LlamaTokenizerFast
    
    >>> tokenizer = LlamaTokenizerFast.from_pretrained("huggyllama/llama-7b", legacy=True, from_slow=True)
    >>> tokenizer.encode("Hello <s>.") # 869 is '▁.'
    [1, 15043, 29871, 1, 869]
    
  • legacy=False:
    >>> from transformers import LlamaTokenizerFast
    
    >>> tokenizer = LlamaTokenizerFast.from_pretrained("huggyllama/llama-7b", legacy=False, from_slow=True)
    >>> tokenizer.encode("Hello <s>.")  # 29889 is '.'
    [1, 15043, 29871, 1, 29889]
    
    Checkout the pull request for more details.

TYPE: `bool`, *optional* DEFAULT: None

add_prefix_space

Whether or not to add an initial space to the input. This allows to treat the leading word just as any other word. Again, this should be set with from_slow=True to make sure it's taken into account.

TYPE: `bool`, *optional*, defaults to `True` DEFAULT: True
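
A hedged usage sketch, assuming `LlamaTokenizer` is re-exported from `mindnlp.transformers` and that the `huggyllama/llama-7b` checkpoint referenced in the `legacy` examples above is reachable:

```python
from mindnlp.transformers import LlamaTokenizer

# Assumes the "huggyllama/llama-7b" sentencepiece vocabulary can be downloaded.
tokenizer = LlamaTokenizer.from_pretrained("huggyllama/llama-7b")

ids = tokenizer.encode("Hello world")        # add_bos_token=True prepends the <s> id
print(ids)
print(tokenizer.convert_ids_to_tokens(ids))  # sentencepiece pieces, e.g. ['<s>', '▁Hello', '▁world']
print(tokenizer.decode(ids, skip_special_tokens=True))
```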

Source code in mindnlp\transformers\models\llama\tokenization_llama.py
class LlamaTokenizer(PreTrainedTokenizer):
    """
    Construct a Llama tokenizer. Based on byte-level Byte-Pair-Encoding. The default padding token is unset as there is
    no padding token in the original model.

    Args:
        vocab_file (`str`):
            Path to the vocabulary file.
        unk_token (`str` or `tokenizers.AddedToken`, *optional*, defaults to `"<unk>"`):
            The unknown token. A token that is not in the vocabulary cannot be converted to an ID and is set to be this
            token instead.
        bos_token (`str` or `tokenizers.AddedToken`, *optional*, defaults to `"<s>"`):
            The beginning of sequence token that was used during pretraining. Can be used a sequence classifier token.
        eos_token (`str` or `tokenizers.AddedToken`, *optional*, defaults to `"</s>"`):
            The end of sequence token.
        pad_token (`str` or `tokenizers.AddedToken`, *optional*):
            A special token used to make arrays of tokens the same size for batching purpose. Will then be ignored by
            attention mechanisms or loss computation.
        sp_model_kwargs (`Dict[str, Any]`, `Optional`, *optional*):
            Will be passed to the `SentencePieceProcessor.__init__()` method. The [Python wrapper for
            SentencePiece](https://github.com/google/sentencepiece/tree/master/python) can be used, among other things,
            to set:

            - `enable_sampling`: Enable subword regularization.
            - `nbest_size`: Sampling parameters for unigram. Invalid for BPE-Dropout.

              - `nbest_size = {0,1}`: No sampling is performed.
              - `nbest_size > 1`: samples from the nbest_size results.
              - `nbest_size < 0`: assuming that nbest_size is infinite and samples from the all hypothesis (lattice)
                using forward-filtering-and-backward-sampling algorithm.

            - `alpha`: Smoothing parameter for unigram sampling, and dropout probability of merge operations for
              BPE-dropout.

        add_bos_token (`bool`, *optional*, defaults to `True`):
            Whether or not to add an `bos_token` at the start of sequences.
        add_eos_token (`bool`, *optional*, defaults to `False`):
            Whether or not to add an `eos_token` at the end of sequences.
        clean_up_tokenization_spaces (`bool`, *optional*, defaults to `False`):
            Whether or not to cleanup spaces after decoding, cleanup consists in removing potential artifacts like
            extra spaces.
        use_default_system_prompt (`bool`, *optional*, defaults to `False`):
            Whether or not the default system prompt for Llama should be used.
        spaces_between_special_tokens (`bool`, *optional*, defaults to `False`):
            Whether or not to add spaces between special tokens.
        legacy (`bool`, *optional*):
            Whether or not the `legacy` behavior of the tokenizer should be used. Legacy is before the merge of #24622
            and #25224 which includes fixes to properly handle tokens that appear after special tokens.
            Make sure to also set `from_slow` to `True`.
            A simple example:

            - `legacy=True`:
            ```python
            >>> from transformers import LlamaTokenizerFast

            >>> tokenizer = LlamaTokenizerFast.from_pretrained("huggyllama/llama-7b", legacy=True, from_slow=True)
            >>> tokenizer.encode("Hello <s>.") # 869 is '▁.'
            [1, 15043, 29871, 1, 869]
            ```
            - `legacy=False`:
            ```python
            >>> from transformers import LlamaTokenizerFast

            >>> tokenizer = LlamaTokenizerFast.from_pretrained("huggyllama/llama-7b", legacy=False, from_slow=True)
            >>> tokenizer.encode("Hello <s>.")  # 29889 is '.'
            [1, 15043, 29871, 1, 29889]
            ```
            Checkout the [pull request](https://github.com/huggingface/transformers/pull/24565) for more details.
        add_prefix_space (`bool`, *optional*, defaults to `True`):
            Whether or not to add an initial space to the input. This allows to treat the leading word just as any
            other word. Again, this should be set with `from_slow=True` to make sure it's taken into account.
    """

    vocab_files_names = VOCAB_FILES_NAMES
    model_input_names = ["input_ids", "attention_mask"]

    def __init__(
        self,
        vocab_file,
        unk_token="<unk>",
        bos_token="<s>",
        eos_token="</s>",
        pad_token=None,
        sp_model_kwargs: Optional[Dict[str, Any]] = None,
        add_bos_token=True,
        add_eos_token=False,
        clean_up_tokenization_spaces=False,
        use_default_system_prompt=False,
        spaces_between_special_tokens=False,
        legacy=None,
        add_prefix_space=True,
        **kwargs,
    ):
        self.sp_model_kwargs = {} if sp_model_kwargs is None else sp_model_kwargs
        bos_token = AddedToken(bos_token, normalized=False, special=True) if isinstance(bos_token, str) else bos_token
        eos_token = AddedToken(eos_token, normalized=False, special=True) if isinstance(eos_token, str) else eos_token
        unk_token = AddedToken(unk_token, normalized=False, special=True) if isinstance(unk_token, str) else unk_token
        pad_token = AddedToken(pad_token, normalized=False, special=True) if isinstance(pad_token, str) else pad_token

        if legacy is None:
            logger.warning_once(
                f"You are using the default legacy behaviour of the {self.__class__}. This is"
                " expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you."
                " If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it"
                " means, and thoroughly read the reason why this was added as explained in"
                " https://github.com/huggingface/transformers/pull/24565 - if you loaded a llama tokenizer from a GGUF file"
                " you can ignore this message"
            )
            legacy = True

        self.legacy = legacy
        self.vocab_file = vocab_file
        self.add_bos_token = add_bos_token
        self.add_eos_token = add_eos_token
        self.use_default_system_prompt = use_default_system_prompt
        self.sp_model = self.get_spm_processor(kwargs.pop("from_slow", False))
        self.add_prefix_space = add_prefix_space

        super().__init__(
            bos_token=bos_token,
            eos_token=eos_token,
            unk_token=unk_token,
            pad_token=pad_token,
            add_bos_token=add_bos_token,
            add_eos_token=add_eos_token,
            sp_model_kwargs=self.sp_model_kwargs,
            clean_up_tokenization_spaces=clean_up_tokenization_spaces,
            use_default_system_prompt=use_default_system_prompt,
            spaces_between_special_tokens=spaces_between_special_tokens,
            legacy=legacy,
            add_prefix_space=add_prefix_space,
            **kwargs,
        )

    @property
    def unk_token_length(self):
        return len(self.sp_model.encode(str(self.unk_token)))

    # Copied from transformers.models.t5.tokenization_t5.T5Tokenizer.get_spm_processor
    def get_spm_processor(self, from_slow=False):
        tokenizer = spm.SentencePieceProcessor(**self.sp_model_kwargs)
        if self.legacy or from_slow:  # no dependency on protobuf
            tokenizer.Load(self.vocab_file)
            return tokenizer

        with open(self.vocab_file, "rb") as f:
            sp_model = f.read()
            model_pb2 = import_protobuf(f"The new behaviour of {self.__class__.__name__} (with `self.legacy = False`)")
            model = model_pb2.ModelProto.FromString(sp_model)
            normalizer_spec = model_pb2.NormalizerSpec()
            normalizer_spec.add_dummy_prefix = False
            model.normalizer_spec.MergeFrom(normalizer_spec)
            sp_model = model.SerializeToString()
            tokenizer.LoadFromSerializedProto(sp_model)
        return tokenizer

    def __getstate__(self):
        state = self.__dict__.copy()
        state["sp_model"] = None
        state["sp_model_proto"] = self.sp_model.serialized_model_proto()
        return state

    def __setstate__(self, d):
        self.__dict__ = d
        self.sp_model = spm.SentencePieceProcessor(**self.sp_model_kwargs)
        self.sp_model.LoadFromSerializedProto(self.sp_model_proto)

    @property
    def vocab_size(self):
        """Returns vocab size"""
        return self.sp_model.get_piece_size()

    def get_vocab(self):
        """Returns vocab as a dict"""
        vocab = {self.convert_ids_to_tokens(i): i for i in range(self.vocab_size)}
        vocab.update(self.added_tokens_encoder)
        return vocab

    # Copied from transformers.models.t5.tokenization_t5.T5Tokenizer.tokenize
    def tokenize(self, text: "TextInput", **kwargs) -> List[str]:
        """
        Converts a string to a list of tokens. If `self.legacy` is set to `False`, a prefix token is added unless the
        first token is special.
        """
        if self.legacy or len(text) == 0:
            return super().tokenize(text, **kwargs)

        text = text.replace(SPIECE_UNDERLINE, " ")
        if self.add_prefix_space:
            text = SPIECE_UNDERLINE + text

        tokens = super().tokenize(text, **kwargs)

        if len(tokens) > 1 and tokens[0] == SPIECE_UNDERLINE and tokens[1] in self.all_special_tokens:
            tokens = tokens[1:]
        return tokens

    # Copied from transformers.models.t5.tokenization_t5.T5Tokenizer._tokenize
    def _tokenize(self, text, **kwargs):
        """
        Returns a tokenized string.

        We de-activated the `add_dummy_prefix` option, thus the sentencepiece internals will always strip any
        SPIECE_UNDERLINE. For example: `self.sp_model.encode(f"{SPIECE_UNDERLINE}Hey", out_type = str)` will give
        `['H', 'e', 'y']` instead of `['▁He', 'y']`. Thus we always encode `f"{unk_token}text"` and strip the
        `unk_token`. Here is an example with `unk_token = "<unk>"` and `unk_token_length = 4`.
        `self.tokenizer.sp_model.encode("<unk> Hey", out_type = str)[4:]`.
        """
        if self.legacy or not text.startswith((SPIECE_UNDERLINE, " ")):
            return self.sp_model.encode(text, out_type=str)

        # 1. Encode string + prefix ex: "<unk> Hey"
        tokens = self.sp_model.encode(self.unk_token + text, out_type=str)
        # 2. Remove self.unk_token from ['<','unk','>', '▁Hey']
        return tokens[self.unk_token_length :] if len(tokens) >= self.unk_token_length else tokens

    def _convert_token_to_id(self, token):
        """Converts a token (str) in an id using the vocab."""
        return self.sp_model.piece_to_id(token)

    def _convert_id_to_token(self, index):
        """Converts an index (integer) in a token (str) using the vocab."""
        token = self.sp_model.IdToPiece(index)
        return token

    def convert_tokens_to_string(self, tokens):
        """Converts a sequence of tokens (string) in a single string."""
        # since we manually add the prefix space, we have to remove it when decoding
        if tokens[0].startswith(SPIECE_UNDERLINE) and self.add_prefix_space:
            tokens[0] = tokens[0][1:]

        current_sub_tokens = []
        out_string = ""
        prev_is_special = False
        for i, token in enumerate(tokens):
            # make sure that special tokens are not decoded using sentencepiece model
            if token in self.all_special_tokens:
                if not prev_is_special and i != 0 and self.legacy:
                    out_string += " "
                out_string += self.sp_model.decode(current_sub_tokens) + token
                prev_is_special = True
                current_sub_tokens = []
            else:
                if prev_is_special and i == 1 and self.add_prefix_space and not token.startswith(SPIECE_UNDERLINE):
                    out_string += " "
                current_sub_tokens.append(token)
                prev_is_special = False
        out_string += self.sp_model.decode(current_sub_tokens)
        return out_string

    def save_vocabulary(self, save_directory, filename_prefix: Optional[str] = None) -> Tuple[str]:
        """
        Save the vocabulary and special tokens file to a directory.

        Args:
            save_directory (`str`):
                The directory in which to save the vocabulary.

        Returns:
            `Tuple(str)`: Paths to the files saved.
        """
        if not os.path.isdir(save_directory):
            logger.error(f"Vocabulary path ({save_directory}) should be a directory")
            return
        out_vocab_file = os.path.join(
            save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"]
        )

        if os.path.abspath(self.vocab_file) != os.path.abspath(out_vocab_file) and os.path.isfile(self.vocab_file):
            copyfile(self.vocab_file, out_vocab_file)
        elif not os.path.isfile(self.vocab_file):
            with open(out_vocab_file, "wb") as fi:
                content_spiece_model = self.sp_model.serialized_model_proto()
                fi.write(content_spiece_model)

        return (out_vocab_file,)

    def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None):
        bos_token_id = [self.bos_token_id] if self.add_bos_token else []
        eos_token_id = [self.eos_token_id] if self.add_eos_token else []

        output = bos_token_id + token_ids_0 + eos_token_id

        if token_ids_1 is not None:
            output = output + bos_token_id + token_ids_1 + eos_token_id

        return output

    def get_special_tokens_mask(
        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None, already_has_special_tokens: bool = False
    ) -> List[int]:
        """
        Retrieve sequence ids from a token list that has no special tokens added. This method is called when adding
        special tokens using the tokenizer `prepare_for_model` method.

        Args:
            token_ids_0 (`List[int]`):
                List of IDs.
            token_ids_1 (`List[int]`, *optional*):
                Optional second list of IDs for sequence pairs.
            already_has_special_tokens (`bool`, *optional*, defaults to `False`):
                Whether or not the token list is already formatted with special tokens for the model.

        Returns:
            `List[int]`: A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token.
        """
        if already_has_special_tokens:
            return super().get_special_tokens_mask(
                token_ids_0=token_ids_0, token_ids_1=token_ids_1, already_has_special_tokens=True
            )

        bos_token_id = [1] if self.add_bos_token else []
        eos_token_id = [1] if self.add_eos_token else []

        if token_ids_1 is None:
            return bos_token_id + ([0] * len(token_ids_0)) + eos_token_id
        return (
            bos_token_id
            + ([0] * len(token_ids_0))
            + eos_token_id
            + bos_token_id
            + ([0] * len(token_ids_1))
            + eos_token_id
        )

    def create_token_type_ids_from_sequences(
        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
    ) -> List[int]:
        """
        Creates a mask from the two sequences passed to be used in a sequence-pair classification task. An ALBERT
        sequence pair mask has the following format:

        ```
        0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1
        | first sequence    | second sequence |
        ```

        if token_ids_1 is None, only returns the first portion of the mask (0s).

        Args:
            token_ids_0 (`List[int]`):
                List of ids.
            token_ids_1 (`List[int]`, *optional*):
                Optional second list of IDs for sequence pairs.

        Returns:
            `List[int]`: List of [token type IDs](../glossary#token-type-ids) according to the given sequence(s).
        """
        bos_token_id = [self.bos_token_id] if self.add_bos_token else []
        eos_token_id = [self.eos_token_id] if self.add_eos_token else []

        output = [0] * len(bos_token_id + token_ids_0 + eos_token_id)

        if token_ids_1 is not None:
            output += [1] * len(bos_token_id + token_ids_1 + eos_token_id)

        return output

mindnlp.transformers.models.llama.tokenization_llama.LlamaTokenizer.vocab_size property

Returns vocab size

mindnlp.transformers.models.llama.tokenization_llama.LlamaTokenizer.convert_tokens_to_string(tokens)

Converts a sequence of tokens (strings) into a single string.

Source code in mindnlp\transformers\models\llama\tokenization_llama.py
def convert_tokens_to_string(self, tokens):
    """Converts a sequence of tokens (string) in a single string."""
    # since we manually add the prefix space, we have to remove it when decoding
    if tokens[0].startswith(SPIECE_UNDERLINE) and self.add_prefix_space:
        tokens[0] = tokens[0][1:]

    current_sub_tokens = []
    out_string = ""
    prev_is_special = False
    for i, token in enumerate(tokens):
        # make sure that special tokens are not decoded using sentencepiece model
        if token in self.all_special_tokens:
            if not prev_is_special and i != 0 and self.legacy:
                out_string += " "
            out_string += self.sp_model.decode(current_sub_tokens) + token
            prev_is_special = True
            current_sub_tokens = []
        else:
            if prev_is_special and i == 1 and self.add_prefix_space and not token.startswith(SPIECE_UNDERLINE):
                out_string += " "
            current_sub_tokens.append(token)
            prev_is_special = False
    out_string += self.sp_model.decode(current_sub_tokens)
    return out_string
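
A minimal round-trip sketch, assuming the class is importable from mindnlp.transformers and a standard Llama SentencePiece checkpoint; the token strings shown are indicative only:

```python
>>> from mindnlp.transformers import LlamaTokenizer  # import path assumed for illustration
>>> tokenizer = LlamaTokenizer.from_pretrained("huggyllama/llama-7b")
>>> tokens = tokenizer.tokenize("Hello world")
>>> tokens
['▁Hello', '▁world']
>>> tokenizer.convert_tokens_to_string(tokens)  # the manually added prefix space is removed again
'Hello world'
```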

mindnlp.transformers.models.llama.tokenization_llama.LlamaTokenizer.create_token_type_ids_from_sequences(token_ids_0, token_ids_1=None)

Creates a mask from the two sequences passed to be used in a sequence-pair classification task. An ALBERT sequence pair mask has the following format:

0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1
| first sequence    | second sequence |

If token_ids_1 is None, only the first portion of the mask (0s) is returned.

PARAMETER DESCRIPTION
token_ids_0

List of ids.

TYPE: `List[int]`

token_ids_1

Optional second list of IDs for sequence pairs.

TYPE: `List[int]`, *optional* DEFAULT: None

RETURNS DESCRIPTION
List[int]

List[int]: List of token type IDs according to the given sequence(s).

Source code in mindnlp\transformers\models\llama\tokenization_llama.py
def create_token_type_ids_from_sequences(
    self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
) -> List[int]:
    """
    Creates a mask from the two sequences passed to be used in a sequence-pair classification task. An ALBERT
    sequence pair mask has the following format:

    ```
    0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1
    | first sequence    | second sequence |
    ```

    if token_ids_1 is None, only returns the first portion of the mask (0s).

    Args:
        token_ids_0 (`List[int]`):
            List of ids.
        token_ids_1 (`List[int]`, *optional*):
            Optional second list of IDs for sequence pairs.

    Returns:
        `List[int]`: List of [token type IDs](../glossary#token-type-ids) according to the given sequence(s).
    """
    bos_token_id = [self.bos_token_id] if self.add_bos_token else []
    eos_token_id = [self.eos_token_id] if self.add_eos_token else []

    output = [0] * len(bos_token_id + token_ids_0 + eos_token_id)

    if token_ids_1 is not None:
        output += [1] * len(bos_token_id + token_ids_1 + eos_token_id)

    return output
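
An illustrative sketch under the default settings (add_bos_token=True, add_eos_token=False), so each segment contributes one extra leading position for its BOS token; the ids below are arbitrary placeholders:

```python
>>> from mindnlp.transformers import LlamaTokenizer  # import path assumed for illustration
>>> tokenizer = LlamaTokenizer.from_pretrained("huggyllama/llama-7b")
>>> tokenizer.create_token_type_ids_from_sequences([100, 101, 102], [200, 201])  # 1 bos + 3 ids, then 1 bos + 2 ids
[0, 0, 0, 0, 1, 1, 1]
```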

mindnlp.transformers.models.llama.tokenization_llama.LlamaTokenizer.get_special_tokens_mask(token_ids_0, token_ids_1=None, already_has_special_tokens=False)

Retrieve sequence ids from a token list that has no special tokens added. This method is called when adding special tokens using the tokenizer prepare_for_model method.

PARAMETER DESCRIPTION
token_ids_0

List of IDs.

TYPE: `List[int]`

token_ids_1

Optional second list of IDs for sequence pairs.

TYPE: `List[int]`, *optional* DEFAULT: None

already_has_special_tokens

Whether or not the token list is already formatted with special tokens for the model.

TYPE: `bool`, *optional*, defaults to `False` DEFAULT: False

RETURNS DESCRIPTION
List[int]

List[int]: A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token.

Source code in mindnlp\transformers\models\llama\tokenization_llama.py
def get_special_tokens_mask(
    self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None, already_has_special_tokens: bool = False
) -> List[int]:
    """
    Retrieve sequence ids from a token list that has no special tokens added. This method is called when adding
    special tokens using the tokenizer `prepare_for_model` method.

    Args:
        token_ids_0 (`List[int]`):
            List of IDs.
        token_ids_1 (`List[int]`, *optional*):
            Optional second list of IDs for sequence pairs.
        already_has_special_tokens (`bool`, *optional*, defaults to `False`):
            Whether or not the token list is already formatted with special tokens for the model.

    Returns:
        `List[int]`: A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token.
    """
    if already_has_special_tokens:
        return super().get_special_tokens_mask(
            token_ids_0=token_ids_0, token_ids_1=token_ids_1, already_has_special_tokens=True
        )

    bos_token_id = [1] if self.add_bos_token else []
    eos_token_id = [1] if self.add_eos_token else []

    if token_ids_1 is None:
        return bos_token_id + ([0] * len(token_ids_0)) + eos_token_id
    return (
        bos_token_id
        + ([0] * len(token_ids_0))
        + eos_token_id
        + bos_token_id
        + ([0] * len(token_ids_1))
        + eos_token_id
    )
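
A short sketch under the default settings (add_bos_token=True, add_eos_token=False); the ids are placeholders, only their count matters:

```python
>>> from mindnlp.transformers import LlamaTokenizer  # import path assumed for illustration
>>> tokenizer = LlamaTokenizer.from_pretrained("huggyllama/llama-7b")
>>> tokenizer.get_special_tokens_mask([100, 101, 102])        # the 1 marks the prepended BOS position
[1, 0, 0, 0]
>>> tokenizer.get_special_tokens_mask([100, 101], [200, 201])
[1, 0, 0, 1, 0, 0]
```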

mindnlp.transformers.models.llama.tokenization_llama.LlamaTokenizer.get_vocab()

Returns vocab as a dict

Source code in mindnlp\transformers\models\llama\tokenization_llama.py
def get_vocab(self):
    """Returns vocab as a dict"""
    vocab = {self.convert_ids_to_tokens(i): i for i in range(self.vocab_size)}
    vocab.update(self.added_tokens_encoder)
    return vocab
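
A quick sketch; the special-token ids shown are those of the released Llama SentencePiece model and are indicative only:

```python
>>> from mindnlp.transformers import LlamaTokenizer  # import path assumed for illustration
>>> tokenizer = LlamaTokenizer.from_pretrained("huggyllama/llama-7b")
>>> vocab = tokenizer.get_vocab()
>>> vocab["<unk>"], vocab["<s>"], vocab["</s>"]
(0, 1, 2)
>>> len(vocab) >= tokenizer.vocab_size  # added tokens are merged on top of the base vocabulary
True
```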

mindnlp.transformers.models.llama.tokenization_llama.LlamaTokenizer.save_vocabulary(save_directory, filename_prefix=None)

Save the vocabulary and special tokens file to a directory.

PARAMETER DESCRIPTION
save_directory

The directory in which to save the vocabulary.

TYPE: `str`

RETURNS DESCRIPTION
Tuple[str]

Tuple[str]: Paths to the files saved.

Source code in mindnlp\transformers\models\llama\tokenization_llama.py
def save_vocabulary(self, save_directory, filename_prefix: Optional[str] = None) -> Tuple[str]:
    """
    Save the vocabulary and special tokens file to a directory.

    Args:
        save_directory (`str`):
            The directory in which to save the vocabulary.

    Returns:
        `Tuple(str)`: Paths to the files saved.
    """
    if not os.path.isdir(save_directory):
        logger.error(f"Vocabulary path ({save_directory}) should be a directory")
        return
    out_vocab_file = os.path.join(
        save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"]
    )

    if os.path.abspath(self.vocab_file) != os.path.abspath(out_vocab_file) and os.path.isfile(self.vocab_file):
        copyfile(self.vocab_file, out_vocab_file)
    elif not os.path.isfile(self.vocab_file):
        with open(out_vocab_file, "wb") as fi:
            content_spiece_model = self.sp_model.serialized_model_proto()
            fi.write(content_spiece_model)

    return (out_vocab_file,)
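
A usage sketch; for this model family the vocabulary file is written as tokenizer.model (the name comes from VOCAB_FILES_NAMES):

```python
>>> import os, tempfile
>>> from mindnlp.transformers import LlamaTokenizer  # import path assumed for illustration
>>> tokenizer = LlamaTokenizer.from_pretrained("huggyllama/llama-7b")
>>> (out_file,) = tokenizer.save_vocabulary(tempfile.mkdtemp())
>>> os.path.basename(out_file)
'tokenizer.model'
```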

mindnlp.transformers.models.llama.tokenization_llama.LlamaTokenizer.tokenize(text, **kwargs)

Converts a string to a list of tokens. If self.legacy is set to False, a prefix token is added unless the first token is special.

Source code in mindnlp\transformers\models\llama\tokenization_llama.py
def tokenize(self, text: "TextInput", **kwargs) -> List[str]:
    """
    Converts a string to a list of tokens. If `self.legacy` is set to `False`, a prefix token is added unless the
    first token is special.
    """
    if self.legacy or len(text) == 0:
        return super().tokenize(text, **kwargs)

    text = text.replace(SPIECE_UNDERLINE, " ")
    if self.add_prefix_space:
        text = SPIECE_UNDERLINE + text

    tokens = super().tokenize(text, **kwargs)

    if len(tokens) > 1 and tokens[0] == SPIECE_UNDERLINE and tokens[1] in self.all_special_tokens:
        tokens = tokens[1:]
    return tokens
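
A sketch of the non-legacy path (outputs are indicative and depend on the SentencePiece model): with add_prefix_space left at its default, the text is prefixed with '▁' before SentencePiece sees it, and an empty input simply falls back to the parent implementation:

```python
>>> from mindnlp.transformers import LlamaTokenizer  # import path assumed for illustration
>>> tokenizer = LlamaTokenizer.from_pretrained("huggyllama/llama-7b", legacy=False)
>>> tokenizer.tokenize("Hello")
['▁Hello']
>>> tokenizer.tokenize("")
[]
```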

mindnlp.transformers.models.llama.tokenization_llama_fast

tokenization llama fast

mindnlp.transformers.models.llama.tokenization_llama_fast.LlamaTokenizerFast

Bases: PreTrainedTokenizerFast

Construct a Llama tokenizer. Based on byte-level Byte-Pair-Encoding.

Notably, this uses ByteFallback and no normalization.

>>> from transformers import LlamaTokenizerFast

>>> tokenizer = LlamaTokenizerFast.from_pretrained("hf-internal-testing/llama-tokenizer")
>>> tokenizer.encode("Hello this is a test")
[1, 15043, 445, 338, 263, 1243]

If you want to change the bos_token or the eos_token, make sure to specify them when initializing the tokenizer, or call tokenizer.update_post_processor() to make sure that the post-processing is correctly done (otherwise the values of the first token and final token of an encoded sequence will not be correct). For more details, check out the [post-processors](https://huggingface.co/docs/tokenizers/api/post-processors) documentation.

This tokenizer inherits from [PreTrainedTokenizerFast] which contains most of the main methods. Users should refer to this superclass for more information regarding those methods.

PARAMETER DESCRIPTION
vocab_file

SentencePiece file (generally has a .model extension) that contains the vocabulary necessary to instantiate a tokenizer.

TYPE: `str`, *optional* DEFAULT: None

tokenizer_file

tokenizers file (generally has a .json extension) that contains everything needed to load the tokenizer.

TYPE: `str`, *optional* DEFAULT: None

clean_up_tokenization_spaces

Whether or not to clean up spaces after decoding; cleanup consists of removing potential artifacts like extra spaces.

TYPE: `bool`, *optional*, defaults to `False` DEFAULT: False

unk_token

The unknown token. A token that is not in the vocabulary cannot be converted to an ID and is set to be this token instead.

TYPE: `str` or `tokenizers.AddedToken`, *optional*, defaults to `"<unk>"` DEFAULT: '<unk>'

bos_token

The beginning of sequence token that was used during pretraining. Can be used as a sequence classifier token.

TYPE: `str` or `tokenizers.AddedToken`, *optional*, defaults to `"<s>"` DEFAULT: '<s>'

eos_token

The end of sequence token.

TYPE: `str` or `tokenizers.AddedToken`, *optional*, defaults to `"</s>"` DEFAULT: '</s>'

add_bos_token

Whether or not to add a bos_token at the start of sequences.

TYPE: `bool`, *optional*, defaults to `True` DEFAULT: True

add_eos_token

Whether or not to add an eos_token at the end of sequences.

TYPE: `bool`, *optional*, defaults to `False` DEFAULT: False

use_default_system_prompt

Whether or not the default system prompt for Llama should be used

TYPE: `bool`, *optional*, defaults to `False` DEFAULT: False

legacy

Whether or not the legacy behavior of the tokenizer should be used. Legacy is the behavior before the merge of #24622 and #25224, which include fixes to properly handle tokens that appear after special tokens. Make sure to also set from_slow to True. A simple example:

  • legacy=True:
    >>> from transformers import LlamaTokenizerFast
    
    >>> tokenizer = LlamaTokenizerFast.from_pretrained("huggyllama/llama-7b", legacy=True, from_slow=True)
    >>> tokenizer.encode("Hello <s>.") # 869 is '▁.'
    [1, 15043, 29871, 1, 869]
    
  • legacy=False:
    >>> from transformers import LlamaTokenizerFast
    
    >>> tokenizer = LlamaTokenizerFast.from_pretrained("huggyllama/llama-7b", legacy=False, from_slow=True)
    >>> tokenizer.encode("Hello <s>.")  # 29889 is '.'
    [1, 15043, 29871, 1, 29889]
    
    Checkout the pull request for more details.

TYPE: `bool`, *optional* DEFAULT: None

add_prefix_space

Whether or not the tokenizer should automatically add a prefix space

TYPE: `bool`, *optional* DEFAULT: None

Source code in mindnlp\transformers\models\llama\tokenization_llama_fast.py
class LlamaTokenizerFast(PreTrainedTokenizerFast):
    """
    Construct a Llama tokenizer. Based on byte-level Byte-Pair-Encoding.

    This uses notably ByteFallback and no normalization.

    ```python
    >>> from transformers import LlamaTokenizerFast

    >>> tokenizer = LlamaTokenizerFast.from_pretrained("hf-internal-testing/llama-tokenizer")
    >>> tokenizer.encode("Hello this is a test")
    [1, 15043, 445, 338, 263, 1243]
    ```

    If you want to change the `bos_token` or the `eos_token`, make sure to specify them when initializing the model, or
    call `tokenizer.update_post_processor()` to make sure that the post-processing is correctly done (otherwise the
    values of the first token and final token of an encoded sequence will not be correct). For more details, checkout
    [post-processors] (https://huggingface.co/docs/tokenizers/api/post-processors) documentation.


    This tokenizer inherits from [`PreTrainedTokenizerFast`] which contains most of the main methods. Users should
    refer to this superclass for more information regarding those methods.

    Args:
        vocab_file (`str`, *optional*):
            [SentencePiece](https://github.com/google/sentencepiece) file (generally has a .model extension) that
            contains the vocabulary necessary to instantiate a tokenizer.
        tokenizer_file (`str`, *optional*):
            [tokenizers](https://github.com/huggingface/tokenizers) file (generally has a .json extension) that
            contains everything needed to load the tokenizer.
        clean_up_tokenization_spaces (`bool`, *optional*, defaults to `False`):
            Whether or not to cleanup spaces after decoding, cleanup consists in removing potential artifacts like
            extra spaces.
        unk_token (`str` or `tokenizers.AddedToken`, *optional*, defaults to `"<unk>"`):
            The unknown token. A token that is not in the vocabulary cannot be converted to an ID and is set to be this
            token instead.
        bos_token (`str` or `tokenizers.AddedToken`, *optional*, defaults to `"<s>"`):
            The beginning of sequence token that was used during pretraining. Can be used a sequence classifier token.
        eos_token (`str` or `tokenizers.AddedToken`, *optional*, defaults to `"</s>"`):
            The end of sequence token.
        add_bos_token (`bool`, *optional*, defaults to `True`):
            Whether or not to add an `bos_token` at the start of sequences.
        add_eos_token (`bool`, *optional*, defaults to `False`):
            Whether or not to add an `eos_token` at the end of sequences.
        use_default_system_prompt (`bool`, *optional*, defaults to `False`):
            Whether or not the default system prompt for Llama should be used
        legacy (`bool`, *optional*):
            Whether or not the `legacy` behavior of the tokenizer should be used. Legacy is before the merge of #24622
            and #25224 which includes fixes to properly handle tokens that appear after special tokens.
            Make sure to also set `from_slow` to `True`.
            A simple example:

            - `legacy=True`:
            ```python
            >>> from transformers import LlamaTokenizerFast

            >>> tokenizer = LlamaTokenizerFast.from_pretrained("huggyllama/llama-7b", legacy=True, from_slow=True)
            >>> tokenizer.encode("Hello <s>.") # 869 is '▁.'
            [1, 15043, 29871, 1, 869]
            ```
            - `legacy=False`:
            ```python
            >>> from transformers import LlamaTokenizerFast

            >>> tokenizer = LlamaTokenizerFast.from_pretrained("huggyllama/llama-7b", legacy=False, from_slow=True)
            >>> tokenizer.encode("Hello <s>.")  # 29889 is '.'
            [1, 15043, 29871, 1, 29889]
            ```
            Checkout the [pull request](https://github.com/huggingface/transformers/pull/24565) for more details.
        add_prefix_space (`bool`, *optional*):
            Whether or not the tokenizer should automatically add a prefix space
    """

    vocab_files_names = VOCAB_FILES_NAMES
    slow_tokenizer_class = LlamaTokenizer
    padding_side = "left"
    model_input_names = ["input_ids", "attention_mask"]

    def __init__(
        self,
        vocab_file=None,
        tokenizer_file=None,
        clean_up_tokenization_spaces=False,
        unk_token="<unk>",
        bos_token="<s>",
        eos_token="</s>",
        add_bos_token=True,
        add_eos_token=False,
        use_default_system_prompt=False,
        legacy=None,
        add_prefix_space=None,
        **kwargs,
    ):
        if legacy is None:
            logger.warning_once(
                f"You are using the default legacy behaviour of the {self.__class__}. This is"
                " expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you."
                " If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it"
                " means, and thoroughly read the reason why this was added as explained in"
                " https://github.com/huggingface/transformers/pull/24565 - if you loaded a llama tokenizer from a GGUF file"
                " you can ignore this message."
            )
            legacy = True
        self.legacy = legacy

        if add_prefix_space is not None:
            kwargs["from_slow"] = True

        super().__init__(
            vocab_file=vocab_file,
            tokenizer_file=tokenizer_file,
            clean_up_tokenization_spaces=clean_up_tokenization_spaces,
            unk_token=unk_token,
            bos_token=bos_token,
            eos_token=eos_token,
            add_bos_token=add_bos_token,
            add_eos_token=add_eos_token,
            use_default_system_prompt=use_default_system_prompt,
            add_prefix_space=add_prefix_space,
            legacy=legacy,
            **kwargs,
        )
        self._add_bos_token = add_bos_token
        self._add_eos_token = add_eos_token
        self.update_post_processor()
        self.use_default_system_prompt = use_default_system_prompt
        self.vocab_file = vocab_file

    @property
    def can_save_slow_tokenizer(self) -> bool:
        return os.path.isfile(self.vocab_file) if self.vocab_file else False

    def update_post_processor(self):
        """
        Updates the underlying post processor with the current `bos_token` and `eos_token`.
        """
        bos = self.bos_token
        bos_token_id = self.bos_token_id
        if bos is None and self.add_bos_token:
            raise ValueError("add_bos_token = True but bos_token = None")

        eos = self.eos_token
        eos_token_id = self.eos_token_id
        if eos is None and self.add_eos_token:
            raise ValueError("add_eos_token = True but eos_token = None")

        single = f"{(bos+':0 ') if self.add_bos_token else ''}$A:0{(' '+eos+':0') if self.add_eos_token else ''}"
        pair = f"{single}{(' '+bos+':1') if self.add_bos_token else ''} $B:1{(' '+eos+':1') if self.add_eos_token else ''}"

        special_tokens = []
        if self.add_bos_token:
            special_tokens.append((bos, bos_token_id))
        if self.add_eos_token:
            special_tokens.append((eos, eos_token_id))
        self._tokenizer.post_processor = processors.TemplateProcessing(
            single=single, pair=pair, special_tokens=special_tokens
        )

    @property
    def add_eos_token(self):
        return self._add_eos_token

    @property
    def add_bos_token(self):
        return self._add_bos_token

    @add_eos_token.setter
    def add_eos_token(self, value):
        self._add_eos_token = value
        self.update_post_processor()

    @add_bos_token.setter
    def add_bos_token(self, value):
        self._add_bos_token = value
        self.update_post_processor()

    def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]:
        if not self.can_save_slow_tokenizer:
            raise ValueError(
                "Your fast tokenizer does not have the necessary information to save the vocabulary for a slow "
                "tokenizer."
            )

        if not os.path.isdir(save_directory):
            logger.error(f"Vocabulary path ({save_directory}) should be a directory")
            return
        out_vocab_file = os.path.join(
            save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"]
        )

        if os.path.abspath(self.vocab_file) != os.path.abspath(out_vocab_file):
            copyfile(self.vocab_file, out_vocab_file)

        return (out_vocab_file,)

    # TODO ArthurZ let's rely on the template processor instead, refactor all fast tokenizers
    # Copied from transformers.models.llama.tokenization_llama.LlamaTokenizer.build_inputs_with_special_tokens
    def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None):
        bos_token_id = [self.bos_token_id] if self.add_bos_token else []
        eos_token_id = [self.eos_token_id] if self.add_eos_token else []

        output = bos_token_id + token_ids_0 + eos_token_id

        if token_ids_1 is not None:
            output = output + bos_token_id + token_ids_1 + eos_token_id

        return output
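
A hedged usage sketch: because special tokens are applied by the post-processor, enabling add_eos_token at construction changes what encode produces (checkpoint name as in the class docstring; exact ids depend on the vocabulary):

```python
>>> from mindnlp.transformers import LlamaTokenizerFast  # import path assumed for illustration
>>> tok = LlamaTokenizerFast.from_pretrained("hf-internal-testing/llama-tokenizer", add_eos_token=True)
>>> ids = tok.encode("Hello this is a test")
>>> ids[0] == tok.bos_token_id, ids[-1] == tok.eos_token_id
(True, True)
```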

mindnlp.transformers.models.llama.tokenization_llama_fast.LlamaTokenizerFast.update_post_processor()

Updates the underlying post processor with the current bos_token and eos_token.

Source code in mindnlp\transformers\models\llama\tokenization_llama_fast.py
def update_post_processor(self):
    """
    Updates the underlying post processor with the current `bos_token` and `eos_token`.
    """
    bos = self.bos_token
    bos_token_id = self.bos_token_id
    if bos is None and self.add_bos_token:
        raise ValueError("add_bos_token = True but bos_token = None")

    eos = self.eos_token
    eos_token_id = self.eos_token_id
    if eos is None and self.add_eos_token:
        raise ValueError("add_eos_token = True but eos_token = None")

    single = f"{(bos+':0 ') if self.add_bos_token else ''}$A:0{(' '+eos+':0') if self.add_eos_token else ''}"
    pair = f"{single}{(' '+bos+':1') if self.add_bos_token else ''} $B:1{(' '+eos+':1') if self.add_eos_token else ''}"

    special_tokens = []
    if self.add_bos_token:
        special_tokens.append((bos, bos_token_id))
    if self.add_eos_token:
        special_tokens.append((eos, eos_token_id))
    self._tokenizer.post_processor = processors.TemplateProcessing(
        single=single, pair=pair, special_tokens=special_tokens
    )
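
A brief sketch of when this matters: the add_bos_token/add_eos_token setters call update_post_processor() themselves, so toggling them after construction immediately changes the encoded output (checkpoint name as in the class docstring):

```python
>>> from mindnlp.transformers import LlamaTokenizerFast  # import path assumed for illustration
>>> tok = LlamaTokenizerFast.from_pretrained("hf-internal-testing/llama-tokenizer")
>>> tok.encode("hi")[-1] == tok.eos_token_id
False
>>> tok.add_eos_token = True  # the setter re-runs update_post_processor()
>>> tok.encode("hi")[-1] == tok.eos_token_id
True
```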

mindnlp.transformers.models.llama.tokenization_code_llama

Tokenization classes for Code LLaMA.

mindnlp.transformers.models.llama.tokenization_code_llama.CodeLlamaTokenizer

Bases: PreTrainedTokenizer

Construct a CodeLlama tokenizer. Based on byte-level Byte-Pair-Encoding. The default padding token is unset as there is no padding token in the original model.

The default configuration matches that of codellama/CodeLlama-7b-Instruct-hf, which supports prompt infilling.
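
A hedged infilling sketch (import path assumed to mirror the rest of these docs): the <FILL_ME> marker splits the prompt into a prefix and a suffix, and tokenize() arranges them around the ▁<PRE>/▁<SUF>/▁<MID> control tokens:

```python
>>> from mindnlp.transformers import CodeLlamaTokenizer  # import path assumed for illustration
>>> tokenizer = CodeLlamaTokenizer.from_pretrained("codellama/CodeLlama-7b-Instruct-hf")
>>> tokens = tokenizer.tokenize("def add(a, b):\n    <FILL_ME>\n")
>>> tokens[0], tokens.count("▁<SUF>"), tokens.count("▁<MID>")
('▁<PRE>', 1, 1)
```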

PARAMETER DESCRIPTION
vocab_file

Path to the vocabulary file.

TYPE: `str`

unk_token

The unknown token. A token that is not in the vocabulary cannot be converted to an ID and is set to be this token instead.

TYPE: `str`, *optional*, defaults to `"<unk>"` DEFAULT: '<unk>'

bos_token

The beginning of sequence token that was used during pretraining. Can be used as a sequence classifier token.

TYPE: `str`, *optional*, defaults to `"<s>"` DEFAULT: '<s>'

eos_token

The end of sequence token.

When building a sequence using special tokens, this is not the token that is used for the end of sequence. The token used is the sep_token.

TYPE: `str`, *optional*, defaults to `"</s>"` DEFAULT: '</s>'

prefix_token

Prefix token used for infilling.

TYPE: `str`, *optional*, defaults to `"▁<PRE>"` DEFAULT: '▁<PRE>'

middle_token

Middle token used for infilling.

TYPE: `str`, *optional*, defaults to `"▁<MID>"` DEFAULT: '▁<MID>'

suffix_token

Suffix token used for infilling.

TYPE: `str`, *optional*, defaults to `"▁<SUF>"` DEFAULT: '▁<SUF>'

eot_token

End of text token used for infilling.

TYPE: `str`, *optional*, defaults to `"▁<EOT>"` DEFAULT: '▁<EOT>'

fill_token

The token used to split the input between the prefix and suffix.

TYPE: `str`, *optional*, defaults to `"<FILL_ME>"` DEFAULT: '<FILL_ME>'

suffix_first

Whether the input prompt and suffix should be formatted with the suffix first.

TYPE: `bool`, *optional*, defaults to `False` DEFAULT: False

sp_model_kwargs

Will be passed to the SentencePieceProcessor.__init__() method. The Python wrapper for SentencePiece can be used, among other things, to set:

  • enable_sampling: Enable subword regularization.
  • nbest_size: Sampling parameters for unigram. Invalid for BPE-Dropout.

    • nbest_size = {0,1}: No sampling is performed.
    • nbest_size > 1: samples from the nbest_size results.
    • nbest_size < 0: assumes that nbest_size is infinite and samples from all hypotheses (lattice) using the forward-filtering-and-backward-sampling algorithm.

  • alpha: Smoothing parameter for unigram sampling, and dropout probability of merge operations for BPE-dropout.

TYPE: `dict`, *optional* DEFAULT: None

add_bos_token

Whether to add a beginning of sequence token at the start of sequences.

TYPE: `bool`, *optional*, defaults to `True` DEFAULT: True

add_eos_token

Whether to add an end of sequence token at the end of sequences.

TYPE: `bool`, *optional*, defaults to `False` DEFAULT: False

clean_up_tokenization_spaces

Whether or not to clean up the tokenization spaces.

TYPE: `bool`, *optional*, defaults to `False` DEFAULT: False

additional_special_tokens

Additional special tokens used by the tokenizer.

TYPE: `List[str]`, *optional* DEFAULT: None

use_default_system_prompt

Whether or not the default system prompt for Llama should be used.

TYPE: `bool`, *optional*, defaults to `False` DEFAULT: False

Source code in mindnlp\transformers\models\llama\tokenization_code_llama.py
class CodeLlamaTokenizer(PreTrainedTokenizer):
    """
    Construct a CodeLlama tokenizer. Based on byte-level Byte-Pair-Encoding. The default padding token is unset as
    there is no padding token in the original model.

    The default configuration match that of
    [codellama/CodeLlama-7b-Instruct-hf](https://hf-mirror.com/codellama/CodeLlama-7b-Instruct-hf/blob/main/tokenizer_config.json)
    which supports prompt infilling.

    Args:
        vocab_file (`str`):
            Path to the vocabulary file.
        unk_token (`str`, *optional*, defaults to `"<unk>"`):
            The unknown token. A token that is not in the vocabulary cannot be converted to an ID and is set to be this
            token instead.
        bos_token (`str`, *optional*, defaults to `"<s>"`):
            The beginning of sequence token that was used during pretraining. Can be used a sequence classifier token.
        eos_token (`str`, *optional*, defaults to `"</s>"`):
            The end of sequence token.

            <Tip>

            When building a sequence using special tokens, this is not the token that is used for the end of sequence.
            The token used is the `sep_token`.

            </Tip>

        prefix_token (`str`, *optional*, defaults to `"▁<PRE>"`):
            Prefix token used for infilling.
        middle_token (`str`, *optional*, defaults to `"▁<MID>"`):
            Middle token used for infilling.
        suffix_token (`str`, *optional*, defaults to `"▁<SUF>"`):
            Suffix token used for infilling.
        eot_token (`str`, *optional*, defaults to `"▁<EOT>"`):
            End of text token used for infilling.
        fill_token (`str`, *optional*, defaults to `"<FILL_ME>"`):
            The token used to split the input between the prefix and suffix.
        suffix_first (`bool`, *optional*, defaults to `False`):
            Whether the input prompt and suffix should be formatted with the suffix first.
        sp_model_kwargs (`dict`, *optional*):
            Will be passed to the `SentencePieceProcessor.__init__()` method. The [Python wrapper for
            SentencePiece](https://github.com/google/sentencepiece/tree/master/python) can be used, among other things,
            to set:

            - `enable_sampling`: Enable subword regularization.
            - `nbest_size`: Sampling parameters for unigram. Invalid for BPE-Dropout.

                - `nbest_size = {0,1}`: No sampling is performed.
                - `nbest_size > 1`: samples from the nbest_size results.
                - `nbest_size < 0`: assuming that nbest_size is infinite and samples from the all hypothesis (lattice)
                using forward-filtering-and-backward-sampling algorithm.
            - `alpha`: Smoothing parameter for unigram sampling, and dropout probability of merge operations for
            BPE-dropout.
        add_bos_token (`bool`, *optional*, defaults to `True`):
            Whether to add a beginning of sequence token at the start of sequences.
        add_eos_token (`bool`, *optional*, defaults to `False`):
            Whether to add an end of sequence token at the end of sequences.
        clean_up_tokenization_spaces (`bool`, *optional*, defaults to `False`):
            Whether or not to clean up the tokenization spaces.
        additional_special_tokens (`List[str]`, *optional*):
            Additional special tokens used by the tokenizer.
        use_default_system_prompt (`bool`, *optional*, defaults to `False`):
            Whether or not the default system prompt for Llama should be used.
    """
    vocab_files_names = VOCAB_FILES_NAMES
    pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
    max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
    model_input_names = ["input_ids", "attention_mask"]

    def __init__(
        self,
        vocab_file,
        unk_token="<unk>",
        bos_token="<s>",
        eos_token="</s>",
        prefix_token="▁<PRE>",
        middle_token="▁<MID>",
        suffix_token="▁<SUF>",
        eot_token="▁<EOT>",
        fill_token="<FILL_ME>",
        suffix_first=False,
        sp_model_kwargs: Optional[Dict[str, Any]] = None,
        add_bos_token=True,
        add_eos_token=False,
        clean_up_tokenization_spaces=False,
        additional_special_tokens=None,
        use_default_system_prompt=False,
        **kwargs,
    ):
        """
        This method initializes an instance of the CodeLlamaTokenizer class.

        Args:
            self: The instance of the class.
            vocab_file (str): The path to the vocabulary file.
            unk_token (str, optional): The unknown token, default is '<unk>'.
            bos_token (str, optional): The beginning of sequence token, default is '<s>'.
            eos_token (str, optional): The end of sequence token, default is '</s>'.
            prefix_token (str, optional): The prefix token, default is '▁<PRE>'.
            middle_token (str, optional): The middle token, default is '▁<MID>'.
            suffix_token (str, optional): The suffix token, default is '▁<SUF>'.
            eot_token (str, optional): The end of text token, default is '▁<EOT>'.
            fill_token (str, optional): The fill token, default is '<FILL_ME>'.
            suffix_first (bool): Indicates whether suffix comes before prefix.
            sp_model_kwargs (Optional[Dict[str, Any]], optional): Additional arguments for the sentencepiece model.
            add_bos_token (bool, optional): Whether to add the bos token, default is True.
            add_eos_token (bool, optional): Whether to add the eos token, default is False.
            clean_up_tokenization_spaces (bool, optional): Whether to clean up tokenization spaces, default is False.
            additional_special_tokens (list, optional): Additional special tokens to include.
            use_default_system_prompt (bool, optional): Whether to use the default system prompt.

        Returns:
            None.

        Raises:
            MissingBackendError: If the required backend 'protobuf' is not available.
        """
        requires_backends(self, "protobuf")
        self.sp_model_kwargs = {} if sp_model_kwargs is None else sp_model_kwargs
        bos_token = AddedToken(bos_token, normalized=False, special=True) if isinstance(bos_token, str) else bos_token
        eos_token = AddedToken(eos_token, normalized=False, special=True) if isinstance(eos_token, str) else eos_token
        unk_token = AddedToken(unk_token, normalized=False, special=True) if isinstance(unk_token, str) else unk_token

        self.use_default_system_prompt = use_default_system_prompt
        # mark tokens special to skip them
        additional_special_tokens = additional_special_tokens or []
        for token in [prefix_token, middle_token, suffix_token, eot_token]:
            additional_special_tokens += [token] if token is not None else []

        self.vocab_file = vocab_file
        self.add_bos_token = add_bos_token
        self.add_eos_token = add_eos_token
        self._prefix_token = prefix_token
        self._middle_token = middle_token
        self._suffix_token = suffix_token
        self._eot_token = eot_token
        self.fill_token = fill_token
        self.suffix_first = suffix_first
        self.sp_model = self.get_spm_processor()

        super().__init__(
            bos_token=bos_token,
            eos_token=eos_token,
            unk_token=unk_token,
            add_bos_token=add_bos_token,
            add_eos_token=add_eos_token,
            prefix_token=prefix_token,
            middle_token=middle_token,
            suffix_token=suffix_token,
            eot_token=eot_token,
            fill_token=fill_token,
            sp_model_kwargs=self.sp_model_kwargs,
            suffix_first=suffix_first,
            clean_up_tokenization_spaces=clean_up_tokenization_spaces,
            additional_special_tokens=additional_special_tokens,
            use_default_system_prompt=use_default_system_prompt,
            **kwargs,
        )

    @property
    def unk_token_length(self):
        """
        Returns the length of the unknown token in the CodeLlamaTokenizer.

        Args:
            self (CodeLlamaTokenizer): An instance of the CodeLlamaTokenizer class.

        Returns:
            int: The length of the unknown token. If the unknown token is not found, it returns 0.

        Raises:
            None.

        """
        return len(self.sp_model.encode(str(self.unk_token)))

    def get_spm_processor(self):
        """
        This method initializes and returns a SentencePieceProcessor object for tokenizing text using
        the SentencePiece library.

        Args:
            self: The instance of the CodeLlamaTokenizer class.

        Returns:
            spm.SentencePieceProcessor: A tokenizer object of type spm.SentencePieceProcessor.

        Raises:
            None:
                However, potential exceptions that may occur during the method execution include:

                - FileNotFoundError: If the specified vocab_file cannot be found.
                - IOError: If there are issues with reading the vocab_file.
                - ValueError: If the provided sp_model_kwargs are invalid or missing required information.
                - Any other relevant exceptions that may occur during the loading and initialization of the tokenizer.
        """
        tokenizer = spm.SentencePieceProcessor(**self.sp_model_kwargs)
        with open(self.vocab_file, "rb") as f:
            sp_model = f.read()
            model_pb2 = import_protobuf()
            model = model_pb2.ModelProto.FromString(sp_model)
            normalizer_spec = model_pb2.NormalizerSpec()
            normalizer_spec.add_dummy_prefix = False
            model.normalizer_spec.MergeFrom(normalizer_spec)
            sp_model = model.SerializeToString()
            tokenizer.LoadFromSerializedProto(sp_model)
        return tokenizer

    @property
    def prefix_token(self):
        """
        Returns the prefix token used for tokenizing code in the CodeLlamaTokenizer class.

        Args:
            self: An instance of the CodeLlamaTokenizer class.

        Returns:
            None.

        Raises:
            None.

        This method retrieves the prefix token that is used for tokenizing code in the CodeLlamaTokenizer class.
        The prefix token serves as a marker or indicator to identify the start of a code block or expression.
        It is used during the tokenization process to correctly identify and separate different parts of the code.

        Note that the prefix token is an internal attribute of the CodeLlamaTokenizer class, and it is not meant to
        be modified directly. To change the prefix token, use the appropriate setter method or modify the underlying
        implementation of the class if necessary.

        Example:
            ```python
            >>> tokenizer = CodeLlamaTokenizer.from_pretrained("codellama/CodeLlama-7b-Instruct-hf")
            >>> prefix = tokenizer.prefix_token
            >>> print(prefix)
            >>> # Output: '▁<PRE>'
            ```
        """
        return self._prefix_token

    @property
    def prefix_id(self):
        """
        Method to retrieve the ID associated with the prefix token in the CodeLlamaTokenizer class.

        Args:
            self (CodeLlamaTokenizer): The instance of the CodeLlamaTokenizer class.

        Returns:
            None: If the prefix token is None, the method returns None.
                Otherwise, it returns the ID associated with the prefix token.

        Raises:
            None
        """
        if self._prefix_token is None:
            return None
        return self.convert_tokens_to_ids(self.prefix_token)

    @property
    def middle_token(self):
        """
        This method 'middle_token' is a property method defined in the class 'CodeLlamaTokenizer' that
        retrieves the middle token stored in the instance.

        Args:
            self (CodeLlamaTokenizer): The instance of the CodeLlamaTokenizer class.
                This parameter refers to the current instance of the class.

        Returns:
            None: This method returns the middle token stored in the instance.
                If no middle token is set, it returns None.

        Raises:
            None.
        """
        return self._middle_token

    @property
    def middle_id(self):
        """
        Get the middle ID of the CodeLlamaTokenizer instance.

        Args:
            self (CodeLlamaTokenizer): The instance of the CodeLlamaTokenizer class.

        Returns:
            None: If the middle token is None.

        Raises:
            None.

        This method returns the middle ID of the CodeLlamaTokenizer instance.
        If the middle token is None, it returns None.
        The middle ID is obtained by converting the middle token to its corresponding ID using the
        'convert_tokens_to_ids' method.
        """
        if self._middle_token is None:
            return None
        return self.convert_tokens_to_ids(self.middle_token)

    @property
    def suffix_token(self):
        """
        Method to retrieve the suffix token associated with the CodeLlamaTokenizer instance.

        Args:
            self (CodeLlamaTokenizer): The instance of CodeLlamaTokenizer.
                This parameter refers to the instance of the CodeLlamaTokenizer class on which the method is being called.

        Returns:
            None: This method returns the suffix token corresponding to the CodeLlamaTokenizer instance.
                The suffix token is a property value associated with the instance.

        Raises:
            None
        """
        return self._suffix_token

    @property
    def suffix_id(self):
        """
        Returns the ID of the suffix token.

        Args:
            self (CodeLlamaTokenizer): The instance of the CodeLlamaTokenizer class.

        Returns:
            None: If the suffix token is None.

        Raises:
            None.

        This method retrieves the ID corresponding to the suffix token.
        If the suffix token is None, the method returns None.
        The suffix token is obtained by converting the suffix token to its corresponding ID using
        the convert_tokens_to_ids method.
        """
        if self._suffix_token is None:
            return None
        return self.convert_tokens_to_ids(self.suffix_token)

    @property
    def eot_token(self):
        """
        This method 'eot_token' in the class 'CodeLlamaTokenizer' retrieves the end-of-text token.

        Args:
            self (CodeLlamaTokenizer): The instance of the CodeLlamaTokenizer class.

        Returns:
            None: This method returns the end-of-text token value stored in the instance.

        Raises:
            None.
        """
        return self._eot_token

    @property
    def eot_id(self):
        """
        This method 'eot_id' is a property in the 'CodeLlamaTokenizer' class.

        Args:
            self: The instance of the 'CodeLlamaTokenizer' class.

        Returns:
            None: If the '_eot_token' attribute is None, the method returns None.
            int: If the '_eot_token' attribute is not None, the method returns the integer value obtained
                by converting the token to its corresponding ID using the 'convert_tokens_to_ids' method.

        Raises:
            No specific exceptions are raised by this method.
        """
        if self._eot_token is None:
            return None
        return self.convert_tokens_to_ids(self.eot_token)

    @property
    def vocab_size(self):
        """Returns vocab size"""
        return self.sp_model.get_piece_size()

    # Copied from transformers.models.llama.tokenization_llama.LlamaTokenizer.get_vocab
    def get_vocab(self):
        """Returns vocab as a dict"""
        vocab = {self.convert_ids_to_tokens(i): i for i in range(self.vocab_size)}
        vocab.update(self.added_tokens_encoder)
        return vocab

    def tokenize(self, prefix, suffix=None, suffix_first=False, **kwargs) -> List[int]:
        """
        Tokenizes the given prefix and suffix to generate a list of integers representing tokens.

        Args:
            self (CodeLlamaTokenizer): An instance of the CodeLlamaTokenizer class.
            prefix (str): The prefix string to tokenize.
            suffix (str, optional): The suffix string to tokenize. Defaults to None.
            suffix_first (bool, optional): Flag indicating whether to place the suffix before the prefix.
                Defaults to False.

        Returns:
            List[int]: A list of integers representing the tokens generated from the prefix and suffix.

        Raises:
            ValueError: If the input includes a prefix and a suffix used for the infilling task,
                or if the model does not support infilling.

        Note:
            - The `prefix` and `suffix` parameters are used to split the input on the `fill_token` token to
            create a suffix and prefix.
            - If only a prefix is provided, the method tokenizes the prefix and returns the resulting tokens.
            - If a prefix and suffix are provided, the method tokenizes both and returns the tokens in the
            specified order.
            - The `suffix_first` parameter takes precedence over the class attribute `suffix_first`
            if both are provided.
            - The method removes special tokens from the beginning of the tokens list if they match the
            specified conditions.
            - The method replaces occurrences of the `SPIECE_UNDERLINE` token in the prefix with a space.
        """
        # add a prefix space to `prefix`
        if self.fill_token is not None and self.fill_token in prefix and suffix is None:
            prefix, suffix = prefix.split(self.fill_token)

        if len(prefix) > 0:
            prefix = SPIECE_UNDERLINE + prefix.replace(SPIECE_UNDERLINE, " ")

        if suffix is None or len(suffix) < 1:
            tokens = super().tokenize(prefix, **kwargs)
            if len(tokens) > 1 and tokens[0] == SPIECE_UNDERLINE and tokens[1] in self.all_special_tokens:
                tokens = tokens[1:]
            return tokens

        prefix_tokens = self._tokenize(prefix)  # prefix has an extra `SPIECE_UNDERLINE`

        if None in (self.prefix_id, self.middle_id, self.suffix_id):
            raise ValueError(
                "The input either includes a `prefix` and a `suffix` used for the infilling task,"
                f"  or can be split on the {self.fill_token} token, creating a suffix and prefix,"
                " but the model does not support `infilling`."
            )
        suffix_tokens = self._tokenize(suffix)  # make sure CodeLlama sp model does not mess up

        suffix_first = suffix_first if suffix_first is not None else self.suffix_first
        if suffix_first:
            # format as " <PRE> <SUF>{suf} <MID> {pre}"
            return [self.prefix_token, self.suffix_token] + suffix_tokens + [self.middle_token] + prefix_tokens
        # format as " <PRE> {pre} <SUF>{suf} <MID>"
        return [self.prefix_token] + prefix_tokens + [self.suffix_token] + suffix_tokens + [self.middle_token]

    def _tokenize(self, text, **kwargs):
        """
        Returns a tokenized string.

        We de-activated the `add_dummy_prefix` option, thus the sentencepiece internals will always strip any
        SPIECE_UNDERLINE. For example: `self.sp_model.encode(f"{SPIECE_UNDERLINE}Hey", out_type = str)` will give
        `['H', 'e', 'y']` instead of `['▁He', 'y']`. Thus we always encode `f"{unk_token}text"` and strip the
        `unk_token`. Here is an example with `unk_token = "<unk>"` and `unk_token_length = 4`.
        `self.tokenizer.sp_model.encode("<unk> Hey", out_type = str)[4:]`.
        """
        tokens = self.sp_model.encode(text, out_type=str)
        if not text.startswith((SPIECE_UNDERLINE, " ")):
            return tokens
        # 1. Encode string + prefix ex: "<unk> Hey"
        tokens = self.sp_model.encode(self.unk_token + text, out_type=str)
        # 2. Remove self.unk_token from ['<','unk','>', '▁Hey']
        return tokens[self.unk_token_length :] if len(tokens) >= self.unk_token_length else tokens

    # Copied from transformers.models.llama.tokenization_llama.LlamaTokenizer._convert_token_to_id
    def _convert_token_to_id(self, token):
        """Converts a token (str) in an id using the vocab."""
        return self.sp_model.piece_to_id(token)

    # Copied from transformers.models.llama.tokenization_llama.LlamaTokenizer._convert_id_to_token
    def _convert_id_to_token(self, index):
        """Converts an index (integer) in a token (str) using the vocab."""
        token = self.sp_model.IdToPiece(index)
        return token

    def convert_tokens_to_string(self, tokens):
        """Converts a sequence of tokens (string) in a single string."""
        # since we manually add the prefix space, we have to remove it when decoding
        if tokens[0].startswith(SPIECE_UNDERLINE):
            tokens[0] = tokens[0][1:]

        current_sub_tokens = []
        out_string = ""
        for _, token in enumerate(tokens):
            # make sure that special tokens are not decoded using sentencepiece model
            if token in self.all_special_tokens:
                out_string += self.sp_model.decode(current_sub_tokens) + token
                current_sub_tokens = []
            else:
                current_sub_tokens.append(token)
        out_string += self.sp_model.decode(current_sub_tokens)
        return out_string

    # Copied from transformers.models.llama.tokenization_llama.LlamaTokenizer.save_vocabulary
    def save_vocabulary(self, save_directory, filename_prefix: Optional[str] = None) -> Tuple[str]:
        """
        Save the vocabulary and special tokens file to a directory.

        Args:
            save_directory (`str`):
                The directory in which to save the vocabulary.

        Returns:
            `Tuple(str)`: Paths to the files saved.
        """
        if not os.path.isdir(save_directory):
            logger.error(f"Vocabulary path ({save_directory}) should be a directory")
            return
        out_vocab_file = os.path.join(
            save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"]
        )

        if os.path.abspath(self.vocab_file) != os.path.abspath(out_vocab_file) and os.path.isfile(self.vocab_file):
            copyfile(self.vocab_file, out_vocab_file)
        elif not os.path.isfile(self.vocab_file):
            with open(out_vocab_file, "wb") as fi:
                content_spiece_model = self.sp_model.serialized_model_proto()
                fi.write(content_spiece_model)

        return (out_vocab_file,)

    # Copied from transformers.models.llama.tokenization_llama.LlamaTokenizer.build_inputs_with_special_tokens
    def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None):
        """
        Method to build inputs with special tokens in the CodeLlamaTokenizer class.

        Args:
            self: Reference to the current instance of the class.
            token_ids_0 (list): List of token IDs for the first input sequence.
            token_ids_1 (list, optional): List of token IDs for the second input sequence. Defaults to None.

        Returns:
            list: A list representing the input sequences with special tokens added based on the configuration settings.

        Raises:
            None.
        """
        bos_token_id = [self.bos_token_id] if self.add_bos_token else []
        eos_token_id = [self.eos_token_id] if self.add_eos_token else []

        output = bos_token_id + token_ids_0 + eos_token_id

        if token_ids_1 is not None:
            output = output + bos_token_id + token_ids_1 + eos_token_id

        return output

    # Copied from transformers.models.llama.tokenization_llama.LlamaTokenizer.get_special_tokens_mask
    def get_special_tokens_mask(
        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None, already_has_special_tokens: bool = False
    ) -> List[int]:
        """
        Retrieve sequence ids from a token list that has no special tokens added. This method is called when adding
        special tokens using the tokenizer `prepare_for_model` method.

        Args:
            token_ids_0 (`List[int]`):
                List of IDs.
            token_ids_1 (`List[int]`, *optional*):
                Optional second list of IDs for sequence pairs.
            already_has_special_tokens (`bool`, *optional*, defaults to `False`):
                Whether or not the token list is already formatted with special tokens for the model.

        Returns:
            `List[int]`: A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token.
        """
        if already_has_special_tokens:
            return super().get_special_tokens_mask(
                token_ids_0=token_ids_0, token_ids_1=token_ids_1, already_has_special_tokens=True
            )

        bos_token_id = [1] if self.add_bos_token else []
        eos_token_id = [1] if self.add_eos_token else []

        if token_ids_1 is None:
            return bos_token_id + ([0] * len(token_ids_0)) + eos_token_id
        return (
            bos_token_id
            + ([0] * len(token_ids_0))
            + eos_token_id
            + bos_token_id
            + ([0] * len(token_ids_1))
            + eos_token_id
        )

    # Copied from transformers.models.llama.tokenization_llama.LlamaTokenizer.create_token_type_ids_from_sequences
    def create_token_type_ids_from_sequences(
        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
    ) -> List[int]:
        """
        Creates a mask from the two sequences passed to be used in a sequence-pair classification task. An ALBERT
        sequence pair mask has the following format:
        ```
        0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1
        | first sequence    | second sequence |
        ```

        if token_ids_1 is None, only returns the first portion of the mask (0s).

        Args:
            token_ids_0 (`List[int]`):
                List of ids.
            token_ids_1 (`List[int]`, *optional*):
                Optional second list of IDs for sequence pairs.

        Returns:
            `List[int]`: List of [token type IDs](../glossary#token-type-ids) according to the given sequence(s).
        """
        bos_token_id = [self.bos_token_id] if self.add_bos_token else []
        eos_token_id = [self.eos_token_id] if self.add_eos_token else []

        output = [0] * len(bos_token_id + token_ids_0 + eos_token_id)

        if token_ids_1 is not None:
            output += [1] * len(bos_token_id + token_ids_1 + eos_token_id)

        return output

    @property
    # Copied from transformers.models.llama.tokenization_llama.LlamaTokenizer.default_chat_template
    def default_chat_template(self):
        """
        LLaMA uses [INST] and [/INST] to indicate user messages, and <<SYS>> and <</SYS>> to indicate system messages.
        Assistant messages do not have special tokens, because LLaMA chat models are generally trained with strict
        user/assistant/user/assistant message ordering, and so assistant messages can be identified from the ordering
        rather than needing special tokens. The system message is partly 'embedded' in the first user message, which
        results in an unusual token ordering when it is present. This template should definitely be changed if you wish
        to fine-tune a model with more flexible role ordering!

        The output should look something like:

            <bos>[INST] B_SYS SystemPrompt E_SYS Prompt [/INST] Answer <eos><bos>[INST] Prompt [/INST] Answer <eos>
            <bos>[INST] Prompt [/INST]

        The reference for this chat template is [this code
        snippet](https://github.com/facebookresearch/llama/blob/556949fdfb72da27c2f4a40b7f0e4cf0b8153a28/llama/generation.py#L320-L362)
        in the original repository.
        """
        logger.warning_once(
            "\nNo chat template is defined for this tokenizer - using the default template "
            f"for the {self.__class__.__name__} class. If the default is not appropriate for "
            "your model, please set `tokenizer.chat_template` to an appropriate template. "
            "See https://hf-mirror.com/docs/transformers/main/chat_templating for more information.\n"
        )
        template = (
            "{% if messages[0]['role'] == 'system' %}"
            "{% set loop_messages = messages[1:] %}"  # Extract system message if it's present
            "{% set system_message = messages[0]['content'] %}"
            "{% elif USE_DEFAULT_PROMPT == true and not '<<SYS>>' in messages[0]['content'] %}"
            "{% set loop_messages = messages %}"  # Or use the default system message if the flag is set
            "{% set system_message = 'DEFAULT_SYSTEM_MESSAGE' %}"
            "{% else %}"
            "{% set loop_messages = messages %}"
            "{% set system_message = false %}"
            "{% endif %}"
            "{% for message in loop_messages %}"  # Loop over all non-system messages
            "{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}"
            "{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}"
            "{% endif %}"
            "{% if loop.index0 == 0 and system_message != false %}"  # Embed system message in first message
            "{% set content = '<<SYS>>\\n' + system_message + '\\n<</SYS>>\\n\\n' + message['content'] %}"
            "{% else %}"
            "{% set content = message['content'] %}"
            "{% endif %}"
            "{% if message['role'] == 'user' %}"  # After all of that, handle messages/roles in a fairly normal way
            "{{ bos_token + '[INST] ' + content.strip() + ' [/INST]' }}"
            "{% elif message['role'] == 'system' %}"
            "{{ '<<SYS>>\\n' + content.strip() + '\\n<</SYS>>\\n\\n' }}"
            "{% elif message['role'] == 'assistant' %}"
            "{{ ' '  + content.strip() + ' ' + eos_token }}"
            "{% endif %}"
            "{% endfor %}"
        )
        template = template.replace("USE_DEFAULT_PROMPT", "true" if self.use_default_system_prompt else "false")
        default_message = DEFAULT_SYSTEM_PROMPT.replace("\n", "\\n").replace("'", "\\'")
        template = template.replace("DEFAULT_SYSTEM_MESSAGE", default_message)

        return template

    def __getstate__(self):
        """
        Method: __getstate__

        Description:
            This method is used to retrieve the state of the CodeLlamaTokenizer object for serialization purposes.
            It returns a dictionary representing the current state of the object.

        Args:
            self: The instance of the CodeLlamaTokenizer class.

        Returns:
            dict: A copy of the instance's `__dict__` in which the live `sp_model` is set to `None` and its
                serialized proto is stored under `sp_model_proto`.

        Raises:
            None.
        """
        state = self.__dict__.copy()
        state["sp_model"] = None
        state["sp_model_proto"] = self.sp_model.serialized_model_proto()
        return state

    def __setstate__(self, d):
        """
        This method '__setstate__' is defined within the 'CodeLlamaTokenizer' class to set the internal state of the
        object based on the provided dictionary 'd'. It reconstructs the object's state, including the SentencePiece
        model, by loading it from a serialized proto.

        Args:
            self (CodeLlamaTokenizer): The instance of the CodeLlamaTokenizer class.
            d (dict): A dictionary containing the state information to be set.
                It should include the necessary attributes for reconstructing the object's state.

        Returns:
            None: This method does not return any value explicitly.
                It operates by modifying the internal state of the object.

        Raises:
            None:
                However, potential exceptions that could be raised during the execution may include but are not limited to:

                - TypeError: If the input 'd' is not a dictionary.
                - ValueError: If the input 'd' does not contain the required state information.
                - Any exceptions related to the SentencePieceProcessor initialization or loading process.
        """
        self.__dict__ = d
        self.sp_model = spm.SentencePieceProcessor(**self.sp_model_kwargs)
        self.sp_model.LoadFromSerializedProto(self.sp_model_proto)

mindnlp.transformers.models.llama.tokenization_code_llama.CodeLlamaTokenizer.default_chat_template property

LLaMA uses [INST] and [/INST] to indicate user messages, and <<SYS>> and <</SYS>> to indicate system messages. Assistant messages do not have special tokens, because LLaMA chat models are generally trained with strict user/assistant/user/assistant message ordering, and so assistant messages can be identified from the ordering rather than needing special tokens. The system message is partly 'embedded' in the first user message, which results in an unusual token ordering when it is present. This template should definitely be changed if you wish to fine-tune a model with more flexible role ordering!

The output should look something like:

<bos>[INST] B_SYS SystemPrompt E_SYS Prompt [/INST] Answer <eos><bos>[INST] Prompt [/INST] Answer <eos>
<bos>[INST] Prompt [/INST]

The reference for this chat template is this code snippet in the original repository.
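Below is a minimal sketch of how this template is typically applied via apply_chat_template (assuming that method is available on the tokenizer, as in Hugging Face Transformers, and using the public codellama/CodeLlama-7b-Instruct-hf checkpoint for illustration; the exact string depends on the tokenizer's special tokens):

>>> from mindnlp.transformers import CodeLlamaTokenizer
>>> tokenizer = CodeLlamaTokenizer.from_pretrained("codellama/CodeLlama-7b-Instruct-hf")
>>> messages = [
...     {"role": "user", "content": "Write a hello world in Python."},
...     {"role": "assistant", "content": "print('hello world')"},
... ]
>>> tokenizer.apply_chat_template(messages, tokenize=False)
>>> # Expected to look roughly like:
>>> # "<s>[INST] Write a hello world in Python. [/INST] print('hello world') </s>"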

mindnlp.transformers.models.llama.tokenization_code_llama.CodeLlamaTokenizer.eot_id property

This method 'eot_id' is a property in the 'CodeLlamaTokenizer' class.

PARAMETER DESCRIPTION
self

The instance of the 'CodeLlamaTokenizer' class.

RETURNS DESCRIPTION
None

If the '_eot_token' attribute is None, the method returns None.

int

If the '_eot_token' attribute is not None, the method returns the integer value obtained by converting the token to its corresponding ID using the 'convert_tokens_to_ids' method.

mindnlp.transformers.models.llama.tokenization_code_llama.CodeLlamaTokenizer.eot_token property

This method 'eot_token' in the class 'CodeLlamaTokenizer' retrieves the end-of-text token.

PARAMETER DESCRIPTION
self

The instance of the CodeLlamaTokenizer class.

TYPE: CodeLlamaTokenizer

RETURNS DESCRIPTION
None

This method returns the end-of-text token value stored in the instance.

mindnlp.transformers.models.llama.tokenization_code_llama.CodeLlamaTokenizer.middle_id property

Get the middle ID of the CodeLlamaTokenizer instance.

PARAMETER DESCRIPTION
self

The instance of the CodeLlamaTokenizer class.

TYPE: CodeLlamaTokenizer

RETURNS DESCRIPTION
None

If the middle token is None.

This method returns the middle ID of the CodeLlamaTokenizer instance. If the middle token is None, it returns None. The middle ID is obtained by converting the middle token to its corresponding ID using the 'convert_tokens_to_ids' method.

mindnlp.transformers.models.llama.tokenization_code_llama.CodeLlamaTokenizer.middle_token property

This method 'middle_token' is a property method defined in the class 'CodeLlamaTokenizer' that retrieves the middle token stored in the instance.

PARAMETER DESCRIPTION
self

The instance of the CodeLlamaTokenizer class. This parameter refers to the current instance of the class.

TYPE: CodeLlamaTokenizer

RETURNS DESCRIPTION
None

This method returns the middle token stored in the instance. If no middle token is set, it returns None.

mindnlp.transformers.models.llama.tokenization_code_llama.CodeLlamaTokenizer.prefix_id property

Method to retrieve the ID associated with the prefix token in the CodeLlamaTokenizer class.

PARAMETER DESCRIPTION
self

The instance of the CodeLlamaTokenizer class.

TYPE: CodeLlamaTokenizer

RETURNS DESCRIPTION
None

If the prefix token is None, the method returns None. Otherwise, it returns the ID associated with the prefix token.

mindnlp.transformers.models.llama.tokenization_code_llama.CodeLlamaTokenizer.prefix_token property

Returns the prefix token used for tokenizing code in the CodeLlamaTokenizer class.

PARAMETER DESCRIPTION
self

An instance of the CodeLlamaTokenizer class.

RETURNS DESCRIPTION

None.

This method retrieves the prefix token used by the CodeLlamaTokenizer for infilling (fill-in-the-middle) prompts. The prefix token marks the portion of code that precedes the span to be filled in, and it is inserted automatically by `tokenize` when a suffix (or the `fill_token`) is present.

Note that the prefix token is stored as an internal attribute of the CodeLlamaTokenizer instance and is not meant to be modified directly; it is configured through the `prefix_token` argument at initialization.

Example
>>> tokenizer = CodeLlamaTokenizer("path/to/tokenizer.model")  # any Llama-style SentencePiece vocab file
>>> prefix = tokenizer.prefix_token
>>> print(prefix)
▁<PRE>

mindnlp.transformers.models.llama.tokenization_code_llama.CodeLlamaTokenizer.suffix_id property

Returns the ID of the suffix token.

PARAMETER DESCRIPTION
self

The instance of the CodeLlamaTokenizer class.

TYPE: CodeLlamaTokenizer

RETURNS DESCRIPTION
None

If the suffix token is None.

This method retrieves the ID corresponding to the suffix token. If the suffix token is None, the method returns None. Otherwise, the ID is obtained by converting the suffix token with the convert_tokens_to_ids method.

mindnlp.transformers.models.llama.tokenization_code_llama.CodeLlamaTokenizer.suffix_token property

Method to retrieve the suffix token associated with the CodeLlamaTokenizer instance.

PARAMETER DESCRIPTION
self

The instance of CodeLlamaTokenizer. This parameter refers to the instance of the CodeLlamaTokenizer class on which the method is being called.

TYPE: CodeLlamaTokenizer

RETURNS DESCRIPTION
None

This method returns the suffix token corresponding to the CodeLlamaTokenizer instance. The suffix token is a property value associated with the instance.

mindnlp.transformers.models.llama.tokenization_code_llama.CodeLlamaTokenizer.unk_token_length property

Returns the length of the unknown token in the CodeLlamaTokenizer.

PARAMETER DESCRIPTION
self

An instance of the CodeLlamaTokenizer class.

TYPE: CodeLlamaTokenizer

RETURNS DESCRIPTION
int

The number of SentencePiece pieces that the unknown token string is encoded into. This length is used by `_tokenize` to strip the prepended unknown token from the encoded output.

mindnlp.transformers.models.llama.tokenization_code_llama.CodeLlamaTokenizer.vocab_size property

Returns vocab size

mindnlp.transformers.models.llama.tokenization_code_llama.CodeLlamaTokenizer.__getstate__()

Description

This method is used to retrieve the state of the CodeLlamaTokenizer object for serialization purposes. It returns a dictionary representing the current state of the object.

PARAMETER DESCRIPTION
self

The instance of the CodeLlamaTokenizer class.

RETURNS DESCRIPTION
dict

A copy of the instance's __dict__ in which the live sp_model is set to None and its serialized proto is stored under 'sp_model_proto'.

Source code in mindnlp\transformers\models\llama\tokenization_code_llama.py
def __getstate__(self):
    """
    Method: __getstate__

    Description:
        This method is used to retrieve the state of the CodeLlamaTokenizer object for serialization purposes.
        It returns a dictionary representing the current state of the object.

    Args:
        self: The instance of the CodeLlamaTokenizer class.

    Returns:
        dict: A copy of the instance's `__dict__` in which the live `sp_model` is set to `None` and its
            serialized proto is stored under `sp_model_proto`.

    Raises:
        None.
    """
    state = self.__dict__.copy()
    state["sp_model"] = None
    state["sp_model_proto"] = self.sp_model.serialized_model_proto()
    return state

mindnlp.transformers.models.llama.tokenization_code_llama.CodeLlamaTokenizer.__init__(vocab_file, unk_token='<unk>', bos_token='<s>', eos_token='</s>', prefix_token='▁<PRE>', middle_token='▁<MID>', suffix_token='▁<SUF>', eot_token='▁<EOT>', fill_token='<FILL_ME>', suffix_first=False, sp_model_kwargs=None, add_bos_token=True, add_eos_token=False, clean_up_tokenization_spaces=False, additional_special_tokens=None, use_default_system_prompt=False, **kwargs)

This method initializes an instance of the CodeLlamaTokenizer class.

PARAMETER DESCRIPTION
self

The instance of the class.

vocab_file

The path to the vocabulary file.

TYPE: str

unk_token

The unknown token, default is '<unk>'.

TYPE: str DEFAULT: '<unk>'

bos_token

The beginning of sequence token, default is '<s>'.

TYPE: str DEFAULT: '<s>'

eos_token

The end of sequence token, default is '</s>'.

TYPE: str DEFAULT: '</s>'

prefix_token

The prefix token, default is '▁<PRE>'.

TYPE: str DEFAULT: '▁<PRE>'

middle_token

The middle token, default is '▁<MID>'.

TYPE: str DEFAULT: '▁<MID>'

suffix_token

The suffix token, default is '▁<SUF>'.

TYPE: str DEFAULT: '▁<SUF>'

eot_token

The end of text token, default is '▁<EOT>'.

TYPE: str DEFAULT: '▁<EOT>'

fill_token

The fill token, default is '<FILL_ME>'.

TYPE: str DEFAULT: '<FILL_ME>'

suffix_first

Indicates whether suffix comes before prefix.

TYPE: bool DEFAULT: False

sp_model_kwargs

Additional arguments for the sentencepiece model.

TYPE: Optional[Dict[str, Any]] DEFAULT: None

add_bos_token

Whether to add the bos token, default is True.

TYPE: bool DEFAULT: True

add_eos_token

Whether to add the eos token, default is False.

TYPE: bool DEFAULT: False

clean_up_tokenization_spaces

Whether to clean up tokenization spaces, default is False.

TYPE: bool DEFAULT: False

additional_special_tokens

Additional special tokens to include.

TYPE: list DEFAULT: None

use_default_system_prompt

Whether to use the default system prompt.

TYPE: bool DEFAULT: False

RETURNS DESCRIPTION

None.

RAISES DESCRIPTION
MissingBackendError

If the required backend 'protobuf' is not available.

Source code in mindnlp\transformers\models\llama\tokenization_code_llama.py
def __init__(
    self,
    vocab_file,
    unk_token="<unk>",
    bos_token="<s>",
    eos_token="</s>",
    prefix_token="▁<PRE>",
    middle_token="▁<MID>",
    suffix_token="▁<SUF>",
    eot_token="▁<EOT>",
    fill_token="<FILL_ME>",
    suffix_first=False,
    sp_model_kwargs: Optional[Dict[str, Any]] = None,
    add_bos_token=True,
    add_eos_token=False,
    clean_up_tokenization_spaces=False,
    additional_special_tokens=None,
    use_default_system_prompt=False,
    **kwargs,
):
    """
    This method initializes an instance of the CodeLlamaTokenizer class.

    Args:
        self: The instance of the class.
        vocab_file (str): The path to the vocabulary file.
        unk_token (str, optional): The unknown token, default is '<unk>'.
        bos_token (str, optional): The beginning of sequence token, default is '<s>'.
        eos_token (str, optional): The end of sequence token, default is '</s>'.
        prefix_token (str, optional): The prefix token, default is '▁<PRE>'.
        middle_token (str, optional): The middle token, default is '▁<MID>'.
        suffix_token (str, optional): The suffix token, default is '▁<SUF>'.
        eot_token (str, optional): The end of text token, default is '▁<EOT>'.
        fill_token (str, optional): The fill token, default is '<FILL_ME>'.
        suffix_first (bool): Indicates whether suffix comes before prefix.
        sp_model_kwargs (Optional[Dict[str, Any]], optional): Additional arguments for the sentencepiece model.
        add_bos_token (bool, optional): Whether to add the bos token, default is True.
        add_eos_token (bool, optional): Whether to add the eos token, default is False.
        clean_up_tokenization_spaces (bool, optional): Whether to clean up tokenization spaces, default is False.
        additional_special_tokens (list, optional): Additional special tokens to include.
        use_default_system_prompt (bool, optional): Whether to use the default system prompt.

    Returns:
        None.

    Raises:
        MissingBackendError: If the required backend 'protobuf' is not available.
    """
    requires_backends(self, "protobuf")
    self.sp_model_kwargs = {} if sp_model_kwargs is None else sp_model_kwargs
    bos_token = AddedToken(bos_token, normalized=False, special=True) if isinstance(bos_token, str) else bos_token
    eos_token = AddedToken(eos_token, normalized=False, special=True) if isinstance(eos_token, str) else eos_token
    unk_token = AddedToken(unk_token, normalized=False, special=True) if isinstance(unk_token, str) else unk_token

    self.use_default_system_prompt = use_default_system_prompt
    # mark tokens special to skip them
    additional_special_tokens = additional_special_tokens or []
    for token in [prefix_token, middle_token, suffix_token, eot_token]:
        additional_special_tokens += [token] if token is not None else []

    self.vocab_file = vocab_file
    self.add_bos_token = add_bos_token
    self.add_eos_token = add_eos_token
    self._prefix_token = prefix_token
    self._middle_token = middle_token
    self._suffix_token = suffix_token
    self._eot_token = eot_token
    self.fill_token = fill_token
    self.suffix_first = suffix_first
    self.sp_model = self.get_spm_processor()

    super().__init__(
        bos_token=bos_token,
        eos_token=eos_token,
        unk_token=unk_token,
        add_bos_token=add_bos_token,
        add_eos_token=add_eos_token,
        prefix_token=prefix_token,
        middle_token=middle_token,
        suffix_token=suffix_token,
        eot_token=eot_token,
        fill_token=fill_token,
        sp_model_kwargs=self.sp_model_kwargs,
        suffix_first=suffix_first,
        clean_up_tokenization_spaces=clean_up_tokenization_spaces,
        additional_special_tokens=additional_special_tokens,
        use_default_system_prompt=use_default_system_prompt,
        **kwargs,
    )
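As a usage sketch (the vocabulary path below is hypothetical; any Llama-style SentencePiece `.model` file works), initialization registers the infilling markers as additional special tokens:

>>> from mindnlp.transformers import CodeLlamaTokenizer
>>> tokenizer = CodeLlamaTokenizer("path/to/tokenizer.model")  # hypothetical local SentencePiece file
>>> # the infilling markers set in __init__ are exposed as additional special tokens
>>> [t in tokenizer.additional_special_tokens for t in ("▁<PRE>", "▁<MID>", "▁<SUF>", "▁<EOT>")]
[True, True, True, True]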

mindnlp.transformers.models.llama.tokenization_code_llama.CodeLlamaTokenizer.__setstate__(d)

This method '__setstate__' is defined within the 'CodeLlamaTokenizer' class to set the internal state of the object based on the provided dictionary 'd'. It reconstructs the object's state, including the SentencePiece model, by loading it from a serialized proto.

PARAMETER DESCRIPTION
self

The instance of the CodeLlamaTokenizer class.

TYPE: CodeLlamaTokenizer

d

A dictionary containing the state information to be set. It should include the necessary attributes for reconstructing the object's state.

TYPE: dict

RETURNS DESCRIPTION
None

This method does not return any value explicitly. It operates by modifying the internal state of the object.

RAISES DESCRIPTION
None

However, potential exceptions that could be raised during the execution may include but are not limited to:

  • TypeError: If the input 'd' is not a dictionary.
  • ValueError: If the input 'd' does not contain the required state information.
  • Any exceptions related to the SentencePieceProcessor initialization or loading process.
Source code in mindnlp\transformers\models\llama\tokenization_code_llama.py
def __setstate__(self, d):
    """
    This method '__setstate__' is defined within the 'CodeLlamaTokenizer' class to set the internal state of the
    object based on the provided dictionary 'd'. It reconstructs the object's state, including the SentencePiece
    model, by loading it from a serialized proto.

    Args:
        self (CodeLlamaTokenizer): The instance of the CodeLlamaTokenizer class.
        d (dict): A dictionary containing the state information to be set.
            It should include the necessary attributes for reconstructing the object's state.

    Returns:
        None: This method does not return any value explicitly.
            It operates by modifying the internal state of the object.

    Raises:
        None:
            However, potential exceptions that could be raised during the execution may include but are not limited to:

            - TypeError: If the input 'd' is not a dictionary.
            - ValueError: If the input 'd' does not contain the required state information.
            - Any exceptions related to the SentencePieceProcessor initialization or loading process.
    """
    self.__dict__ = d
    self.sp_model = spm.SentencePieceProcessor(**self.sp_model_kwargs)
    self.sp_model.LoadFromSerializedProto(self.sp_model_proto)
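Together with `__getstate__`, this makes the tokenizer picklable. A minimal round-trip sketch (assuming `tokenizer` is an already-initialized CodeLlamaTokenizer):

>>> import pickle
>>> payload = pickle.dumps(tokenizer)   # __getstate__ drops the live sp_model but keeps its serialized proto
>>> restored = pickle.loads(payload)    # __setstate__ rebuilds the SentencePieceProcessor from that proto
>>> restored.tokenize("def add(a, b):") == tokenizer.tokenize("def add(a, b):")
True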

mindnlp.transformers.models.llama.tokenization_code_llama.CodeLlamaTokenizer.build_inputs_with_special_tokens(token_ids_0, token_ids_1=None)

Method to build inputs with special tokens in the CodeLlamaTokenizer class.

PARAMETER DESCRIPTION
self

Reference to the current instance of the class.

token_ids_0

List of token IDs for the first input sequence.

TYPE: list

token_ids_1

List of token IDs for the second input sequence. Defaults to None.

TYPE: list DEFAULT: None

RETURNS DESCRIPTION
list

A list representing the input sequences with special tokens added based on the configuration settings.

Source code in mindnlp\transformers\models\llama\tokenization_code_llama.py
def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None):
    """
    Method to build inputs with special tokens in the CodeLlamaTokenizer class.

    Args:
        self: Reference to the current instance of the class.
        token_ids_0 (list): List of token IDs for the first input sequence.
        token_ids_1 (list, optional): List of token IDs for the second input sequence. Defaults to None.

    Returns:
        list: A list representing the input sequences with special tokens added based on the configuration settings.

    Raises:
        None.
    """
    bos_token_id = [self.bos_token_id] if self.add_bos_token else []
    eos_token_id = [self.eos_token_id] if self.add_eos_token else []

    output = bos_token_id + token_ids_0 + eos_token_id

    if token_ids_1 is not None:
        output = output + bos_token_id + token_ids_1 + eos_token_id

    return output
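For illustration, with the default settings (`add_bos_token=True`, `add_eos_token=False`) and the standard Llama vocabulary where the BOS id is 1, the method simply prepends the BOS id to each sequence (the other token ids below are arbitrary placeholders):

>>> tokenizer.build_inputs_with_special_tokens([306, 4966])
[1, 306, 4966]
>>> tokenizer.build_inputs_with_special_tokens([306, 4966], [626, 29879])
[1, 306, 4966, 1, 626, 29879]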

mindnlp.transformers.models.llama.tokenization_code_llama.CodeLlamaTokenizer.convert_tokens_to_string(tokens)

Converts a sequence of tokens (string) in a single string.

Source code in mindnlp\transformers\models\llama\tokenization_code_llama.py
def convert_tokens_to_string(self, tokens):
    """Converts a sequence of tokens (string) in a single string."""
    # since we manually add the prefix space, we have to remove it when decoding
    if tokens[0].startswith(SPIECE_UNDERLINE):
        tokens[0] = tokens[0][1:]

    current_sub_tokens = []
    out_string = ""
    for _, token in enumerate(tokens):
        # make sure that special tokens are not decoded using sentencepiece model
        if token in self.all_special_tokens:
            out_string += self.sp_model.decode(current_sub_tokens) + token
            current_sub_tokens = []
        else:
            current_sub_tokens.append(token)
    out_string += self.sp_model.decode(current_sub_tokens)
    return out_string

mindnlp.transformers.models.llama.tokenization_code_llama.CodeLlamaTokenizer.create_token_type_ids_from_sequences(token_ids_0, token_ids_1=None)

Creates a mask from the two sequences passed to be used in a sequence-pair classification task. An ALBERT sequence pair mask has the following format:

0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1
| first sequence    | second sequence |

if token_ids_1 is None, only returns the first portion of the mask (0s).

PARAMETER DESCRIPTION
token_ids_0

List of ids.

TYPE: `List[int]`

token_ids_1

Optional second list of IDs for sequence pairs.

TYPE: `List[int]`, *optional* DEFAULT: None

RETURNS DESCRIPTION
List[int]

List[int]: List of token type IDs according to the given sequence(s).

Source code in mindnlp\transformers\models\llama\tokenization_code_llama.py
def create_token_type_ids_from_sequences(
    self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
) -> List[int]:
    """
    Creates a mask from the two sequences passed to be used in a sequence-pair classification task. An ALBERT
    sequence pair mask has the following format:
    ```
    0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1
    | first sequence    | second sequence |
    ```

    if token_ids_1 is None, only returns the first portion of the mask (0s).

    Args:
        token_ids_0 (`List[int]`):
            List of ids.
        token_ids_1 (`List[int]`, *optional*):
            Optional second list of IDs for sequence pairs.

    Returns:
        `List[int]`: List of [token type IDs](../glossary#token-type-ids) according to the given sequence(s).
    """
    bos_token_id = [self.bos_token_id] if self.add_bos_token else []
    eos_token_id = [self.eos_token_id] if self.add_eos_token else []

    output = [0] * len(bos_token_id + token_ids_0 + eos_token_id)

    if token_ids_1 is not None:
        output += [1] * len(bos_token_id + token_ids_1 + eos_token_id)

    return output
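A small illustration with the defaults (`add_bos_token=True`, `add_eos_token=False`); the token ids are placeholders and only the sequence lengths matter:

>>> tokenizer.create_token_type_ids_from_sequences([10, 11], [20, 21, 22])
[0, 0, 0, 1, 1, 1, 1]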

mindnlp.transformers.models.llama.tokenization_code_llama.CodeLlamaTokenizer.get_special_tokens_mask(token_ids_0, token_ids_1=None, already_has_special_tokens=False)

Retrieve sequence ids from a token list that has no special tokens added. This method is called when adding special tokens using the tokenizer prepare_for_model method.

PARAMETER DESCRIPTION
token_ids_0

List of IDs.

TYPE: `List[int]`

token_ids_1

Optional second list of IDs for sequence pairs.

TYPE: `List[int]`, *optional* DEFAULT: None

already_has_special_tokens

Whether or not the token list is already formatted with special tokens for the model.

TYPE: `bool`, *optional*, defaults to `False` DEFAULT: False

RETURNS DESCRIPTION
List[int]

List[int]: A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token.

Source code in mindnlp\transformers\models\llama\tokenization_code_llama.py
def get_special_tokens_mask(
    self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None, already_has_special_tokens: bool = False
) -> List[int]:
    """
    Retrieve sequence ids from a token list that has no special tokens added. This method is called when adding
    special tokens using the tokenizer `prepare_for_model` method.

    Args:
        token_ids_0 (`List[int]`):
            List of IDs.
        token_ids_1 (`List[int]`, *optional*):
            Optional second list of IDs for sequence pairs.
        already_has_special_tokens (`bool`, *optional*, defaults to `False`):
            Whether or not the token list is already formatted with special tokens for the model.

    Returns:
        `List[int]`: A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token.
    """
    if already_has_special_tokens:
        return super().get_special_tokens_mask(
            token_ids_0=token_ids_0, token_ids_1=token_ids_1, already_has_special_tokens=True
        )

    bos_token_id = [1] if self.add_bos_token else []
    eos_token_id = [1] if self.add_eos_token else []

    if token_ids_1 is None:
        return bos_token_id + ([0] * len(token_ids_0)) + eos_token_id
    return (
        bos_token_id
        + ([0] * len(token_ids_0))
        + eos_token_id
        + bos_token_id
        + ([0] * len(token_ids_1))
        + eos_token_id
    )
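For example, with `add_bos_token=True` and `add_eos_token=False` (placeholder ids), only the positions of the prepended BOS tokens are flagged:

>>> tokenizer.get_special_tokens_mask([10, 11], [20, 21])
[1, 0, 0, 1, 0, 0]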

mindnlp.transformers.models.llama.tokenization_code_llama.CodeLlamaTokenizer.get_spm_processor()

This method initializes and returns a SentencePieceProcessor object for tokenizing text using the SentencePiece library.

PARAMETER DESCRIPTION
self

The instance of the CodeLlamaTokenizer class.

RETURNS DESCRIPTION

spm.SentencePieceProcessor: A tokenizer object of type spm.SentencePieceProcessor.

RAISES DESCRIPTION
None

However, potential exceptions that may occur during the method execution include:

  • FileNotFoundError: If the specified vocab_file cannot be found.
  • IOError: If there are issues with reading the vocab_file.
  • ValueError: If the provided sp_model_kwargs are invalid or missing required information.
  • Any other relevant exceptions that may occur during the loading and initialization of the tokenizer.
Source code in mindnlp\transformers\models\llama\tokenization_code_llama.py
def get_spm_processor(self):
    """
    This method initializes and returns a SentencePieceProcessor object for tokenizing text using
    the SentencePiece library.

    Args:
        self: The instance of the CodeLlamaTokenizer class.

    Returns:
        spm.SentencePieceProcessor: A tokenizer object of type spm.SentencePieceProcessor.

    Raises:
        None:
            However, potential exceptions that may occur during the method execution include:

            - FileNotFoundError: If the specified vocab_file cannot be found.
            - IOError: If there are issues with reading the vocab_file.
            - ValueError: If the provided sp_model_kwargs are invalid or missing required information.
            - Any other relevant exceptions that may occur during the loading and initialization of the tokenizer.
    """
    tokenizer = spm.SentencePieceProcessor(**self.sp_model_kwargs)
    with open(self.vocab_file, "rb") as f:
        sp_model = f.read()
        model_pb2 = import_protobuf()
        model = model_pb2.ModelProto.FromString(sp_model)
        normalizer_spec = model_pb2.NormalizerSpec()
        normalizer_spec.add_dummy_prefix = False
        model.normalizer_spec.MergeFrom(normalizer_spec)
        sp_model = model.SerializeToString()
        tokenizer.LoadFromSerializedProto(sp_model)
    return tokenizer

mindnlp.transformers.models.llama.tokenization_code_llama.CodeLlamaTokenizer.get_vocab()

Returns vocab as a dict

Source code in mindnlp\transformers\models\llama\tokenization_code_llama.py
def get_vocab(self):
    """Returns vocab as a dict"""
    vocab = {self.convert_ids_to_tokens(i): i for i in range(self.vocab_size)}
    vocab.update(self.added_tokens_encoder)
    return vocab
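A quick usage sketch (the exact size and ids depend on the loaded SentencePiece model; for the standard Llama vocabulary, `<s>` maps to id 1):

>>> vocab = tokenizer.get_vocab()
>>> len(vocab) >= tokenizer.vocab_size
True
>>> vocab.get("<s>")
1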

mindnlp.transformers.models.llama.tokenization_code_llama.CodeLlamaTokenizer.save_vocabulary(save_directory, filename_prefix=None)

Save the vocabulary and special tokens file to a directory.

PARAMETER DESCRIPTION
save_directory

The directory in which to save the vocabulary.

TYPE: `str`

RETURNS DESCRIPTION
Tuple[str]

Tuple(str): Paths to the files saved.

Source code in mindnlp\transformers\models\llama\tokenization_code_llama.py
def save_vocabulary(self, save_directory, filename_prefix: Optional[str] = None) -> Tuple[str]:
    """
    Save the vocabulary and special tokens file to a directory.

    Args:
        save_directory (`str`):
            The directory in which to save the vocabulary.

    Returns:
        `Tuple(str)`: Paths to the files saved.
    """
    if not os.path.isdir(save_directory):
        logger.error(f"Vocabulary path ({save_directory}) should be a directory")
        return
    out_vocab_file = os.path.join(
        save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"]
    )

    if os.path.abspath(self.vocab_file) != os.path.abspath(out_vocab_file) and os.path.isfile(self.vocab_file):
        copyfile(self.vocab_file, out_vocab_file)
    elif not os.path.isfile(self.vocab_file):
        with open(out_vocab_file, "wb") as fi:
            content_spiece_model = self.sp_model.serialized_model_proto()
            fi.write(content_spiece_model)

    return (out_vocab_file,)
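Usage sketch (the vocabulary is written as `tokenizer.model`, or `<prefix>-tokenizer.model` when a `filename_prefix` is given, into an existing directory; the returned paths shown in the comments are illustrative):

>>> import tempfile
>>> out_dir = tempfile.mkdtemp()
>>> tokenizer.save_vocabulary(out_dir)                          # -> ('<out_dir>/tokenizer.model',)
>>> tokenizer.save_vocabulary(out_dir, filename_prefix="ckpt")  # -> ('<out_dir>/ckpt-tokenizer.model',)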

mindnlp.transformers.models.llama.tokenization_code_llama.CodeLlamaTokenizer.tokenize(prefix, suffix=None, suffix_first=False, **kwargs)

Tokenizes the given prefix and suffix to generate a list of integers representing tokens.

PARAMETER DESCRIPTION
self

An instance of the CodeLlamaTokenizer class.

TYPE: CodeLlamaTokenizer

prefix

The prefix string to tokenize.

TYPE: str

suffix

The suffix string to tokenize. Defaults to None.

TYPE: str DEFAULT: None

suffix_first

Flag indicating whether to place the suffix before the prefix. Defaults to False.

TYPE: bool DEFAULT: False

RETURNS DESCRIPTION
List[int]

List[int]: A list of integers representing the tokens generated from the prefix and suffix.

RAISES DESCRIPTION
ValueError

If the input includes a prefix and a suffix for the infilling task (or can be split on the fill_token), but the model does not support infilling.

Note
  • The prefix and suffix parameters are used to split the input on the fill_token token to create a suffix and prefix.
  • If only a prefix is provided, the method tokenizes the prefix and returns the resulting tokens.
  • If a prefix and suffix are provided, the method tokenizes both and returns the tokens in the specified order.
  • The suffix_first parameter takes precedence over the class attribute suffix_first if both are provided.
  • The method removes special tokens from the beginning of the tokens list if they match the specified conditions.
  • The method replaces occurrences of the SPIECE_UNDERLINE token in the prefix with a space.
Source code in mindnlp\transformers\models\llama\tokenization_code_llama.py
def tokenize(self, prefix, suffix=None, suffix_first=False, **kwargs) -> List[int]:
    """
    Tokenizes the given prefix and suffix to generate a list of integers representing tokens.

    Args:
        self (CodeLlamaTokenizer): An instance of the CodeLlamaTokenizer class.
        prefix (str): The prefix string to tokenize.
        suffix (str, optional): The suffix string to tokenize. Defaults to None.
        suffix_first (bool, optional): Flag indicating whether to place the suffix before the prefix.
            Defaults to False.

    Returns:
        List[int]: A list of integers representing the tokens generated from the prefix and suffix.

    Raises:
        ValueError: If the input includes a prefix and a suffix for the infilling task (or can be split
            on the `fill_token`) but the model does not support infilling.

    Note:
        - The `prefix` and `suffix` parameters are used to split the input on the `fill_token` token to
        create a suffix and prefix.
        - If only a prefix is provided, the method tokenizes the prefix and returns the resulting tokens.
        - If a prefix and suffix are provided, the method tokenizes both and returns the tokens in the
        specified order.
        - The `suffix_first` parameter takes precedence over the class attribute `suffix_first`
        if both are provided.
        - The method removes special tokens from the beginning of the tokens list if they match the
        specified conditions.
        - The method replaces occurrences of the `SPIECE_UNDERLINE` token in the prefix with a space.
    """
    # add a prefix space to `prefix`
    if self.fill_token is not None and self.fill_token in prefix and suffix is None:
        prefix, suffix = prefix.split(self.fill_token)

    if len(prefix) > 0:
        prefix = SPIECE_UNDERLINE + prefix.replace(SPIECE_UNDERLINE, " ")

    if suffix is None or len(suffix) < 1:
        tokens = super().tokenize(prefix, **kwargs)
        if len(tokens) > 1 and tokens[0] == SPIECE_UNDERLINE and tokens[1] in self.all_special_tokens:
            tokens = tokens[1:]
        return tokens

    prefix_tokens = self._tokenize(prefix)  # prefix has an extra `SPIECE_UNDERLINE`

    if None in (self.prefix_id, self.middle_id, self.suffix_id):
        raise ValueError(
            "The input either includes a `prefix` and a `suffix` used for the infilling task,"
            f"  or can be split on the {self.fill_token} token, creating a suffix and prefix,"
            " but the model does not support `infilling`."
        )
    suffix_tokens = self._tokenize(suffix)  # make sure CodeLlama sp model does not mess up

    suffix_first = suffix_first if suffix_first is not None else self.suffix_first
    if suffix_first:
        # format as " <PRE> <SUF>{suf} <MID> {pre}"
        return [self.prefix_token, self.suffix_token] + suffix_tokens + [self.middle_token] + prefix_tokens
    # format as " <PRE> {pre} <SUF>{suf} <MID>"
    return [self.prefix_token] + prefix_tokens + [self.suffix_token] + suffix_tokens + [self.middle_token]
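A hedged infilling sketch: when the input contains the `<FILL_ME>` token (or an explicit `suffix` is passed), the output is wrapped with the infilling markers; the middle tokens are elided below because they depend on the loaded SentencePiece model:

>>> code = "def remove_non_ascii(s: str) -> str:\n    <FILL_ME>\n    return result"
>>> tokens = tokenizer.tokenize(code)
>>> tokens[0], tokens[-1]
('▁<PRE>', '▁<MID>')
>>> '▁<SUF>' in tokens
True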

mindnlp.transformers.models.llama.tokenization_code_llama_fast

Fast Tokenization classes for Code LLaMA.

mindnlp.transformers.models.llama.tokenization_code_llama_fast.CodeLlamaTokenizerFast

Bases: PreTrainedTokenizerFast

Construct a Llama tokenizer based on byte-level Byte-Pair-Encoding. Notably, this uses ByteFallback and no normalization.

Example
>>> from transformers import CodeLlamaTokenizerFast
...
>>> tokenizer = CodeLlamaTokenizerFast.from_pretrained("hf-internal-testing/llama-tokenizer")
>>> tokenizer.encode("Hello this is a test")
[1, 15043, 445, 338, 263, 1243]

If you want to change the bos_token or the eos_token, make sure to specify them when initializing the model, or call tokenizer.update_post_processor() to make sure that the post-processing is correctly done (otherwise the values of the first token and final token of an encoded sequence will not be correct). For more details, check out the [post-processors](https://hf-mirror.com/docs/tokenizers/api/post-processors) documentation.

This tokenizer inherits from [PreTrainedTokenizerFast] which contains most of the main methods. Users should refer to this superclass for more information regarding those methods. The default configuration matches that of codellama/CodeLlama-7b-Instruct-hf, which supports prompt infilling.

PARAMETER DESCRIPTION
vocab_file

SentencePiece file (generally has a .model extension) that contains the vocabulary necessary to instantiate a tokenizer.

TYPE: `str`, *optional* DEFAULT: None

tokenizer_file

tokenizers file (generally has a .json extension) that contains everything needed to load the tokenizer.

TYPE: `str`, *optional* DEFAULT: None

clean_up_tokenization_spaces

Whether to clean up spaces after decoding; cleanup consists of removing potential artifacts like extra spaces.

TYPE: `bool`, *optional*, defaults to `False` DEFAULT: False

unk_token

The unknown token. A token that is not in the vocabulary cannot be converted to an ID and is set to be this token instead.

TYPE: `str`, *optional*, defaults to `"<unk>"` DEFAULT: '<unk>'

bos_token

The beginning of sequence token that was used during pretraining. Can be used as a sequence classifier token.

TYPE: `str`, *optional*, defaults to `"<s>"` DEFAULT: '<s>'

eos_token

The end of sequence token.

TYPE: `str`, *optional*, defaults to `"</s>"` DEFAULT: '</s>'

prefix_token

Prefix token used for infilling.

TYPE: `str`, *optional*, defaults to `"▁<PRE>"` DEFAULT: '▁<PRE>'

middle_token

Middle token used for infilling.

TYPE: `str`, *optional*, defaults to `"▁<MID>"` DEFAULT: '▁<MID>'

suffix_token

Suffix token used for infilling.

TYPE: `str`, *optional*, defaults to `"▁<SUF>"` DEFAULT: '▁<SUF>'

eot_token

End of text token used for infilling.

TYPE: `str`, *optional*, defaults to `"▁<EOT>"` DEFAULT: '▁<EOT>'

fill_token

The token used to split the input between the prefix and suffix.

TYPE: `str`, *optional*, defaults to `"<FILL_ME>"` DEFAULT: '<FILL_ME>'

additional_special_tokens

Additional special tokens used by the tokenizer.

TYPE: `List[str]`, *optional* DEFAULT: None

add_bos_token

Whether to add a beginning of sequence token at the start of sequences.

TYPE: `bool`, *optional*, defaults to `True` DEFAULT: True

add_eos_token

Whether to add an end of sequence token at the end of sequences.

TYPE: `bool`, *optional*, defaults to `False` DEFAULT: False

use_default_system_prompt

Whether or not the default system prompt for Llama should be used.

TYPE: `bool`, *optional*, defaults to `False` DEFAULT: False

Source code in mindnlp\transformers\models\llama\tokenization_code_llama_fast.py
class CodeLlamaTokenizerFast(PreTrainedTokenizerFast):
    """
    Construct a Llama tokenizer based on byte-level Byte-Pair-Encoding.
    Notably, this uses ByteFallback and no normalization.

    Example:
        ```python
        >>> from transformers import CodeLlamaTokenizerFast
        ...
        >>> tokenizer = CodeLlamaTokenizerFast.from_pretrained("hf-internal-testing/llama-tokenizer")
        >>> tokenizer.encode("Hello this is a test")
        [1, 15043, 445, 338, 263, 1243]
        ```

    If you want to change the `bos_token` or the `eos_token`, make sure to specify them when initializing the model, or
    call `tokenizer.update_post_processor()` to make sure that the post-processing is correctly done (otherwise the
    values of the first token and final token of an encoded sequence will not be correct). For more details, check out
    the [post-processors](https://hf-mirror.com/docs/tokenizers/api/post-processors) documentation.


    This tokenizer inherits from [`PreTrainedTokenizerFast`] which contains most of the main methods. Users should
    refer to this superclass for more information regarding those methods. The default configuration matches that of
    [codellama/CodeLlama-7b-Instruct-hf](https://hf-mirror.com/codellama/CodeLlama-7b-Instruct-hf/blob/main/tokenizer_config.json)
    which supports prompt infilling.

    Args:
        vocab_file (`str`, *optional*):
            [SentencePiece](https://github.com/google/sentencepiece) file (generally has a .model extension) that
            contains the vocabulary necessary to instantiate a tokenizer.
        tokenizer_file (`str`, *optional*):
            [tokenizers](https://github.com/huggingface/tokenizers) file (generally has a .json extension) that
            contains everything needed to load the tokenizer.
        clean_up_tokenization_spaces (`bool`, *optional*, defaults to `False`):
            Whether to clean up spaces after decoding; cleanup consists of removing potential artifacts like extra
            spaces.
        unk_token (`str`, *optional*, defaults to `"<unk>"`):
            The unknown token. A token that is not in the vocabulary cannot be converted to an ID and is set to be this
            token instead.
        bos_token (`str`, *optional*, defaults to `"<s>"`):
            The beginning of sequence token that was used during pretraining. Can be used as a sequence classifier token.
        eos_token (`str`, *optional*, defaults to `"</s>"`):
            The end of sequence token.
        prefix_token (`str`, *optional*, defaults to `"▁<PRE>"`):
            Prefix token used for infilling.
        middle_token (`str`, *optional*, defaults to `"▁<MID>"`):
            Middle token used for infilling.
        suffix_token (`str`, *optional*, defaults to `"▁<SUF>"`):
            Suffix token used for infilling.
        eot_token (`str`, *optional*, defaults to `"▁<EOT>"`):
            End of text token used for infilling.
        fill_token (`str`, *optional*, defaults to `"<FILL_ME>"`):
            The token used to split the input between the prefix and suffix.
        additional_special_tokens (`List[str]`, *optional*):
            Additional special tokens used by the tokenizer.
        add_bos_token (`bool`, *optional*, defaults to `True`):
            Whether to add a beginning of sequence token at the start of sequences.
        add_eos_token (`bool`, *optional*, defaults to `False`):
            Whether to add an end of sequence token at the end of sequences.
        use_default_system_prompt (`bool`, *optional*, defaults to `False`):
            Whether or not the default system prompt for Llama should be used.
    """
    vocab_files_names = VOCAB_FILES_NAMES
    slow_tokenizer_class = CodeLlamaTokenizer
    padding_side = "left"
    model_input_names = ["input_ids", "attention_mask"]

    def __init__(
        self,
        vocab_file=None,
        tokenizer_file=None,
        clean_up_tokenization_spaces=False,
        unk_token="<unk>",
        bos_token="<s>",
        eos_token="</s>",
        prefix_token="▁<PRE>",
        middle_token="▁<MID>",
        suffix_token="▁<SUF>",
        eot_token="▁<EOT>",
        fill_token="<FILL_ME>",
        additional_special_tokens=None,
        add_bos_token=True,
        add_eos_token=False,
        use_default_system_prompt=False,
        **kwargs,
    ):
        """
        Initializes an instance of the CodeLlamaTokenizerFast class.

        Args:
            self: The instance of the class.
            vocab_file (str, optional): Path to the vocabulary file. Defaults to None.
            tokenizer_file (str, optional): Path to the tokenizer file. Defaults to None.
            clean_up_tokenization_spaces (bool, optional): Whether to clean up tokenization spaces. Defaults to False.
            unk_token (str, optional): Unknown token. Defaults to '<unk>'.
            bos_token (str, optional): Beginning of sentence token. Defaults to '<s>'.
            eos_token (str, optional): End of sentence token. Defaults to '</s>'.
            prefix_token (str, optional): Prefix token. Defaults to '▁<PRE>'.
            middle_token (str, optional): Middle token. Defaults to '▁<MID>'.
            suffix_token (str, optional): Suffix token. Defaults to '▁<SUF>'.
            eot_token (str, optional): End of text token. Defaults to '▁<EOT>'.
            fill_token (str, optional): Fill token. Defaults to '<FILL_ME>'.
            additional_special_tokens (List[str], optional): Additional special tokens. Defaults to None.
            add_bos_token (bool, optional): Whether to add the beginning of sentence token. Defaults to True.
            add_eos_token (bool, optional): Whether to add the end of sentence token. Defaults to False.
            use_default_system_prompt (bool, optional): Whether to use the default system prompt. Defaults to False.
            **kwargs: Additional keyword arguments.

        Returns:
            None

        Raises:
            None
        """
        # mark tokens special to skip them
        additional_special_tokens = additional_special_tokens or []
        for token in [prefix_token, middle_token, suffix_token, eot_token]:
            additional_special_tokens += [token] if token is not None else []
        self.use_default_system_prompt = use_default_system_prompt

        super().__init__(
            vocab_file=vocab_file,
            tokenizer_file=tokenizer_file,
            clean_up_tokenization_spaces=clean_up_tokenization_spaces,
            additional_special_tokens=additional_special_tokens,
            unk_token=unk_token,
            bos_token=bos_token,
            eos_token=eos_token,
            add_bos_token=add_bos_token,
            add_eos_token=add_eos_token,
            prefix_token=prefix_token,
            middle_token=middle_token,
            suffix_token=suffix_token,
            eot_token=eot_token,
            fill_token=fill_token,
            use_default_system_prompt=use_default_system_prompt,
            **kwargs,
        )
        self._add_bos_token = add_bos_token
        self._add_eos_token = add_eos_token
        self.update_post_processor()

        self.vocab_file = vocab_file

        self._prefix_token = prefix_token
        self._middle_token = middle_token
        self._suffix_token = suffix_token
        self._eot_token = eot_token
        self.fill_token = fill_token

    @property
    def can_save_slow_tokenizer(self) -> bool:
        """
        Checks if the slow tokenizer can be saved.

        Args:
            self (CodeLlamaTokenizerFast): An instance of the CodeLlamaTokenizerFast class.

        Returns:
            bool: True if the slow tokenizer can be saved, False otherwise.

        Raises:
            None.

        This method checks if the slow tokenizer can be saved by verifying if the vocab_file attribute exists.
        If the vocab_file attribute is not None and it corresponds to an existing file, the method returns True.
        Otherwise, it returns False.
        """
        return os.path.isfile(self.vocab_file) if self.vocab_file else False

    # Copied from transformers.models.llama.tokenization_llama_fast.LlamaTokenizerFast.update_post_processor
    def update_post_processor(self):
        """
        Updates the underlying post processor with the current `bos_token` and `eos_token`.
        """
        bos = self.bos_token
        bos_token_id = self.bos_token_id
        if bos is None and self.add_bos_token:
            raise ValueError("add_bos_token = True but bos_token = None")

        eos = self.eos_token
        eos_token_id = self.eos_token_id
        if eos is None and self.add_eos_token:
            raise ValueError("add_eos_token = True but eos_token = None")

        single = f"{(bos+':0 ') if self.add_bos_token else ''}$A:0{(' '+eos+':0') if self.add_eos_token else ''}"
        pair = f"{single}{(' '+bos+':1') if self.add_bos_token else ''} $B:1{(' '+eos+':1') if self.add_eos_token else ''}"

        special_tokens = []
        if self.add_bos_token:
            special_tokens.append((bos, bos_token_id))
        if self.add_eos_token:
            special_tokens.append((eos, eos_token_id))
        self._tokenizer.post_processor = processors.TemplateProcessing(
            single=single, pair=pair, special_tokens=special_tokens
        )

    @property
    def prefix_token(self):
        '''
        Returns the prefix token for the CodeLlamaTokenizerFast class.

        Args:
            self (CodeLlamaTokenizerFast): The instance of the CodeLlamaTokenizerFast class.

        Returns:
            str or None: The prefix token, or `None` if not set.

        Raises:
            None.
        '''
        return self._prefix_token

    @property
    def prefix_id(self):
        """
        Returns the prefix token converted to its corresponding ID.

        Args:
            self (CodeLlamaTokenizerFast): An instance of the CodeLlamaTokenizerFast class.

        Returns:
            None: If the prefix token is None.

        Raises:
            None.

        """
        if self._prefix_token is None:
            return None
        return self.convert_tokens_to_ids(self.prefix_token)

    @property
    def middle_token(self):
        """
        Returns the middle token used for infilling.

        Args:
            self: The instance of the class.

        Returns:
            str or None: The middle token, or `None` if not set.

        Raises:
            None.
        """
        return self._middle_token

    @property
    def middle_id(self):
        """
        Returns the middle token ID of the CodeLlamaTokenizerFast instance.

        Args:
            self (CodeLlamaTokenizerFast): The instance of the CodeLlamaTokenizerFast class.

        Returns:
            None: If the middle token is not set or is set to None.
            int: The ID of the middle token.

        Raises:
            None.

        This method retrieves the ID of the middle token in the CodeLlamaTokenizerFast instance.
        If the middle token is not set or is set to None, None is returned. Otherwise, the method calls the
        'convert_tokens_to_ids' function to convert the middle token into its corresponding ID and returns the ID value.
        """
        if self._middle_token is None:
            return None
        return self.convert_tokens_to_ids(self.middle_token)

    @property
    def suffix_token(self):
        """
        Returns the suffix token used for infilling.

        Args:
            self: An instance of the 'CodeLlamaTokenizerFast' class.
                It is used to access the attributes and methods of the class within this method.

        Returns:
            str or None: The suffix token, or `None` if not set.

        Raises:
            None.

        """
        return self._suffix_token

    @property
    def suffix_id(self):
        """
        Returns the suffix token converted to its corresponding ID.

        Args:
            self: An instance of the `CodeLlamaTokenizerFast` class.

        Returns:
            None: If the `_suffix_token` attribute is `None`, the method returns `None`.

        Raises:
            None.

        Description:
            This method retrieves the suffix ID associated with the `_suffix_token` attribute.
            If the `_suffix_token` is `None`, indicating the absence of a suffix token, the method returns `None`.
            Otherwise, it calls the `convert_tokens_to_ids` method to convert the `_suffix_token` to its
            corresponding ID and returns the result.

        Note:
            - The `_suffix_token` attribute should be set before calling this method to ensure accurate results.
            - The return value is an `int` token ID, or `None` when no suffix token is set.
        """
        if self._suffix_token is None:
            return None
        return self.convert_tokens_to_ids(self.suffix_token)

    @property
    def eot_id(self):
        """
        Returns the ID representation of the end-of-text (EOT) token in the CodeLlamaTokenizerFast class.

        Args:
            self: An instance of the CodeLlamaTokenizerFast class.

        Returns:
            None: If the EOT token is not set.
            int: The ID representation of the EOT token.

        Raises:
            None.

        This method retrieves the ID representation of the EOT token.
        If the EOT token is not set (None), it returns None.
        Otherwise, it uses the 'convert_tokens_to_ids' method to convert the EOT token to its corresponding ID
        representation and returns it.
        """
        if self._eot_token is None:
            return None
        return self.convert_tokens_to_ids(self.eot_token)

    @property
    def eot_token(self):
        """
        Returns the end-of-text (EOT) token used for infilling.

        Args:
            self: The instance of the CodeLlamaTokenizerFast class.

        Returns:
            The value of the _eot_token attribute.

        Raises:
            None.
        """
        return self._eot_token

    @property
    def add_eos_token(self):
        """
        Returns whether an end-of-sequence (EOS) token is appended to encoded sequences.

        Args:
            self: The instance of the CodeLlamaTokenizerFast class.

        Returns:
            bool: `True` if an EOS token is appended, `False` otherwise.

        Raises:
            None.
        """
        return self._add_eos_token

    @property
    def add_bos_token(self):
        """
        Returns whether a beginning-of-sequence (BOS) token is prepended to encoded sequences.

        Args:
            self: An instance of the CodeLlamaTokenizerFast class.

        Returns:
            bool: `True` if a BOS token is prepended, `False` otherwise.

        Raises:
            None
        """
        return self._add_bos_token

    @add_eos_token.setter
    def add_eos_token(self, value):
        """
        This method 'add_eos_token' is a setter method for the 'add_eos_token' property in the
        'CodeLlamaTokenizerFast' class.

        Args:
            self (CodeLlamaTokenizerFast): The instance of the CodeLlamaTokenizerFast class.
            value (bool): A boolean value indicating whether to add an end-of-sequence token.

        Returns:
            None.

        Raises:
            None.
        """
        self._add_eos_token = value
        self.update_post_processor()

    @add_bos_token.setter
    def add_bos_token(self, value):
        """
        Sets the value of the 'add_bos_token' attribute in the CodeLlamaTokenizerFast class.

        Args:
            self (CodeLlamaTokenizerFast): An instance of the CodeLlamaTokenizerFast class.
            value: The value to be assigned to the 'add_bos_token' attribute. It can be of any type.

        Returns:
            None.

        Raises:
            None.

        This method updates the 'add_bos_token' attribute with the provided value and triggers the
        'update_post_processor' method.
        """
        self._add_bos_token = value
        self.update_post_processor()

    def set_infilling_processor(self, reset, suffix_first=False, add_special_tokens=True):
        r"""
        Updates the normalizer to make sure the prompt format for `infilling` is respected. The infilling format is the
        following:

        - if suffix_first

            - `" <PRE> <SUF>{suf} <MID> {pre}"`

        - else:

            - `" <PRE> {pre} <SUF>{suf} <MID>"`

        If `reset` is set to `True`, the `normalizer` and `post_processor` are reset to their "normal" behaviour, which
        is to add a prefix space for the normalizer, and add a `bos_token` to the input text for the `post_processor`.
        """
        if reset:
            self._tokenizer.normalizer = normalizers.Sequence(
                [
                    normalizers.Prepend(prepend="▁"),
                    normalizers.Replace(pattern=" ", content="▁"),
                ]
            )
            self.update_post_processor()
            return

        self._tokenizer.normalizer = normalizers.Replace(pattern=" ", content="▁")
        pair = [self.bos_token] if self.add_bos_token and add_special_tokens else []
        special_tokens = [(self.bos_token, self.bos_token_id)] if self.add_bos_token and add_special_tokens else []
        if suffix_first:
            # format as " <PRE> <SUF>{suf} <MID> {pre}"
            pair += [self.prefix_token, self.suffix_token, "$B", self.middle_token, "$A"]
            special_tokens += [
                (self.prefix_token, self.prefix_id),
                (self.suffix_token, self.suffix_id),
                (self.middle_token, self.middle_id),
            ]
        else:
            # format as " <PRE> {pre} <SUF>{suf} <MID>"
            pair += [self.prefix_token, "$A", self.suffix_token, "$B", self.middle_token]
            special_tokens += [
                (self.prefix_token, self.prefix_id),
                (self.suffix_token, self.suffix_id),
                (self.middle_token, self.middle_id),
            ]

        if self.add_eos_token and add_special_tokens:
            pair += [self.eos_token]
            special_tokens += [(self.eos_token, self.eos_token_id)]
        self._tokenizer.post_processor = processors.TemplateProcessing(
            single="$A", pair=pair, special_tokens=special_tokens
        )

    def encode_plus(self, text, text_pair=None, suffix_first=False, add_special_tokens=True, **kwargs):
        """
        Encodes the given text and text pair into tokens using the CodeLlamaTokenizerFast class.

        Args:
            self (CodeLlamaTokenizerFast): An instance of the CodeLlamaTokenizerFast class.
            text (str): The input text to be encoded.
            text_pair (str, optional): The optional second input text to be encoded. Defaults to None.
            suffix_first (bool, optional): Specifies whether the suffix should be placed first. Defaults to False.
            add_special_tokens (bool, optional): Specifies whether to add special tokens. Defaults to True.

        Returns:
            tokens: The encoded tokens. This is an instance of a class defined in the CodeLlamaTokenizerFast class.

        Raises:
            ValueError: If the input includes a `prefix` and a `suffix` used for the infilling task,
                the `prefix_id, middle_id, suffix_id` must all be initialized.
                Current values: (self.prefix_id, self.middle_id, self.suffix_id)
        """
        # hack to make sure the input is pre-processed, but outside the Rust tokenizer
        text_pair = kwargs.pop("suffix", text_pair)
        if self.fill_token is not None and self.fill_token in text and text_pair is None:
            text, text_pair = text.split(self.fill_token)

        if text_pair is None or len(text_pair) < 1:
            return super().encode_plus(text, text_pair, add_special_tokens=add_special_tokens, **kwargs)

        if None in (self.prefix_id, self.middle_id, self.suffix_id):
            raise ValueError(
                "Then input includes a `prefix` and a `suffix` used for the infilling task,"
                " the `prefix_id, middle_id, suffix_id` must all be initialized. Current"
                f" values : {self.prefix_id, self.middle_id, self.suffix_id}"
            )

        self.set_infilling_processor(False, suffix_first=suffix_first, add_special_tokens=add_special_tokens)
        tokens = super().encode_plus(" " + text, text_pair=text_pair, add_special_tokens=True, **kwargs)
        self.set_infilling_processor(True)
        return tokens

    # Copied from transformers.models.llama.tokenization_llama_fast.LlamaTokenizerFast.save_vocabulary
    def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]:
        """
        Save the vocabulary for a fast tokenizer.

        Args:
            self (CodeLlamaTokenizerFast): An instance of the CodeLlamaTokenizerFast class.
            save_directory (str): The directory path where the vocabulary will be saved.
            filename_prefix (Optional[str], optional): A prefix to be added to the filename. Defaults to None.

        Returns:
            Tuple[str]: A tuple containing the path to the saved vocabulary file.

        Raises:
            ValueError: If the fast tokenizer does not have the necessary information to save the vocabulary
                for a slow tokenizer.

        Note:
            If `save_directory` is not an existing directory, the method logs an error and returns `None`
            instead of raising.

        """
        if not self.can_save_slow_tokenizer:
            raise ValueError(
                "Your fast tokenizer does not have the necessary information to save the vocabulary for a slow "
                "tokenizer."
            )

        if not os.path.isdir(save_directory):
            logger.error(f"Vocabulary path ({save_directory}) should be a directory")
            return
        out_vocab_file = os.path.join(
            save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"]
        )

        if os.path.abspath(self.vocab_file) != os.path.abspath(out_vocab_file):
            copyfile(self.vocab_file, out_vocab_file)

        return (out_vocab_file,)

    @property
    # Copied from transformers.models.llama.tokenization_llama.LlamaTokenizer.default_chat_template
    def default_chat_template(self):
        """
        LLaMA uses [INST] and [/INST] to indicate user messages, and <<SYS>> and <</SYS>> to indicate system messages.
        Assistant messages do not have special tokens, because LLaMA chat models are generally trained with strict
        user/assistant/user/assistant message ordering, and so assistant messages can be identified from the ordering
        rather than needing special tokens. The system message is partly 'embedded' in the first user message, which
        results in an unusual token ordering when it is present. This template should definitely be changed if you wish
        to fine-tune a model with more flexible role ordering!

        The output should look something like:

        <bos>[INST] B_SYS SystemPrompt E_SYS Prompt [/INST] Answer <eos><bos>[INST] Prompt [/INST] Answer <eos>
        <bos>[INST] Prompt [/INST]

        The reference for this chat template is [this code
        snippet](https://github.com/facebookresearch/llama/blob/556949fdfb72da27c2f4a40b7f0e4cf0b8153a28/llama/generation.py#L320-L362)
        in the original repository.
        """
        logger.warning_once(
            "\nNo chat template is defined for this tokenizer - using the default template "
            f"for the {self.__class__.__name__} class. If the default is not appropriate for "
            "your model, please set `tokenizer.chat_template` to an appropriate template. "
            "See https://hf-mirror.com/docs/transformers/main/chat_templating for more information.\n"
        )
        template = (
            "{% if messages[0]['role'] == 'system' %}"
            "{% set loop_messages = messages[1:] %}"  # Extract system message if it's present
            "{% set system_message = messages[0]['content'] %}"
            "{% elif USE_DEFAULT_PROMPT == true and not '<<SYS>>' in messages[0]['content'] %}"
            "{% set loop_messages = messages %}"  # Or use the default system message if the flag is set
            "{% set system_message = 'DEFAULT_SYSTEM_MESSAGE' %}"
            "{% else %}"
            "{% set loop_messages = messages %}"
            "{% set system_message = false %}"
            "{% endif %}"
            "{% for message in loop_messages %}"  # Loop over all non-system messages
            "{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}"
            "{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}"
            "{% endif %}"
            "{% if loop.index0 == 0 and system_message != false %}"  # Embed system message in first message
            "{% set content = '<<SYS>>\\n' + system_message + '\\n<</SYS>>\\n\\n' + message['content'] %}"
            "{% else %}"
            "{% set content = message['content'] %}"
            "{% endif %}"
            "{% if message['role'] == 'user' %}"  # After all of that, handle messages/roles in a fairly normal way
            "{{ bos_token + '[INST] ' + content.strip() + ' [/INST]' }}"
            "{% elif message['role'] == 'system' %}"
            "{{ '<<SYS>>\\n' + content.strip() + '\\n<</SYS>>\\n\\n' }}"
            "{% elif message['role'] == 'assistant' %}"
            "{{ ' '  + content.strip() + ' ' + eos_token }}"
            "{% endif %}"
            "{% endfor %}"
        )
        template = template.replace("USE_DEFAULT_PROMPT", "true" if self.use_default_system_prompt else "false")
        default_message = DEFAULT_SYSTEM_PROMPT.replace("\n", "\\n").replace("'", "\\'")
        template = template.replace("DEFAULT_SYSTEM_MESSAGE", default_message)

        return template

    def build_inputs_with_special_tokens(
        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
    ) -> List[int]:
        """
        Build model inputs from a sequence or a pair of sequences by concatenating and adding special tokens.

        A CodeLlama sequence has the following format, where `X` (and optionally `Y`) represents the sequence:

        - single sequence: `<bos> X <eos>`
        - pair of sequences: `<bos> X Y <eos>`

        Pairs of sequences are not the expected use case, but they will be handled without a
        separator.

        Args:
            token_ids_0 (`List[int]`):
                List of IDs to which the special tokens will be added.
            token_ids_1 (`List[int]`, *optional*):
                Optional second list of IDs for sequence pairs.

        Returns:
            `List[int]`: list of [input IDs](../glossary#input-ids) with the appropriate special tokens.
        """
        if token_ids_1 is None:
            return [self.bos_token_id] + token_ids_0 + [self.eos_token_id]
        return [self.bos_token_id] + token_ids_0 + token_ids_1 + [self.eos_token_id]

mindnlp.transformers.models.llama.tokenization_code_llama_fast.CodeLlamaTokenizerFast.add_bos_token property writable

Whether a beginning-of-sequence (BOS) token is prepended to encoded sequences.

PARAMETER DESCRIPTION
self

An instance of the CodeLlamaTokenizerFast class. It is used to access the tokenizer object.

RETURNS DESCRIPTION

bool: True if a BOS token is prepended, False otherwise.

mindnlp.transformers.models.llama.tokenization_code_llama_fast.CodeLlamaTokenizerFast.add_eos_token property writable

Whether an end-of-sequence (EOS) token is appended to encoded sequences.

PARAMETER DESCRIPTION
self

The instance of the CodeLlamaTokenizerFast class.

RETURNS DESCRIPTION

bool: True if an EOS token is appended, False otherwise.

mindnlp.transformers.models.llama.tokenization_code_llama_fast.CodeLlamaTokenizerFast.can_save_slow_tokenizer: bool property

Checks if the slow tokenizer can be saved.

PARAMETER DESCRIPTION
self

An instance of the CodeLlamaTokenizerFast class.

TYPE: CodeLlamaTokenizerFast

RETURNS DESCRIPTION
bool

True if the slow tokenizer can be saved, False otherwise.

TYPE: bool

This method checks if the slow tokenizer can be saved by verifying if the vocab_file attribute exists. If the vocab_file attribute is not None and it corresponds to an existing file, the method returns True. Otherwise, it returns False.

mindnlp.transformers.models.llama.tokenization_code_llama_fast.CodeLlamaTokenizerFast.default_chat_template property

LLaMA uses [INST] and [/INST] to indicate user messages, and <<SYS>> and <</SYS>> to indicate system messages. Assistant messages do not have special tokens, because LLaMA chat models are generally trained with strict user/assistant/user/assistant message ordering, and so assistant messages can be identified from the ordering rather than needing special tokens. The system message is partly 'embedded' in the first user message, which results in an unusual token ordering when it is present. This template should definitely be changed if you wish to fine-tune a model with more flexible role ordering!

The output should look something like:

<bos>[INST] B_SYS SystemPrompt E_SYS Prompt [/INST] Answer <eos><bos>[INST] Prompt [/INST] Answer <eos> <bos>[INST] Prompt [/INST]

The reference for this chat template is this code snippet (https://github.com/facebookresearch/llama/blob/556949fdfb72da27c2f4a40b7f0e4cf0b8153a28/llama/generation.py#L320-L362) in the original repository.
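
Example: a minimal sketch of rendering a conversation with this default template. It assumes CodeLlamaTokenizerFast is importable from mindnlp.transformers, that the base tokenizer class exposes the standard apply_chat_template helper (as in Hugging Face transformers), and that the checkpoint name below is only a placeholder.

from mindnlp.transformers import CodeLlamaTokenizerFast

# hypothetical checkpoint name; substitute the instruct checkpoint you actually use
tokenizer = CodeLlamaTokenizerFast.from_pretrained("codellama/CodeLlama-7b-Instruct-hf")

messages = [
    {"role": "system", "content": "You are a helpful coding assistant."},
    {"role": "user", "content": "Write a function that reverses a string."},
]
# tokenize=False returns the rendered prompt string instead of input ids
prompt = tokenizer.apply_chat_template(messages, tokenize=False)
print(prompt)  # "<s>[INST] <<SYS>>\n...\n<</SYS>>\n\nWrite a function ... [/INST]"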

mindnlp.transformers.models.llama.tokenization_code_llama_fast.CodeLlamaTokenizerFast.eot_id property

Returns the ID representation of the end-of-text (EOT) token in the CodeLlamaTokenizerFast class.

PARAMETER DESCRIPTION
self

An instance of the CodeLlamaTokenizerFast class.

RETURNS DESCRIPTION
None

If the EOT token is not set.

int

The ID representation of the EOT token.

This method retrieves the ID representation of the EOT token. If the EOT token is not set (None), it returns None. Otherwise, it uses the 'convert_tokens_to_ids' method to convert the EOT token to its corresponding ID representation and returns it.

mindnlp.transformers.models.llama.tokenization_code_llama_fast.CodeLlamaTokenizerFast.eot_token property

Returns the end-of-text (EOT) token used for infilling.

PARAMETER DESCRIPTION
self

The instance of the CodeLlamaTokenizerFast class.

RETURNS DESCRIPTION

The value of the _eot_token attribute.

mindnlp.transformers.models.llama.tokenization_code_llama_fast.CodeLlamaTokenizerFast.middle_id property

Returns the middle token ID of the CodeLlamaTokenizerFast instance.

PARAMETER DESCRIPTION
self

The instance of the CodeLlamaTokenizerFast class.

TYPE: CodeLlamaTokenizerFast

RETURNS DESCRIPTION
None

If the middle token is not set or is set to None.

int

The ID of the middle token.

This method retrieves the ID of the middle token in the CodeLlamaTokenizerFast instance. If the middle token is not set or is set to None, None is returned. Otherwise, the method calls the 'convert_tokens_to_ids' function to convert the middle token into its corresponding ID and returns the ID value.

mindnlp.transformers.models.llama.tokenization_code_llama_fast.CodeLlamaTokenizerFast.middle_token property

Returns the middle token used for infilling.

PARAMETER DESCRIPTION
self

The instance of the class.

RETURNS DESCRIPTION
str or None

The middle token, or None if not set.

mindnlp.transformers.models.llama.tokenization_code_llama_fast.CodeLlamaTokenizerFast.prefix_id property

Returns the prefix token converted to its corresponding ID.

PARAMETER DESCRIPTION
self

An instance of the CodeLlamaTokenizerFast class.

TYPE: CodeLlamaTokenizerFast

RETURNS DESCRIPTION
None

If the prefix token is None.

mindnlp.transformers.models.llama.tokenization_code_llama_fast.CodeLlamaTokenizerFast.prefix_token property

Returns the prefix token for the CodeLlamaTokenizerFast class.

PARAMETER DESCRIPTION
self

The instance of the CodeLlamaTokenizerFast class.

TYPE: CodeLlamaTokenizerFast

RETURNS DESCRIPTION

The prefix token, or None if not set.

mindnlp.transformers.models.llama.tokenization_code_llama_fast.CodeLlamaTokenizerFast.suffix_id property

Returns the suffix token converted to its corresponding ID.

PARAMETER DESCRIPTION
self

An instance of the CodeLlamaTokenizerFast class.

RETURNS DESCRIPTION
None

If the _suffix_token attribute is None, the method returns None.

Description

This method retrieves the suffix ID associated with the _suffix_token attribute. If the _suffix_token is None, indicating the absence of a suffix token, the method returns None. Otherwise, it calls the convert_tokens_to_ids method to convert the _suffix_token to its corresponding ID and returns the result.

Note
  • The _suffix_token attribute should be set before calling this method to ensure accurate results.
  • The return value is an int token ID, or None when no suffix token is set.

mindnlp.transformers.models.llama.tokenization_code_llama_fast.CodeLlamaTokenizerFast.suffix_token property

Returns the suffix token used for infilling.

PARAMETER DESCRIPTION
self

An instance of the 'CodeLlamaTokenizerFast' class. It is used to access the attributes and methods of the class within this method.

RETURNS DESCRIPTION

The suffix token, or None if not set.
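
Example: a small sketch tying the infilling-token properties together. It assumes tok is an already-loaded CodeLlamaTokenizerFast instance; the printed IDs depend on the vocabulary and are illustrative only.

# tok is an already-loaded CodeLlamaTokenizerFast instance
for name in ("prefix", "middle", "suffix", "eot"):
    token = getattr(tok, f"{name}_token")    # e.g. "▁<PRE>" for the prefix
    token_id = getattr(tok, f"{name}_id")    # convert_tokens_to_ids(token), or None if the token is unset
    print(name, token, token_id)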

mindnlp.transformers.models.llama.tokenization_code_llama_fast.CodeLlamaTokenizerFast.__init__(vocab_file=None, tokenizer_file=None, clean_up_tokenization_spaces=False, unk_token='<unk>', bos_token='<s>', eos_token='</s>', prefix_token='▁<PRE>', middle_token='▁<MID>', suffix_token='▁<SUF>', eot_token='▁<EOT>', fill_token='<FILL_ME>', additional_special_tokens=None, add_bos_token=True, add_eos_token=False, use_default_system_prompt=False, **kwargs)

Initializes an instance of the CodeLlamaTokenizerFast class.

PARAMETER DESCRIPTION
self

The instance of the class.

vocab_file

Path to the vocabulary file. Defaults to None.

TYPE: str DEFAULT: None

tokenizer_file

Path to the tokenizer file. Defaults to None.

TYPE: str DEFAULT: None

clean_up_tokenization_spaces

Whether to clean up tokenization spaces. Defaults to False.

TYPE: bool DEFAULT: False

unk_token

Unknown token. Defaults to '<unk>'.

TYPE: str DEFAULT: '<unk>'

bos_token

Beginning of sentence token. Defaults to '<s>'.

TYPE: str DEFAULT: '<s>'

eos_token

End of sentence token. Defaults to '</s>'.

TYPE: str DEFAULT: '</s>'

prefix_token

Prefix token. Defaults to '▁<PRE>'.

TYPE: str DEFAULT: '▁<PRE>'

middle_token

Middle token. Defaults to '▁<MID>'.

TYPE: str DEFAULT: '▁<MID>'

suffix_token

Suffix token. Defaults to '▁<SUF>'.

TYPE: str DEFAULT: '▁<SUF>'

eot_token

End of text token. Defaults to '▁<EOT>'.

TYPE: str DEFAULT: '▁<EOT>'

fill_token

Fill token. Defaults to '<FILL_ME>'.

TYPE: str DEFAULT: '<FILL_ME>'

additional_special_tokens

Additional special tokens. Defaults to None.

TYPE: List[str] DEFAULT: None

add_bos_token

Whether to add the beginning of sentence token. Defaults to True.

TYPE: bool DEFAULT: True

add_eos_token

Whether to add the end of sentence token. Defaults to False.

TYPE: bool DEFAULT: False

use_default_system_prompt

Whether to use the default system prompt. Defaults to False.

TYPE: bool DEFAULT: False

**kwargs

Additional keyword arguments.

DEFAULT: {}

RETURNS DESCRIPTION

None

Source code in mindnlp\transformers\models\llama\tokenization_code_llama_fast.py
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
def __init__(
    self,
    vocab_file=None,
    tokenizer_file=None,
    clean_up_tokenization_spaces=False,
    unk_token="<unk>",
    bos_token="<s>",
    eos_token="</s>",
    prefix_token="▁<PRE>",
    middle_token="▁<MID>",
    suffix_token="▁<SUF>",
    eot_token="▁<EOT>",
    fill_token="<FILL_ME>",
    additional_special_tokens=None,
    add_bos_token=True,
    add_eos_token=False,
    use_default_system_prompt=False,
    **kwargs,
):
    """
    Initializes an instance of the CodeLlamaTokenizerFast class.

    Args:
        self: The instance of the class.
        vocab_file (str, optional): Path to the vocabulary file. Defaults to None.
        tokenizer_file (str, optional): Path to the tokenizer file. Defaults to None.
        clean_up_tokenization_spaces (bool, optional): Whether to clean up tokenization spaces. Defaults to False.
        unk_token (str, optional): Unknown token. Defaults to '<unk>'.
        bos_token (str, optional): Beginning of sentence token. Defaults to '<s>'.
        eos_token (str, optional): End of sentence token. Defaults to '</s>'.
        prefix_token (str, optional): Prefix token. Defaults to '▁<PRE>'.
        middle_token (str, optional): Middle token. Defaults to '▁<MID>'.
        suffix_token (str, optional): Suffix token. Defaults to '▁<SUF>'.
        eot_token (str, optional): End of text token. Defaults to '▁<EOT>'.
        fill_token (str, optional): Fill token. Defaults to '<FILL_ME>'.
        additional_special_tokens (List[str], optional): Additional special tokens. Defaults to None.
        add_bos_token (bool, optional): Whether to add the beginning of sentence token. Defaults to True.
        add_eos_token (bool, optional): Whether to add the end of sentence token. Defaults to False.
        use_default_system_prompt (bool, optional): Whether to use the default system prompt. Defaults to False.
        **kwargs: Additional keyword arguments.

    Returns:
        None

    Raises:
        None
    """
    # mark tokens special to skip them
    additional_special_tokens = additional_special_tokens or []
    for token in [prefix_token, middle_token, suffix_token, eot_token]:
        additional_special_tokens += [token] if token is not None else []
    self.use_default_system_prompt = use_default_system_prompt

    super().__init__(
        vocab_file=vocab_file,
        tokenizer_file=tokenizer_file,
        clean_up_tokenization_spaces=clean_up_tokenization_spaces,
        additional_special_tokens=additional_special_tokens,
        unk_token=unk_token,
        bos_token=bos_token,
        eos_token=eos_token,
        add_bos_token=add_bos_token,
        add_eos_token=add_eos_token,
        prefix_token=prefix_token,
        middle_token=middle_token,
        suffix_token=suffix_token,
        eot_token=eot_token,
        fill_token=fill_token,
        use_default_system_prompt=use_default_system_prompt,
        **kwargs,
    )
    self._add_bos_token = add_bos_token
    self._add_eos_token = add_eos_token
    self.update_post_processor()

    self.vocab_file = vocab_file

    self._prefix_token = prefix_token
    self._middle_token = middle_token
    self._suffix_token = suffix_token
    self._eot_token = eot_token
    self.fill_token = fill_token

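Example: a minimal construction sketch. The local file paths are hypothetical, and vocab_file is only needed if you later want save_vocabulary() (and therefore can_save_slow_tokenizer) to work.

from mindnlp.transformers import CodeLlamaTokenizerFast

tok = CodeLlamaTokenizerFast(
    vocab_file="./code_llama/tokenizer.model",     # hypothetical SentencePiece model file
    tokenizer_file="./code_llama/tokenizer.json",  # hypothetical fast-tokenizer file
    add_bos_token=True,
    add_eos_token=False,
)
print(tok.prefix_token, tok.fill_token)  # ▁<PRE> <FILL_ME>
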
mindnlp.transformers.models.llama.tokenization_code_llama_fast.CodeLlamaTokenizerFast.build_inputs_with_special_tokens(token_ids_0, token_ids_1=None)

Build model inputs from a sequence or a pair of sequences by concatenating and adding special tokens.

A CodeLlama sequence has the following format, where X (and optionally Y) represents the sequence:

  • single sequence: <bos> X <eos>
  • pair of sequences: <bos> X Y <eos>

Pairs of sequences are not the expected use case, but they will be handled without a separator.

PARAMETER DESCRIPTION
token_ids_0

List of IDs to which the special tokens will be added.

TYPE: `List[int]`

token_ids_1

Optional second list of IDs for sequence pairs.

TYPE: `List[int]`, *optional* DEFAULT: None

RETURNS DESCRIPTION
List[int]

List[int]: list of input IDs with the appropriate special tokens.

Source code in mindnlp\transformers\models\llama\tokenization_code_llama_fast.py
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
def build_inputs_with_special_tokens(
    self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
) -> List[int]:
    """
    Build model inputs from a sequence or a pair of sequences by concatenating and adding special tokens.

    A CodeLlama sequence has the following format, where `X` (and optionally `Y`) represents the sequence:

    - single sequence: `<bos> X <eos>`
    - pair of sequences: `<bos> X Y <eos>`

    Pairs of sequences are not the expected use case, but they will be handled without a
    separator.

    Args:
        token_ids_0 (`List[int]`):
            List of IDs to which the special tokens will be added.
        token_ids_1 (`List[int]`, *optional*):
            Optional second list of IDs for sequence pairs.

    Returns:
        `List[int]`: list of [input IDs](../glossary#input-ids) with the appropriate special tokens.
    """
    if token_ids_1 is None:
        return [self.bos_token_id] + token_ids_0 + [self.eos_token_id]
    return [self.bos_token_id] + token_ids_0 + token_ids_1 + [self.eos_token_id]
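
Example: a usage sketch, assuming tok is a loaded instance. The inner IDs are placeholders; bos_token_id and eos_token_id are typically 1 and 2 for the Llama vocabularies.

ids = tok.build_inputs_with_special_tokens([319, 263])        # placeholder token ids
# -> [tok.bos_token_id, 319, 263, tok.eos_token_id]
pair_ids = tok.build_inputs_with_special_tokens([319], [263])
# -> [tok.bos_token_id, 319, 263, tok.eos_token_id]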

mindnlp.transformers.models.llama.tokenization_code_llama_fast.CodeLlamaTokenizerFast.encode_plus(text, text_pair=None, suffix_first=False, add_special_tokens=True, **kwargs)

Encodes the given text and text pair into tokens using the CodeLlamaTokenizerFast class.

PARAMETER DESCRIPTION
self

An instance of the CodeLlamaTokenizerFast class.

TYPE: CodeLlamaTokenizerFast

text

The input text to be encoded.

TYPE: str

text_pair

The optional second input text to be encoded. Defaults to None.

TYPE: str DEFAULT: None

suffix_first

Specifies whether the suffix should be placed first. Defaults to False.

TYPE: bool DEFAULT: False

add_special_tokens

Specifies whether to add special tokens. Defaults to True.

TYPE: bool DEFAULT: True

RETURNS DESCRIPTION
tokens

The encoded tokens. This is an instance of a class defined in the CodeLlamaTokenizerFast class.

RAISES DESCRIPTION
ValueError

If the input includes a prefix and a suffix used for the infilling task, the prefix_id, middle_id, suffix_id must all be initialized. Current values: (self.prefix_id, self.middle_id, self.suffix_id)

Source code in mindnlp\transformers\models\llama\tokenization_code_llama_fast.py
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
def encode_plus(self, text, text_pair=None, suffix_first=False, add_special_tokens=True, **kwargs):
    """
    Encodes the given text and text pair into tokens using the CodeLlamaTokenizerFast class.

    Args:
        self (CodeLlamaTokenizerFast): An instance of the CodeLlamaTokenizerFast class.
        text (str): The input text to be encoded.
        text_pair (str, optional): The optional second input text to be encoded. Defaults to None.
        suffix_first (bool, optional): Specifies whether the suffix should be placed first. Defaults to False.
        add_special_tokens (bool, optional): Specifies whether to add special tokens. Defaults to True.

    Returns:
        tokens: The encoded tokens. This is an instance of a class defined in the CodeLlamaTokenizerFast class.

    Raises:
        ValueError: If the input includes a `prefix` and a `suffix` used for the infilling task,
            the `prefix_id, middle_id, suffix_id` must all be initialized.
            Current values: (self.prefix_id, self.middle_id, self.suffix_id)
    """
    # hack to make sure the input is pre-processed, but outside the Rust tokenizer
    text_pair = kwargs.pop("suffix", text_pair)
    if self.fill_token is not None and self.fill_token in text and text_pair is None:
        text, text_pair = text.split(self.fill_token)

    if text_pair is None or len(text_pair) < 1:
        return super().encode_plus(text, text_pair, add_special_tokens=add_special_tokens, **kwargs)

    if None in (self.prefix_id, self.middle_id, self.suffix_id):
        raise ValueError(
            "Then input includes a `prefix` and a `suffix` used for the infilling task,"
            " the `prefix_id, middle_id, suffix_id` must all be initialized. Current"
            f" values : {self.prefix_id, self.middle_id, self.suffix_id}"
        )

    self.set_infilling_processor(False, suffix_first=suffix_first, add_special_tokens=add_special_tokens)
    tokens = super().encode_plus(" " + text, text_pair=text_pair, add_special_tokens=True, **kwargs)
    self.set_infilling_processor(True)
    return tokens
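
Example: a sketch of the infilling path, assuming tok is a loaded instance. A prompt containing the <FILL_ME> marker, or an explicit suffix= keyword, is routed through the infilling post-processor installed by set_infilling_processor.

prompt = 'def remove_non_ascii(s: str) -> str:\n    """ <FILL_ME>\n    return result'
enc = tok.encode_plus(prompt)   # the text is split on tok.fill_token into prefix / suffix internally
print(tok.convert_ids_to_tokens(enc["input_ids"])[:3])
# expected to start with the FIM control tokens, e.g. ['<s>', '▁<PRE>', ...]

# equivalent explicit form: prefix as `text`, suffix via the `suffix` keyword
enc2 = tok.encode_plus('def remove_non_ascii(s: str) -> str:\n    """ ', suffix="\n    return result")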

mindnlp.transformers.models.llama.tokenization_code_llama_fast.CodeLlamaTokenizerFast.save_vocabulary(save_directory, filename_prefix=None)

Save the vocabulary for a fast tokenizer.

PARAMETER DESCRIPTION
self

An instance of the CodeLlamaTokenizerFast class.

TYPE: CodeLlamaTokenizerFast

save_directory

The directory path where the vocabulary will be saved.

TYPE: str

filename_prefix

A prefix to be added to the filename. Defaults to None.

TYPE: Optional[str] DEFAULT: None

RETURNS DESCRIPTION
Tuple[str]

Tuple[str]: A tuple containing the path to the saved vocabulary file.

RAISES DESCRIPTION
ValueError

If the fast tokenizer does not have the necessary information to save the vocabulary for a slow tokenizer.

If save_directory is not an existing directory, the method logs an error and returns None instead of raising.

Source code in mindnlp\transformers\models\llama\tokenization_code_llama_fast.py
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]:
    """
    Save the vocabulary for a fast tokenizer.

    Args:
        self (CodeLlamaTokenizerFast): An instance of the CodeLlamaTokenizerFast class.
        save_directory (str): The directory path where the vocabulary will be saved.
        filename_prefix (Optional[str], optional): A prefix to be added to the filename. Defaults to None.

    Returns:
        Tuple[str]: A tuple containing the path to the saved vocabulary file.

    Raises:
        ValueError: If the fast tokenizer does not have the necessary information to save the vocabulary
            for a slow tokenizer.

    Note:
        If `save_directory` is not an existing directory, the method logs an error and returns `None`
        instead of raising.

    """
    if not self.can_save_slow_tokenizer:
        raise ValueError(
            "Your fast tokenizer does not have the necessary information to save the vocabulary for a slow "
            "tokenizer."
        )

    if not os.path.isdir(save_directory):
        logger.error(f"Vocabulary path ({save_directory}) should be a directory")
        return
    out_vocab_file = os.path.join(
        save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"]
    )

    if os.path.abspath(self.vocab_file) != os.path.abspath(out_vocab_file):
        copyfile(self.vocab_file, out_vocab_file)

    return (out_vocab_file,)
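
Example: a usage sketch with hypothetical paths, assuming tok was created with a vocab_file so that can_save_slow_tokenizer is True.

import os

os.makedirs("./exported_tokenizer", exist_ok=True)
if tok.can_save_slow_tokenizer:
    (vocab_path,) = tok.save_vocabulary("./exported_tokenizer", filename_prefix="code_llama")
    print(vocab_path)  # e.g. ./exported_tokenizer/code_llama-tokenizer.model
else:
    print("No vocab_file available; only the fast tokenizer files can be saved.")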

mindnlp.transformers.models.llama.tokenization_code_llama_fast.CodeLlamaTokenizerFast.set_infilling_processor(reset, suffix_first=False, add_special_tokens=True)

Updates the normalizer to make sure the prompt format for infilling is respected. The infilling format is the following:

  • if suffix_first

    • " <PRE> <SUF>{suf} <MID> {pre}"
  • else:

    • " <PRE> {pre} <SUF>{suf} <MID>"

If reset is set to True, the normalizer and post_processor are reset to their "normal" behaviour, which is to add a prefix space for the normalizer, and add a bos_token to the input text for the post_processor.

Source code in mindnlp\transformers\models\llama\tokenization_code_llama_fast.py
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
def set_infilling_processor(self, reset, suffix_first=False, add_special_tokens=True):
    r"""
    Updates the normalizer to make sure the prompt format for `infilling` is respected. The infilling format is the
    following:

    - if suffix_first

        - `" <PRE> <SUF>{suf} <MID> {pre}"`

    - else:

        - `" <PRE> {pre} <SUF>{suf} <MID>"`

    If `reset` is set to `True`, the `normalizer` and `post_processor` are reset to their "normal" behaviour, which
    is to add a prefix space for the normalizer, and add a `bos_token` to the input text for the `post_processor`.
    """
    if reset:
        self._tokenizer.normalizer = normalizers.Sequence(
            [
                normalizers.Prepend(prepend="▁"),
                normalizers.Replace(pattern=" ", content="▁"),
            ]
        )
        self.update_post_processor()
        return

    self._tokenizer.normalizer = normalizers.Replace(pattern=" ", content="▁")
    pair = [self.bos_token] if self.add_bos_token and add_special_tokens else []
    special_tokens = [(self.bos_token, self.bos_token_id)] if self.add_bos_token and add_special_tokens else []
    if suffix_first:
        # format as " <PRE> <SUF>{suf} <MID> {pre}"
        pair += [self.prefix_token, self.suffix_token, "$B", self.middle_token, "$A"]
        special_tokens += [
            (self.prefix_token, self.prefix_id),
            (self.suffix_token, self.suffix_id),
            (self.middle_token, self.middle_id),
        ]
    else:
        # format as " <PRE> {pre} <SUF>{suf} <MID>"
        pair += [self.prefix_token, "$A", self.suffix_token, "$B", self.middle_token]
        special_tokens += [
            (self.prefix_token, self.prefix_id),
            (self.suffix_token, self.suffix_id),
            (self.middle_token, self.middle_id),
        ]

    if self.add_eos_token and add_special_tokens:
        pair += [self.eos_token]
        special_tokens += [(self.eos_token, self.eos_token_id)]
    self._tokenizer.post_processor = processors.TemplateProcessing(
        single="$A", pair=pair, special_tokens=special_tokens
    )
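
Example: encode_plus normally toggles this for you; the sketch below (with tok as above) only illustrates the manual round trip, using the backend Rust tokenizer directly to show the installed template. The exact leading tokens depend on add_bos_token.

tok.set_infilling_processor(False, suffix_first=False)   # install the infilling template
enc = tok._tokenizer.encode(" def add(a, b):", "\n    return a + b")  # backend tokenizers.Encoding
print(enc.tokens[:3])  # expected to start with ['<s>', '▁<PRE>', ...] when add_bos_token is True
tok.set_infilling_processor(True)                         # restore the normal bos/eos template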

mindnlp.transformers.models.llama.tokenization_code_llama_fast.CodeLlamaTokenizerFast.update_post_processor()

Updates the underlying post processor with the current bos_token and eos_token.

Source code in mindnlp\transformers\models\llama\tokenization_code_llama_fast.py
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
def update_post_processor(self):
    """
    Updates the underlying post processor with the current `bos_token` and `eos_token`.
    """
    bos = self.bos_token
    bos_token_id = self.bos_token_id
    if bos is None and self.add_bos_token:
        raise ValueError("add_bos_token = True but bos_token = None")

    eos = self.eos_token
    eos_token_id = self.eos_token_id
    if eos is None and self.add_eos_token:
        raise ValueError("add_eos_token = True but eos_token = None")

    single = f"{(bos+':0 ') if self.add_bos_token else ''}$A:0{(' '+eos+':0') if self.add_eos_token else ''}"
    pair = f"{single}{(' '+bos+':1') if self.add_bos_token else ''} $B:1{(' '+eos+':1') if self.add_eos_token else ''}"

    special_tokens = []
    if self.add_bos_token:
        special_tokens.append((bos, bos_token_id))
    if self.add_eos_token:
        special_tokens.append((eos, eos_token_id))
    self._tokenizer.post_processor = processors.TemplateProcessing(
        single=single, pair=pair, special_tokens=special_tokens
    )
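
Example: because the add_bos_token / add_eos_token setters call update_post_processor, toggling them changes how subsequently encoded sequences are wrapped. A short sketch with tok as above:

print(tok("hello world")["input_ids"])   # [bos_id, ...] - no eos appended by default
tok.add_eos_token = True                 # setter re-runs update_post_processor
print(tok("hello world")["input_ids"])   # [bos_id, ..., eos_id]
tok.add_eos_token = False                # restore the default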