
auto

mindnlp.transformers.models.auto.auto_factory.get_values(model_mapping)

Source code in mindnlp\transformers\models\auto\auto_factory.py
def get_values(model_mapping):
    result = []
    for model in model_mapping.values():
        if isinstance(model, (list, tuple)):
            result += list(model)
        else:
            result.append(model)

    return result
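
A quick usage sketch: `get_values` flattens the values of an auto model mapping, expanding tuple entries (such as slow/fast tokenizer pairs) into a single flat list. The mapping below is a hypothetical stand-in for the lazy mappings used by the auto classes.

```python
# Hypothetical mapping; real mappings such as TOKENIZER_MAPPING resolve
# their values lazily from class names.
example_mapping = {
    "bert": ("BertTokenizer", "BertTokenizerFast"),  # tuple entry is expanded
    "gpt2": "GPT2Model",                             # scalar entry is appended
}

print(get_values(example_mapping))
# ['BertTokenizer', 'BertTokenizerFast', 'GPT2Model']
```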

mindnlp.transformers.models.auto.configuration_auto.ALL_PRETRAINED_CONFIG_ARCHIVE_MAP = _LazyLoadAllMappings(CONFIG_ARCHIVE_MAP_MAPPING_NAMES) module-attribute

mindnlp.transformers.models.auto.configuration_auto.CONFIG_MAPPING = _LazyConfigMapping(CONFIG_MAPPING_NAMES) module-attribute
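
`CONFIG_MAPPING` behaves like a read-only dict whose values are imported on first access; a minimal sketch, assuming the lazy mapping supports indexing and membership tests as in the upstream transformers implementation:

```python
from mindnlp.transformers.models.auto.configuration_auto import CONFIG_MAPPING

# Indexing triggers the lazy import of the concrete config class.
print(CONFIG_MAPPING["bert"].__name__)  # 'BertConfig'
print("llama" in CONFIG_MAPPING)        # True
```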

mindnlp.transformers.models.auto.configuration_auto.MODEL_NAMES_MAPPING = OrderedDict([('albert', 'ALBERT'), ('align', 'ALIGN'), ('altclip', 'AltCLIP'), ('audio-spectrogram-transformer', 'Audio Spectrogram Transformer'), ('autoformer', 'Autoformer'), ('bark', 'Bark'), ('bart', 'BART'), ('barthez', 'BARThez'), ('bartpho', 'BARTpho'), ('beit', 'BEiT'), ('bert', 'BERT'), ('bert-generation', 'Bert Generation'), ('bert-japanese', 'BertJapanese'), ('bertweet', 'BERTweet'), ('bge-m3', 'BgeM3'), ('big_bird', 'BigBird'), ('bigbird_pegasus', 'BigBird-Pegasus'), ('biogpt', 'BioGpt'), ('bit', 'BiT'), ('blenderbot', 'Blenderbot'), ('blenderbot-small', 'BlenderbotSmall'), ('blip', 'BLIP'), ('blip-2', 'BLIP-2'), ('bloom', 'BLOOM'), ('bort', 'BORT'), ('bridgetower', 'BridgeTower'), ('bros', 'BROS'), ('byt5', 'ByT5'), ('camembert', 'CamemBERT'), ('canine', 'CANINE'), ('chinese_clip', 'Chinese-CLIP'), ('chatglm', 'ChatGLM'), ('chatglm2', 'ChatGLM2'), ('chatglm3', 'ChatGLM3'), ('chatglm4', 'ChatGLM4'), ('clap', 'CLAP'), ('clip', 'CLIP'), ('clip_vision_model', 'CLIPVisionModel'), ('clipseg', 'CLIPSeg'), ('clipseg_vision_model', 'CLIPSegVisionModel'), ('code_llama', 'CodeLlama'), ('codegen', 'CodeGen'), ('cohere', 'Cohere'), ('conditional_detr', 'Conditional DETR'), ('cogvlm', 'CogVLM'), ('convbert', 'ConvBERT'), ('convnext', 'ConvNeXT'), ('convnextv2', 'ConvNeXTV2'), ('cpm', 'CPM'), ('cpmant', 'CPM-Ant'), ('cpmbee', 'CPM-Bee'), ('ctrl', 'CTRL'), ('cvt', 'CvT'), ('data2vec-audio', 'Data2VecAudio'), ('data2vec-text', 'Data2VecText'), ('data2vec-vision', 'Data2VecVision'), ('dbrx', 'DBRX'), ('deberta', 'DeBERTa'), ('deberta-v2', 'DeBERTa-v2'), ('decision_transformer', 'Decision Transformer'), ('deformable_detr', 'Deformable DETR'), ('deepseek_v2', 'Deepseek_v2'), ('deit', 'DeiT'), ('depth_anything', 'Depth Anything'), ('depth_anything_v2', 'Depth Anything V2'), ('deplot', 'DePlot'), ('deta', 'DETA'), ('detr', 'DETR'), ('dialogpt', 'DialoGPT'), ('dinat', 'DiNAT'), ('dinov2', 'DINOv2'), ('distilbert', 'DistilBERT'), ('donut', 'Donut'), ('donut-swin', 'DonutSwin'), ('dit', 'DiT'), ('donut-swin', 'DonutSwin'), ('dpr', 'DPR'), ('dpt', 'DPT'), ('efficientformer', 'EfficientFormer'), ('efficientnet', 'EfficientNet'), ('electra', 'ELECTRA'), ('encodec', 'EnCodec'), ('encoder-decoder', 'Encoder decoder'), ('ernie', 'ERNIE'), ('ernie_m', 'ErnieM'), ('esm', 'ESM'), ('falcon', 'Falcon'), ('fastspeech2_conformer', 'FastSpeech2ConformerModel'), ('flan-t5', 'FLAN-T5'), ('flan-ul2', 'FLAN-UL2'), ('flaubert', 'FlauBERT'), ('flava', 'FLAVA'), ('florence2', 'Florence2'), ('fnet', 'FNet'), ('focalnet', 'FocalNet'), ('fsmt', 'FairSeq Machine-Translation'), ('funnel', 'Funnel Transformer'), ('fuyu', 'Fuyu'), ('gemma', 'Gemma'), ('gemma2', 'Gemma2'), ('git', 'GIT'), ('glpn', 'GLPN'), ('gpt-sw3', 'GPT-Sw3'), ('gpt2', 'OpenAI GPT-2'), ('gpt_bigcode', 'GPTBigCode'), ('gpt_neo', 'GPT Neo'), ('gpt_neox', 'GPT NeoX'), ('gpt_neox_japanese', 'GPT NeoX Japanese'), ('gpt_pangu', 'GPTPangu'), ('gptj', 'GPT-J'), ('gptsan-japanese', 'GPTSAN-japanese'), ('graphormer', 'Graphormer'), ('groupvit', 'GroupViT'), ('herbert', 'HerBERT'), ('hubert', 'Hubert'), ('ibert', 'I-BERT'), ('idefics', 'IDEFICS'), ('imagegpt', 'ImageGPT'), ('informer', 'Informer'), ('instructblip', 'InstructBLIP'), ('jukebox', 'Jukebox'), ('jamba', 'Jamba'), ('jetmoe', 'JetMoE'), ('kosmos-2', 'KOSMOS-2'), ('layoutlm', 'LayoutLM'), ('layoutlmv2', 'LayoutLMv2'), ('layoutlmv3', 'LayoutLMv3'), ('layoutxlm', 'LayoutXLM'), ('led', 'LED'), ('levit', 'LeViT'), ('lilt', 'LiLT'), ('llama', 
'LLaMA'), ('llama2', 'Llama2'), ('llava', 'LLaVa'), ('llava_next', 'LLaVA-NeXT'), ('longformer', 'Longformer'), ('longt5', 'LongT5'), ('luke', 'LUKE'), ('lxmert', 'LXMERT'), ('m2m_100', 'M2M100'), ('mamba', 'Mamba'), ('marian', 'Marian'), ('markuplm', 'MarkupLM'), ('mask2former', 'Mask2Former'), ('maskformer', 'MaskFormer'), ('maskformer-swin', 'MaskFormerSwin'), ('matcha', 'MatCha'), ('mbart', 'mBART'), ('mbart50', 'mBART-50'), ('mctct', 'M-CTC-T'), ('mega', 'MEGA'), ('megatron-bert', 'Megatron-BERT'), ('megatron_gpt2', 'Megatron-GPT2'), ('mgp-str', 'MGP-STR'), ('minicpm', 'MiniCPM'), ('mistral', 'Mistral'), ('mixtral', 'Mixtral'), ('mluke', 'mLUKE'), ('mms', 'MMS'), ('mobilebert', 'MobileBERT'), ('mobilenet_v1', 'MobileNetV1'), ('mobilenet_v2', 'MobileNetV2'), ('mobilevit', 'MobileViT'), ('mobilevitv2', 'MobileViTV2'), ('mpnet', 'MPNet'), ('mpt', 'MPT'), ('mra', 'MRA'), ('mt5', 'MT5'), ('musicgen', 'MusicGen'), ('musicgen_melody', 'MusicGen Melody'), ('mvp', 'MVP'), ('nat', 'NAT'), ('nezha', 'Nezha'), ('nllb', 'NLLB'), ('nllb-moe', 'NLLB-MOE'), ('nougat', 'Nougat'), ('nystromformer', 'Nyströmformer'), ('olmo', 'OLMo'), ('openelm', 'OpenELM'), ('oneformer', 'OneFormer'), ('open-llama', 'OpenLlama'), ('openai-gpt', 'OpenAI GPT'), ('opt', 'OPT'), ('owlv2', 'OWLv2'), ('owlvit', 'OWL-ViT'), ('patchtst', 'PatchTST'), ('pegasus', 'Pegasus'), ('pegasus_x', 'PEGASUS-X'), ('perceiver', 'Perceiver'), ('persimmon', 'Persimmon'), ('phi', 'Phi'), ('phi3', 'Phi3'), ('phobert', 'PhoBERT'), ('pix2struct', 'Pix2Struct'), ('plbart', 'PLBart'), ('poolformer', 'PoolFormer'), ('pop2piano', 'Pop2Piano'), ('prophetnet', 'ProphetNet'), ('pvt', 'PVT'), ('qdqbert', 'QDQBert'), ('qwen2', 'Qwen2'), ('qwen2_moe', 'Qwen2MoE'), ('rag', 'RAG'), ('realm', 'REALM'), ('reformer', 'Reformer'), ('regnet', 'RegNet'), ('rembert', 'RemBERT'), ('resnet', 'ResNet'), ('roberta', 'RoBERTa'), ('roberta-prelayernorm', 'RoBERTa-PreLayerNorm'), ('roc_bert', 'RoCBert'), ('roformer', 'RoFormer'), ('rwkv', 'RWKV'), ('sam', 'SAM'), ('seamless_m4t', 'SeamlessM4T'), ('segformer', 'SegFormer'), ('sew', 'SEW'), ('sew-d', 'SEW-D'), ('speech-encoder-decoder', 'Speech Encoder decoder'), ('speech_to_text', 'Speech2Text'), ('speech_to_text_2', 'Speech2Text2'), ('speecht5', 'SpeechT5'), ('splinter', 'Splinter'), ('squeezebert', 'SqueezeBERT'), ('stablelm', 'StableLm'), ('starcoder2', 'Starcoder2'), ('swiftformer', 'SwiftFormer'), ('swin', 'Swin Transformer'), ('swin2sr', 'Swin2SR'), ('swinv2', 'Swin Transformer V2'), ('switch_transformers', 'SwitchTransformers'), ('t5', 'T5'), ('t5v1.1', 'T5v1.1'), ('table-transformer', 'Table Transformer'), ('tapas', 'TAPAS'), ('tapex', 'TAPEX'), ('time_series_transformer', 'Time Series Transformer'), ('timesformer', 'TimeSformer'), ('timm_backbone', 'TimmBackbone'), ('trajectory_transformer', 'Trajectory Transformer'), ('transfo-xl', 'Transformer-XL'), ('trocr', 'TrOCR'), ('tvlt', 'TVLT'), ('ul2', 'UL2'), ('udop', 'UDOP'), ('umt5', 'UMT5'), ('unispeech', 'UniSpeech'), ('unispeech-sat', 'UniSpeechSat'), ('univnet', 'UnivNet'), ('upernet', 'UPerNet'), ('van', 'VAN'), ('videomae', 'VideoMAE'), ('vilt', 'ViLT'), ('vipllava', 'VipLlava'), ('vision-encoder-decoder', 'Vision Encoder decoder'), ('vision-text-dual-encoder', 'VisionTextDualEncoder'), ('visual_bert', 'VisualBERT'), ('vit', 'ViT'), ('vit_hybrid', 'ViT Hybrid'), ('vit_mae', 'ViTMAE'), ('vit_msn', 'ViTMSN'), ('vitdet', 'VitDet'), ('vitmatte', 'ViTMatte'), ('vits', 'VITS'), ('vivit', 'ViViT'), ('wav2vec2', 'Wav2Vec2'), ('wav2vec2-bert', 'Wav2Vec2-BERT'), 
('wav2vec2-conformer', 'Wav2Vec2-Conformer'), ('wav2vec2_phoneme', 'Wav2Vec2Phoneme'), ('wavlm', 'WavLM'), ('whisper', 'Whisper'), ('xclip', 'X-CLIP'), ('xglm', 'XGLM'), ('xlm', 'XLM'), ('xlm-prophetnet', 'XLM-ProphetNet'), ('xlm-roberta', 'XLM-RoBERTa'), ('xlm-roberta-xl', 'XLM-RoBERTa-XL'), ('xlm-v', 'XLM-V'), ('xlnet', 'XLNet'), ('xls_r', 'XLS-R'), ('xlsr_wav2vec2', 'XLSR-Wav2Vec2'), ('xmod', 'X-MOD'), ('yolos', 'YOLOS'), ('yoso', 'YOSO')]) module-attribute
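
`MODEL_NAMES_MAPPING` pairs each `model_type` key with its human-readable display name; a short lookup sketch:

```python
from mindnlp.transformers.models.auto.configuration_auto import MODEL_NAMES_MAPPING

# Plain OrderedDict lookups from model_type key to display name.
print(MODEL_NAMES_MAPPING["bert"])   # 'BERT'
print(MODEL_NAMES_MAPPING["llama"])  # 'LLaMA'
```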

mindnlp.transformers.models.auto.configuration_auto.AutoConfig

This is a generic configuration class that will be instantiated as one of the configuration classes of the library when created with the [~AutoConfig.from_pretrained] class method.

This class cannot be instantiated directly using __init__() (throws an error).

Source code in mindnlp\transformers\models\auto\configuration_auto.py
class AutoConfig:
    r"""
    This is a generic configuration class that will be instantiated as one of the configuration classes of the library
    when created with the [`~AutoConfig.from_pretrained`] class method.

    This class cannot be instantiated directly using `__init__()` (throws an error).
    """

    def __init__(self):
        """
        Initialize AutoConfig.

        Args:
            self: The instance of the AutoConfig class.
                It is automatically passed when the method is called.

                - Purpose: Represents the instance of the AutoConfig class.
                - Restrictions: None.

        Returns:
            None.

        Raises:
            EnvironmentError:
                If the AutoConfig is instantiated directly using the `__init__` method,
                it raises an EnvironmentError with the message
                'AutoConfig is designed to be instantiated using the
                `AutoConfig.from_pretrained(pretrained_model_name_or_path)` method.'.
        """
        raise EnvironmentError(
            "AutoConfig is designed to be instantiated "
            "using the `AutoConfig.from_pretrained(pretrained_model_name_or_path)` method."
        )

    @classmethod
    def for_model(cls, model_type: str, *args, **kwargs):
        """
        Instantiate a configuration class of the library from the given model type.

        Args:
            cls (class): The class itself, automatically passed as the first parameter.
            model_type (str): A string representing the type of the model for which the configuration class needs to be instantiated.
                It must be a key within the CONFIG_MAPPING dictionary.

        Returns:
            `PretrainedConfig`: An instance of the configuration class corresponding to `model_type`,
                initialized with the provided `*args` and `**kwargs`.

        Raises:
            ValueError:
                Raised when the provided 'model_type' is not recognized or is not found as a key in the CONFIG_MAPPING dictionary.
                The exception message indicates the unrecognized model identifier and lists all valid model identifiers
                available in the CONFIG_MAPPING dictionary.
        """
        if model_type in CONFIG_MAPPING:
            config_class = CONFIG_MAPPING[model_type]
            return config_class(*args, **kwargs)
        raise ValueError(
            f"Unrecognized model identifier: {model_type}. Should contain one of {', '.join(CONFIG_MAPPING.keys())}"
        )

    @classmethod
    @replace_list_option_in_docstrings()
    def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
        r"""
        Instantiate one of the configuration classes of the library from a pretrained model configuration.

        The configuration class to instantiate is selected based on the `model_type` property of the config object that
        is loaded, or when it's missing, by falling back to using pattern matching on `pretrained_model_name_or_path`:

        List options

        Args:
            pretrained_model_name_or_path (`str` or `os.PathLike`):
                Can be either:

                - A string, the *model id* of a pretrained model configuration hosted inside a model repo on
                hf-mirror.com. Valid model ids can be located at the root-level, like `bert-base-uncased`, or
                namespaced under a user or organization name, like `dbmdz/bert-base-german-cased`.
                - A path to a *directory* containing a configuration file saved using the
                [`~PretrainedConfig.save_pretrained`] method, or the [`~PreTrainedModel.save_pretrained`] method,
                e.g., `./my_model_directory/`.
                - A path or url to a saved configuration JSON *file*, e.g.,
                `./my_model_directory/configuration.json`.
            cache_dir (`str` or `os.PathLike`, *optional*):
                Path to a directory in which a downloaded pretrained model configuration should be cached if the
                standard cache should not be used.
            force_download (`bool`, *optional*, defaults to `False`):
                Whether or not to force the (re-)download of the model weights and configuration files and override the
                cached versions if they exist.
            resume_download (`bool`, *optional*, defaults to `False`):
                Whether or not to delete incompletely received files. Will attempt to resume the download if such a
                file exists.
            proxies (`Dict[str, str]`, *optional*):
                A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128',
                'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
            revision (`str`, *optional*, defaults to `"main"`):
                The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a
                git-based system for storing models and other artifacts on hf-mirror.com, so `revision` can be any
                identifier allowed by git.
            return_unused_kwargs (`bool`, *optional*, defaults to `False`):
                If `False`, then this function returns just the final configuration object.
                If `True`, then this function returns a `Tuple(config, unused_kwargs)` where *unused_kwargs* is a
                dictionary consisting of the key/value pairs whose keys are not configuration attributes: i.e., the
                part of `kwargs` which has not been used to update `config` and is otherwise ignored.
            trust_remote_code (`bool`, *optional*, defaults to `False`):
                Whether or not to allow for custom models defined on the Hub in their own modeling files. This option
                should only be set to `True` for repositories you trust and in which you have read the code, as it will
                execute code present on the Hub on your local machine.
            kwargs (additional keyword arguments, *optional*):
                The values in kwargs of any keys which are configuration attributes will be used to override the loaded
                values. Behavior concerning key/value pairs whose keys are *not* configuration attributes is controlled
                by the `return_unused_kwargs` keyword parameter.

        Example:
            ```python
            >>> from transformers import AutoConfig
            ...
            >>> # Download configuration from hf-mirror.com and cache.
            >>> config = AutoConfig.from_pretrained("bert-base-uncased")
            ...
            >>> # Download configuration from hf-mirror.com (user-uploaded) and cache.
            >>> config = AutoConfig.from_pretrained("dbmdz/bert-base-german-cased")
            ...
            >>> # If configuration file is in a directory (e.g., was saved using *save_pretrained('./test/saved_model/')*).
            >>> config = AutoConfig.from_pretrained("./test/bert_saved_model/")
            ...
            >>> # Load a specific configuration file.
            >>> config = AutoConfig.from_pretrained("./test/bert_saved_model/my_configuration.json")
            ...
            >>> # Change some config attributes when loading a pretrained config.
            >>> config = AutoConfig.from_pretrained("bert-base-uncased", output_attentions=True, foo=False)
            >>> config.output_attentions
            True
            >>> config, unused_kwargs = AutoConfig.from_pretrained(
            ...     "bert-base-uncased", output_attentions=True, foo=False, return_unused_kwargs=True
            ... )
            >>> config.output_attentions
            True
            >>> unused_kwargs
            {'foo': False}
            ```
        """
        kwargs["name_or_path"] = pretrained_model_name_or_path

        config_dict, unused_kwargs = PretrainedConfig.get_config_dict(
            pretrained_model_name_or_path, **kwargs
        )

        fix_chatglm_name(config_dict)

        if "model_type" in config_dict:
            config_class = CONFIG_MAPPING[config_dict["model_type"]]
            return config_class.from_dict(config_dict, **unused_kwargs)
        # Fallback: use pattern matching on the string.
        # We go from longer names to shorter names to catch roberta before bert (for instance)
        for pattern in sorted(CONFIG_MAPPING.keys(), key=len, reverse=True):
            if pattern in str(pretrained_model_name_or_path).lower():
                return CONFIG_MAPPING[pattern].from_dict(config_dict, **unused_kwargs)

        raise ValueError(
            f"Unrecognized model in {pretrained_model_name_or_path}. "
            f"Should have a `model_type` key in its {CONFIG_NAME}, or contain one of the following strings "
            f"in its name: {', '.join(CONFIG_MAPPING.keys())}"
        )

    @staticmethod
    def register(model_type, config, exist_ok=False):
        """
        Register a new configuration for this class.

        Args:
            model_type (`str`): The model type like "bert" or "gpt".
            config ([`PretrainedConfig`]): The config to register.
        """
        if issubclass(config, PretrainedConfig) and config.model_type != model_type:
            raise ValueError(
                "The config you are passing has a `model_type` attribute that is not consistent with the model type "
                f"you passed (config has {config.model_type} and you passed {model_type}. Fix one of those so they "
                "match!"
            )
        CONFIG_MAPPING.register(model_type, config, exist_ok=exist_ok)

mindnlp.transformers.models.auto.configuration_auto.AutoConfig.__init__()

Initialize AutoConfig.

PARAMETER DESCRIPTION
self

The instance of the AutoConfig class. It is automatically passed when the method is called.

  • Purpose: Represents the instance of the AutoConfig class.
  • Restrictions: None.

RETURNS DESCRIPTION

None.

RAISES DESCRIPTION
EnvironmentError

Raised when AutoConfig is instantiated directly via `__init__()`. The error message directs callers to use the `AutoConfig.from_pretrained(pretrained_model_name_or_path)` class method instead.

Source code in mindnlp\transformers\models\auto\configuration_auto.py
def __init__(self):
    """
    Initialize AutoConfig.

    Args:
        self: The instance of the AutoConfig class.
            It is automatically passed when the method is called.

            - Purpose: Represents the instance of the AutoConfig class.
            - Restrictions: None.

    Returns:
        None.

    Raises:
        EnvironmentError:
            If the AutoConfig is instantiated directly using the `__init__` method,
            it raises an EnvironmentError with the message
            'AutoConfig is designed to be instantiated using the
            `AutoConfig.from_pretrained(pretrained_model_name_or_path)` method.'.
    """
    raise EnvironmentError(
        "AutoConfig is designed to be instantiated "
        "using the `AutoConfig.from_pretrained(pretrained_model_name_or_path)` method."
    )
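
A brief illustration of the guard above; the import path assumes `AutoConfig` is re-exported from `mindnlp.transformers`:

```python
from mindnlp.transformers import AutoConfig

try:
    AutoConfig()  # direct instantiation is forbidden
except EnvironmentError as err:
    print(err)
# AutoConfig is designed to be instantiated using the
# `AutoConfig.from_pretrained(pretrained_model_name_or_path)` method.
```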

mindnlp.transformers.models.auto.configuration_auto.AutoConfig.for_model(model_type, *args, **kwargs) classmethod

Instantiate a configuration class of the library from the given model type.

PARAMETER DESCRIPTION
cls

The class itself, automatically passed as the first parameter.

TYPE: class

model_type

A string representing the type of the model for which the configuration class needs to be instantiated. It must be a key within the CONFIG_MAPPING dictionary.

TYPE: str

RETURNS DESCRIPTION
PretrainedConfig

An instance of the configuration class corresponding to model_type, initialized with the provided arguments.

RAISES DESCRIPTION
ValueError

Raised when the provided 'model_type' is not recognized or is not found as a key in the CONFIG_MAPPING dictionary. The exception message indicates the unrecognized model identifier and lists all valid model identifiers available in the CONFIG_MAPPING dictionary.

Source code in mindnlp\transformers\models\auto\configuration_auto.py
@classmethod
def for_model(cls, model_type: str, *args, **kwargs):
    """
    Instantiate a configuration class of the library from the given model type.

    Args:
        cls (class): The class itself, automatically passed as the first parameter.
        model_type (str): A string representing the type of the model for which the configuration class needs to be instantiated.
            It must be a key within the CONFIG_MAPPING dictionary.

    Returns:
        `PretrainedConfig`: An instance of the configuration class corresponding to `model_type`,
            initialized with the provided `*args` and `**kwargs`.

    Raises:
        ValueError:
            Raised when the provided 'model_type' is not recognized or is not found as a key in the CONFIG_MAPPING dictionary.
            The exception message indicates the unrecognized model identifier and lists all valid model identifiers
            available in the CONFIG_MAPPING dictionary.
    """
    if model_type in CONFIG_MAPPING:
        config_class = CONFIG_MAPPING[model_type]
        return config_class(*args, **kwargs)
    raise ValueError(
        f"Unrecognized model identifier: {model_type}. Should contain one of {', '.join(CONFIG_MAPPING.keys())}"
    )
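
A minimal sketch of `for_model`: the `model_type` key selects the concrete config class, and any extra arguments are forwarded to its constructor. The `vocab_size` override is illustrative.

```python
from mindnlp.transformers.models.auto.configuration_auto import AutoConfig

# Builds a fresh BertConfig without downloading anything.
config = AutoConfig.for_model("bert", vocab_size=1000)
print(type(config).__name__, config.vocab_size)  # BertConfig 1000
```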

mindnlp.transformers.models.auto.configuration_auto.AutoConfig.from_pretrained(pretrained_model_name_or_path, **kwargs) classmethod

Instantiate one of the configuration classes of the library from a pretrained model configuration.

The configuration class to instantiate is selected based on the model_type property of the config object that is loaded, or when it's missing, by falling back to using pattern matching on pretrained_model_name_or_path:

List options

PARAMETER DESCRIPTION
pretrained_model_name_or_path

Can be either:

  • A string, the model id of a pretrained model configuration hosted inside a model repo on hf-mirror.com. Valid model ids can be located at the root-level, like bert-base-uncased, or namespaced under a user or organization name, like dbmdz/bert-base-german-cased.
  • A path to a directory containing a configuration file saved using the [~PretrainedConfig.save_pretrained] method, or the [~PreTrainedModel.save_pretrained] method, e.g., ./my_model_directory/.
  • A path or url to a saved configuration JSON file, e.g., ./my_model_directory/configuration.json.

TYPE: `str` or `os.PathLike`

cache_dir

Path to a directory in which a downloaded pretrained model configuration should be cached if the standard cache should not be used.

TYPE: `str` or `os.PathLike`, *optional*

force_download

Whether or not to force the (re-)download of the model weights and configuration files and override the cached versions if they exist.

TYPE: `bool`, *optional*, defaults to `False`

resume_download

Whether or not to delete incompletely received files. Will attempt to resume the download if such a file exists.

TYPE: `bool`, *optional*, defaults to `False`

proxies

A dictionary of proxy servers to use by protocol or endpoint, e.g., {'http': 'foo.bar:3128', 'http://hostname': 'foo.bar:4012'}. The proxies are used on each request.

TYPE: `Dict[str, str]`, *optional*

revision

The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a git-based system for storing models and other artifacts on hf-mirror.com, so revision can be any identifier allowed by git.

TYPE: `str`, *optional*, defaults to `"main"`

return_unused_kwargs

If False, then this function returns just the final configuration object. If True, then this function returns a Tuple(config, unused_kwargs) where unused_kwargs is a dictionary consisting of the key/value pairs whose keys are not configuration attributes: i.e., the part of kwargs which has not been used to update config and is otherwise ignored.

TYPE: `bool`, *optional*, defaults to `False`

trust_remote_code

Whether or not to allow for custom models defined on the Hub in their own modeling files. This option should only be set to True for repositories you trust and in which you have read the code, as it will execute code present on the Hub on your local machine.

TYPE: `bool`, *optional*, defaults to `False`

kwargs

The values in kwargs of any keys which are configuration attributes will be used to override the loaded values. Behavior concerning key/value pairs whose keys are not configuration attributes is controlled by the return_unused_kwargs keyword parameter.

TYPE: additional keyword arguments, *optional*

Example
>>> from transformers import AutoConfig
...
>>> # Download configuration from hf-mirror.com and cache.
>>> config = AutoConfig.from_pretrained("bert-base-uncased")
...
>>> # Download configuration from hf-mirror.com (user-uploaded) and cache.
>>> config = AutoConfig.from_pretrained("dbmdz/bert-base-german-cased")
...
>>> # If configuration file is in a directory (e.g., was saved using *save_pretrained('./test/saved_model/')*).
>>> config = AutoConfig.from_pretrained("./test/bert_saved_model/")
...
>>> # Load a specific configuration file.
>>> config = AutoConfig.from_pretrained("./test/bert_saved_model/my_configuration.json")
...
>>> # Change some config attributes when loading a pretrained config.
>>> config = AutoConfig.from_pretrained("bert-base-uncased", output_attentions=True, foo=False)
>>> config.output_attentions
True
>>> config, unused_kwargs = AutoConfig.from_pretrained(
...     "bert-base-uncased", output_attentions=True, foo=False, return_unused_kwargs=True
... )
>>> config.output_attentions
True
>>> unused_kwargs
{'foo': False}
Source code in mindnlp\transformers\models\auto\configuration_auto.py
@classmethod
@replace_list_option_in_docstrings()
def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
    r"""
    Instantiate one of the configuration classes of the library from a pretrained model configuration.

    The configuration class to instantiate is selected based on the `model_type` property of the config object that
    is loaded, or when it's missing, by falling back to using pattern matching on `pretrained_model_name_or_path`:

    List options

    Args:
        pretrained_model_name_or_path (`str` or `os.PathLike`):
            Can be either:

            - A string, the *model id* of a pretrained model configuration hosted inside a model repo on
            hf-mirror.com. Valid model ids can be located at the root-level, like `bert-base-uncased`, or
            namespaced under a user or organization name, like `dbmdz/bert-base-german-cased`.
            - A path to a *directory* containing a configuration file saved using the
            [`~PretrainedConfig.save_pretrained`] method, or the [`~PreTrainedModel.save_pretrained`] method,
            e.g., `./my_model_directory/`.
            - A path or url to a saved configuration JSON *file*, e.g.,
            `./my_model_directory/configuration.json`.
        cache_dir (`str` or `os.PathLike`, *optional*):
            Path to a directory in which a downloaded pretrained model configuration should be cached if the
            standard cache should not be used.
        force_download (`bool`, *optional*, defaults to `False`):
            Whether or not to force the (re-)download of the model weights and configuration files and override the
            cached versions if they exist.
        resume_download (`bool`, *optional*, defaults to `False`):
            Whether or not to delete incompletely received files. Will attempt to resume the download if such a
            file exists.
        proxies (`Dict[str, str]`, *optional*):
            A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128',
            'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
        revision (`str`, *optional*, defaults to `"main"`):
            The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a
            git-based system for storing models and other artifacts on hf-mirror.com, so `revision` can be any
            identifier allowed by git.
        return_unused_kwargs (`bool`, *optional*, defaults to `False`):
            If `False`, then this function returns just the final configuration object.
            If `True`, then this function returns a `Tuple(config, unused_kwargs)` where *unused_kwargs* is a
            dictionary consisting of the key/value pairs whose keys are not configuration attributes: i.e., the
            part of `kwargs` which has not been used to update `config` and is otherwise ignored.
        trust_remote_code (`bool`, *optional*, defaults to `False`):
            Whether or not to allow for custom models defined on the Hub in their own modeling files. This option
            should only be set to `True` for repositories you trust and in which you have read the code, as it will
            execute code present on the Hub on your local machine.
        kwargs (additional keyword arguments, *optional*):
            The values in kwargs of any keys which are configuration attributes will be used to override the loaded
            values. Behavior concerning key/value pairs whose keys are *not* configuration attributes is controlled
            by the `return_unused_kwargs` keyword parameter.

    Example:
        ```python
        >>> from transformers import AutoConfig
        ...
        >>> # Download configuration from hf-mirror.com and cache.
        >>> config = AutoConfig.from_pretrained("bert-base-uncased")
        ...
        >>> # Download configuration from hf-mirror.com (user-uploaded) and cache.
        >>> config = AutoConfig.from_pretrained("dbmdz/bert-base-german-cased")
        ...
        >>> # If configuration file is in a directory (e.g., was saved using *save_pretrained('./test/saved_model/')*).
        >>> config = AutoConfig.from_pretrained("./test/bert_saved_model/")
        ...
        >>> # Load a specific configuration file.
        >>> config = AutoConfig.from_pretrained("./test/bert_saved_model/my_configuration.json")
        ...
        >>> # Change some config attributes when loading a pretrained config.
        >>> config = AutoConfig.from_pretrained("bert-base-uncased", output_attentions=True, foo=False)
        >>> config.output_attentions
        True
        >>> config, unused_kwargs = AutoConfig.from_pretrained(
        ...     "bert-base-uncased", output_attentions=True, foo=False, return_unused_kwargs=True
        ... )
        >>> config.output_attentions
        True
        >>> unused_kwargs
        {'foo': False}
        ```
    """
    kwargs["name_or_path"] = pretrained_model_name_or_path

    config_dict, unused_kwargs = PretrainedConfig.get_config_dict(
        pretrained_model_name_or_path, **kwargs
    )

    fix_chatglm_name(config_dict)

    if "model_type" in config_dict:
        config_class = CONFIG_MAPPING[config_dict["model_type"]]
        return config_class.from_dict(config_dict, **unused_kwargs)
    # Fallback: use pattern matching on the string.
    # We go from longer names to shorter names to catch roberta before bert (for instance)
    for pattern in sorted(CONFIG_MAPPING.keys(), key=len, reverse=True):
        if pattern in str(pretrained_model_name_or_path).lower():
            return CONFIG_MAPPING[pattern].from_dict(config_dict, **unused_kwargs)

    raise ValueError(
        f"Unrecognized model in {pretrained_model_name_or_path}. "
        f"Should have a `model_type` key in its {CONFIG_NAME}, or contain one of the following strings "
        f"in its name: {', '.join(CONFIG_MAPPING.keys())}"
    )
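
The fallback branch above sorts mapping keys longest-first so that, for example, a checkpoint path containing "roberta" matches the roberta config before the shorter "bert" key. A standalone sketch of that ordering, with stand-in keys:

```python
# Stand-ins for CONFIG_MAPPING keys; the real mapping has many more entries.
keys = ["bert", "roberta", "gpt2"]
name = "./checkpoints/roberta-base-finetuned"

# Longest keys are tried first, so "roberta" wins over "bert".
match = next(
    (k for k in sorted(keys, key=len, reverse=True) if k in name.lower()),
    None,
)
print(match)  # 'roberta'
```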

mindnlp.transformers.models.auto.configuration_auto.AutoConfig.register(model_type, config, exist_ok=False) staticmethod

Register a new configuration for this class.

PARAMETER DESCRIPTION
model_type

The model type like "bert" or "gpt".

TYPE: `str`

config

The config to register.

TYPE: [`PretrainedConfig`]

Source code in mindnlp\transformers\models\auto\configuration_auto.py
@staticmethod
def register(model_type, config, exist_ok=False):
    """
    Register a new configuration for this class.

    Args:
        model_type (`str`): The model type like "bert" or "gpt".
        config ([`PretrainedConfig`]): The config to register.
    """
    if issubclass(config, PretrainedConfig) and config.model_type != model_type:
        raise ValueError(
            "The config you are passing has a `model_type` attribute that is not consistent with the model type "
            f"you passed (config has {config.model_type} and you passed {model_type}. Fix one of those so they "
            "match!"
        )
    CONFIG_MAPPING.register(model_type, config, exist_ok=exist_ok)
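
A hedged sketch of registering a custom configuration so the auto classes can resolve it; `MyConfig` and the `"my-model"` type are hypothetical, and the sketch assumes `PretrainedConfig` is importable from `mindnlp.transformers`:

```python
from mindnlp.transformers import PretrainedConfig
from mindnlp.transformers.models.auto.configuration_auto import AutoConfig

class MyConfig(PretrainedConfig):
    model_type = "my-model"  # must match the registered model type

AutoConfig.register("my-model", MyConfig)
print(type(AutoConfig.for_model("my-model")).__name__)  # 'MyConfig'
```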

mindnlp.transformers.models.auto.tokenization_auto.TOKENIZER_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, TOKENIZER_MAPPING_NAMES) module-attribute

mindnlp.transformers.models.auto.tokenization_auto.AutoTokenizer

This is a generic tokenizer class that will be instantiated as one of the tokenizer classes of the library when created with the [AutoTokenizer.from_pretrained] class method.

This class cannot be instantiated directly using __init__() (throws an error).

Source code in mindnlp\transformers\models\auto\tokenization_auto.py
class AutoTokenizer:
    r"""
    This is a generic tokenizer class that will be instantiated as one of the tokenizer classes of the library when
    created with the [`AutoTokenizer.from_pretrained`] class method.

    This class cannot be instantiated directly using `__init__()` (throws an error).
    """

    def __init__(self):
        raise EnvironmentError(
            "AutoTokenizer is designed to be instantiated "
            "using the `AutoTokenizer.from_pretrained(pretrained_model_name_or_path)` method."
        )

    @classmethod
    @replace_list_option_in_docstrings(TOKENIZER_MAPPING_NAMES)
    def from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs):
        r"""
        Instantiate one of the tokenizer classes of the library from a pretrained model vocabulary.

        The tokenizer class to instantiate is selected based on the `model_type` property of the config object (either
        passed as an argument or loaded from `pretrained_model_name_or_path` if possible), or when it's missing, by
        falling back to using pattern matching on `pretrained_model_name_or_path`:

        List options

        Params:
            pretrained_model_name_or_path (`str` or `os.PathLike`):
                Can be either:

                    - A string, the *model id* of a predefined tokenizer hosted inside a model repo on huggingface.co.
                    - A path to a *directory* containing vocabulary files required by the tokenizer, for instance saved
                      using the [`~PreTrainedTokenizer.save_pretrained`] method, e.g., `./my_model_directory/`.
                    - A path or url to a single saved vocabulary file if and only if the tokenizer only requires a
                      single vocabulary file (like Bert or XLNet), e.g.: `./my_model_directory/vocab.txt`. (Not
                      applicable to all derived classes)
            inputs (additional positional arguments, *optional*):
                Will be passed along to the Tokenizer `__init__()` method.
            config ([`PretrainedConfig`], *optional*):
                The configuration object used to determine the tokenizer class to instantiate.
            cache_dir (`str` or `os.PathLike`, *optional*):
                Path to a directory in which a downloaded pretrained model configuration should be cached if the
                standard cache should not be used.
            force_download (`bool`, *optional*, defaults to `False`):
                Whether or not to force the (re-)download of the model weights and configuration files and override the
                cached versions if they exist.
            resume_download:
                Deprecated and ignored. All downloads are now resumed by default when possible.
                Will be removed in v5 of Transformers.
            proxies (`Dict[str, str]`, *optional*):
                A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128',
                'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
            revision (`str`, *optional*, defaults to `"main"`):
                The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a
                git-based system for storing models and other artifacts on huggingface.co, so `revision` can be any
                identifier allowed by git.
            subfolder (`str`, *optional*):
                In case the relevant files are located inside a subfolder of the model repo on huggingface.co (e.g. for
                facebook/rag-token-base), specify it here.
            use_fast (`bool`, *optional*, defaults to `True`):
                Use a [fast Rust-based tokenizer](https://huggingface.co/docs/tokenizers/index) if it is supported for
                a given model. If a fast tokenizer is not available for a given model, a normal Python-based tokenizer
                is returned instead.
            tokenizer_type (`str`, *optional*):
                Tokenizer type to be loaded.
            trust_remote_code (`bool`, *optional*, defaults to `False`):
                Whether or not to allow for custom models defined on the Hub in their own modeling files. This option
                should only be set to `True` for repositories you trust and in which you have read the code, as it will
                execute code present on the Hub on your local machine.
            kwargs (additional keyword arguments, *optional*):
                Will be passed to the Tokenizer `__init__()` method. Can be used to set special tokens like
                `bos_token`, `eos_token`, `unk_token`, `sep_token`, `pad_token`, `cls_token`, `mask_token`,
                `additional_special_tokens`. See parameters in the `__init__()` for more details.

        Examples:

        ```python
        >>> from transformers import AutoTokenizer

        >>> # Download vocabulary from huggingface.co and cache.
        >>> tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-uncased")

        >>> # Download vocabulary from huggingface.co (user-uploaded) and cache.
        >>> tokenizer = AutoTokenizer.from_pretrained("dbmdz/bert-base-german-cased")

        >>> # If vocabulary files are in a directory (e.g. tokenizer was saved using *save_pretrained('./test/saved_model/')*)
        >>> # tokenizer = AutoTokenizer.from_pretrained("./test/bert_saved_model/")

        >>> # Download vocabulary from huggingface.co and define model-specific arguments
        >>> tokenizer = AutoTokenizer.from_pretrained("FacebookAI/roberta-base", add_prefix_space=True)
        ```"""
        use_auth_token = kwargs.pop("use_auth_token", None)
        if use_auth_token is not None:
            warnings.warn(
                "The `use_auth_token` argument is deprecated. Please use `token` instead.",
                FutureWarning,
            )
            if kwargs.get("token", None) is not None:
                raise ValueError(
                    "`token` and `use_auth_token` are both specified. Please set only the argument `token`."
                )
            kwargs["token"] = use_auth_token

        config = kwargs.pop("config", None)
        kwargs["_from_auto"] = True

        use_fast = kwargs.pop("use_fast", True)
        tokenizer_type = kwargs.pop("tokenizer_type", None)
        trust_remote_code = kwargs.pop("trust_remote_code", None)
        gguf_file = kwargs.get("gguf_file", None)

        # First, let's see whether the tokenizer_type is passed so that we can leverage it
        if tokenizer_type is not None:
            tokenizer_class = None
            tokenizer_class_tuple = TOKENIZER_MAPPING_NAMES.get(tokenizer_type, None)

            if tokenizer_class_tuple is None:
                raise ValueError(
                    f"Passed `tokenizer_type` {tokenizer_type} does not exist. `tokenizer_type` should be one of "
                    f"{', '.join(c for c in TOKENIZER_MAPPING_NAMES.keys())}."
                )

            tokenizer_class_name, tokenizer_fast_class_name = tokenizer_class_tuple

            if use_fast:
                if tokenizer_fast_class_name is not None:
                    tokenizer_class = tokenizer_class_from_name(tokenizer_fast_class_name)
                else:
                    logger.warning(
                        "`use_fast` is set to `True` but the tokenizer class does not have a fast version. "
                        " Falling back to the slow version."
                    )
            if tokenizer_class is None:
                tokenizer_class = tokenizer_class_from_name(tokenizer_class_name)

            if tokenizer_class is None:
                raise ValueError(f"Tokenizer class {tokenizer_class_name} is not currently imported.")

            return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)

        # Next, let's try to use the tokenizer_config file to get the tokenizer class.
        tokenizer_config = get_tokenizer_config(pretrained_model_name_or_path, **kwargs)
        if "_commit_hash" in tokenizer_config:
            kwargs["_commit_hash"] = tokenizer_config["_commit_hash"]
        config_tokenizer_class = tokenizer_config.get("tokenizer_class")
        tokenizer_auto_map = None
        if "auto_map" in tokenizer_config:
            if isinstance(tokenizer_config["auto_map"], (tuple, list)):
                # Legacy format for dynamic tokenizers
                tokenizer_auto_map = tokenizer_config["auto_map"]
            else:
                tokenizer_auto_map = tokenizer_config["auto_map"].get("AutoTokenizer", None)

        # If that did not work, let's try to use the config.
        if config_tokenizer_class is None:
            if not isinstance(config, PretrainedConfig):
                # if gguf_file:
                #     gguf_path = cached_file(pretrained_model_name_or_path, gguf_file, **kwargs)
                #     config_dict = load_gguf_checkpoint(gguf_path, return_tensors=False)["config"]
                #     config = AutoConfig.for_model(**config_dict)
                # else:
                config = AutoConfig.from_pretrained(
                    pretrained_model_name_or_path, trust_remote_code=trust_remote_code, **kwargs
                )
            config_tokenizer_class = config.tokenizer_class
            if hasattr(config, "auto_map") and "AutoTokenizer" in config.auto_map:
                tokenizer_auto_map = config.auto_map["AutoTokenizer"]

        has_remote_code = tokenizer_auto_map is not None
        has_local_code = type(config) in TOKENIZER_MAPPING or (
            config_tokenizer_class is not None
            and (
                tokenizer_class_from_name(config_tokenizer_class) is not None
                or tokenizer_class_from_name(config_tokenizer_class + "Fast") is not None
            )
        )
        # trust_remote_code = resolve_trust_remote_code(
        #     trust_remote_code, pretrained_model_name_or_path, has_local_code, has_remote_code
        # )

        # if has_remote_code and trust_remote_code:
        #     if use_fast and tokenizer_auto_map[1] is not None:
        #         class_ref = tokenizer_auto_map[1]
        #     else:
        #         class_ref = tokenizer_auto_map[0]
        #     tokenizer_class = get_class_from_dynamic_module(class_ref, pretrained_model_name_or_path, **kwargs)
        #     _ = kwargs.pop("code_revision", None)
        #     if os.path.isdir(pretrained_model_name_or_path):
        #         tokenizer_class.register_for_auto_class()
        #     return tokenizer_class.from_pretrained(
        #         pretrained_model_name_or_path, *inputs, trust_remote_code=trust_remote_code, **kwargs
        #     )
        if config_tokenizer_class is not None:
            tokenizer_class = None
            if use_fast and not config_tokenizer_class.endswith("Fast"):
                tokenizer_class_candidate = f"{config_tokenizer_class}Fast"
                tokenizer_class = tokenizer_class_from_name(tokenizer_class_candidate)
            if tokenizer_class is None:
                tokenizer_class_candidate = config_tokenizer_class
                tokenizer_class = tokenizer_class_from_name(tokenizer_class_candidate)
            if tokenizer_class is None:
                raise ValueError(
                    f"Tokenizer class {tokenizer_class_candidate} does not exist or is not currently imported."
                )
            return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)

        # Otherwise we have to be creative.
        # if model is an encoder decoder, the encoder tokenizer class is used by default
        if isinstance(config, EncoderDecoderConfig):
            if type(config.decoder) is not type(config.encoder):  # noqa: E721
                logger.warning(
                    f"The encoder model config class: {config.encoder.__class__} is different from the decoder model "
                    f"config class: {config.decoder.__class__}. It is not recommended to use the "
                    "`AutoTokenizer.from_pretrained()` method in this case. Please use the encoder and decoder "
                    "specific tokenizer classes."
                )
            config = config.encoder

        model_type = config_class_to_model_type(type(config).__name__)
        if model_type is not None:
            tokenizer_class_py, tokenizer_class_fast = TOKENIZER_MAPPING[type(config)]

            if tokenizer_class_fast and (use_fast or tokenizer_class_py is None):
                return tokenizer_class_fast.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
            else:
                if tokenizer_class_py is not None:
                    return tokenizer_class_py.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
                else:
                    raise ValueError(
                        "This tokenizer cannot be instantiated. Please make sure you have `sentencepiece` installed "
                        "in order to use this tokenizer."
                    )

        raise ValueError(
            f"Unrecognized configuration class {config.__class__} to build an AutoTokenizer.\n"
            f"Model type should be one of {', '.join(c.__name__ for c in TOKENIZER_MAPPING.keys())}."
        )

    def register(config_class, slow_tokenizer_class=None, fast_tokenizer_class=None, exist_ok=False):
        """
        Register a new tokenizer in this mapping.


        Args:
            config_class ([`PretrainedConfig`]):
                The configuration corresponding to the model to register.
            slow_tokenizer_class ([`PreTrainedTokenizer`], *optional*):
                The slow tokenizer to register.
            fast_tokenizer_class ([`PreTrainedTokenizerFast`], *optional*):
                The fast tokenizer to register.
        """
        if slow_tokenizer_class is None and fast_tokenizer_class is None:
            raise ValueError("You need to pass either a `slow_tokenizer_class` or a `fast_tokenizer_class")
        if slow_tokenizer_class is not None and issubclass(slow_tokenizer_class, PreTrainedTokenizerFast):
            raise ValueError("You passed a fast tokenizer in the `slow_tokenizer_class`.")
        if fast_tokenizer_class is not None and issubclass(fast_tokenizer_class, PreTrainedTokenizer):
            raise ValueError("You passed a slow tokenizer in the `fast_tokenizer_class`.")

        if (
            slow_tokenizer_class is not None
            and fast_tokenizer_class is not None
            and issubclass(fast_tokenizer_class, PreTrainedTokenizerFast)
            and fast_tokenizer_class.slow_tokenizer_class != slow_tokenizer_class
        ):
            raise ValueError(
                "The fast tokenizer class you are passing has a `slow_tokenizer_class` attribute that is not "
                "consistent with the slow tokenizer class you passed (fast tokenizer has "
                f"{fast_tokenizer_class.slow_tokenizer_class} and you passed {slow_tokenizer_class}. Fix one of those "
                "so they match!"
            )

        # Avoid resetting a set slow/fast tokenizer if we are passing just the other ones.
        if config_class in TOKENIZER_MAPPING._extra_content:
            existing_slow, existing_fast = TOKENIZER_MAPPING[config_class]
            if slow_tokenizer_class is None:
                slow_tokenizer_class = existing_slow
            if fast_tokenizer_class is None:
                fast_tokenizer_class = existing_fast

        TOKENIZER_MAPPING.register(config_class, (slow_tokenizer_class, fast_tokenizer_class), exist_ok=exist_ok)
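
A small sketch of the first guard in `register` above; `MyConfig` is hypothetical, and a real registration would also pass a `slow_tokenizer_class` and/or `fast_tokenizer_class` subclassing `PreTrainedTokenizer` / `PreTrainedTokenizerFast`:

```python
from mindnlp.transformers import PretrainedConfig
from mindnlp.transformers.models.auto.tokenization_auto import AutoTokenizer

class MyConfig(PretrainedConfig):  # hypothetical config class
    model_type = "my-model"

try:
    AutoTokenizer.register(MyConfig)  # neither tokenizer class supplied
except ValueError as err:
    print(err)
```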

mindnlp.transformers.models.auto.tokenization_auto.AutoTokenizer.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs) classmethod

Instantiate one of the tokenizer classes of the library from a pretrained model vocabulary.

The tokenizer class to instantiate is selected based on the model_type property of the config object (either passed as an argument or loaded from pretrained_model_name_or_path if possible), or when it's missing, by falling back to using pattern matching on pretrained_model_name_or_path:

List options

PARAMETER DESCRIPTION
pretrained_model_name_or_path

Can be either:

- A string, the *model id* of a predefined tokenizer hosted inside a model repo on huggingface.co.
- A path to a *directory* containing vocabulary files required by the tokenizer, for instance saved
  using the [`~PreTrainedTokenizer.save_pretrained`] method, e.g., `./my_model_directory/`.
- A path or url to a single saved vocabulary file if and only if the tokenizer only requires a
  single vocabulary file (like Bert or XLNet), e.g.: `./my_model_directory/vocab.txt`. (Not
  applicable to all derived classes)

TYPE: `str` or `os.PathLike`

inputs

Will be passed along to the Tokenizer __init__() method.

TYPE: additional positional arguments, *optional* DEFAULT: ()

cache_dir

Path to a directory in which a downloaded pretrained model configuration should be cached if the standard cache should not be used.

TYPE: `str` or `os.PathLike`, *optional*

force_download

Whether or not to force the (re-)download of the model weights and configuration files and override the cached versions if they exist.

TYPE: `bool`, *optional*, defaults to `False`

resume_download

Deprecated and ignored. All downloads are now resumed by default when possible. Will be removed in v5 of Transformers.

proxies

A dictionary of proxy servers to use by protocol or endpoint, e.g., {'http': 'foo.bar:3128', 'http://hostname': 'foo.bar:4012'}. The proxies are used on each request.

TYPE: `Dict[str, str]`, *optional*

revision

The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a git-based system for storing models and other artifacts on huggingface.co, so revision can be any identifier allowed by git.

TYPE: `str`, *optional*, defaults to `"main"`

subfolder

In case the relevant files are located inside a subfolder of the model repo on huggingface.co (e.g. for facebook/rag-token-base), specify it here.

TYPE: `str`, *optional*

use_fast

Use a fast Rust-based tokenizer if it is supported for a given model. If a fast tokenizer is not available for a given model, a normal Python-based tokenizer is returned instead.

TYPE: `bool`, *optional*, defaults to `True`

tokenizer_type

Tokenizer type to be loaded.

TYPE: `str`, *optional*

trust_remote_code

Whether or not to allow for custom models defined on the Hub in their own modeling files. This option should only be set to True for repositories you trust and in which you have read the code, as it will execute code present on the Hub on your local machine.

TYPE: `bool`, *optional*, defaults to `False`

kwargs

Will be passed to the Tokenizer __init__() method. Can be used to set special tokens like bos_token, eos_token, unk_token, sep_token, pad_token, cls_token, mask_token, additional_special_tokens. See parameters in the __init__() for more details.

TYPE: additional keyword arguments, *optional* DEFAULT: {}

>>> from transformers import AutoTokenizer

>>> # Download vocabulary from huggingface.co and cache.
>>> tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-uncased")

>>> # Download vocabulary from huggingface.co (user-uploaded) and cache.
>>> tokenizer = AutoTokenizer.from_pretrained("dbmdz/bert-base-german-cased")

>>> # If vocabulary files are in a directory (e.g. tokenizer was saved using *save_pretrained('./test/saved_model/')*)
>>> # tokenizer = AutoTokenizer.from_pretrained("./test/bert_saved_model/")

>>> # Download vocabulary from huggingface.co and define model-specific arguments
>>> tokenizer = AutoTokenizer.from_pretrained("FacebookAI/roberta-base", add_prefix_space=True)
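The tokenizer_type and use_fast arguments described above can also be combined to pin the lookup to a specific tokenizer family. A minimal sketch, reusing the BERT checkpoint from the examples above:

>>> # Force the slow, pure-Python BERT tokenizer regardless of what the config resolves to.
>>> tokenizer = AutoTokenizer.from_pretrained(
...     "google-bert/bert-base-uncased", tokenizer_type="bert", use_fast=False
... )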
Source code in mindnlp\transformers\models\auto\tokenization_auto.py
@classmethod
@replace_list_option_in_docstrings(TOKENIZER_MAPPING_NAMES)
def from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs):
    r"""
    Instantiate one of the tokenizer classes of the library from a pretrained model vocabulary.

    The tokenizer class to instantiate is selected based on the `model_type` property of the config object (either
    passed as an argument or loaded from `pretrained_model_name_or_path` if possible), or when it's missing, by
    falling back to using pattern matching on `pretrained_model_name_or_path`:

    List options

    Params:
        pretrained_model_name_or_path (`str` or `os.PathLike`):
            Can be either:

                - A string, the *model id* of a predefined tokenizer hosted inside a model repo on huggingface.co.
                - A path to a *directory* containing vocabulary files required by the tokenizer, for instance saved
                  using the [`~PreTrainedTokenizer.save_pretrained`] method, e.g., `./my_model_directory/`.
                - A path or url to a single saved vocabulary file if and only if the tokenizer only requires a
                  single vocabulary file (like Bert or XLNet), e.g.: `./my_model_directory/vocab.txt`. (Not
                  applicable to all derived classes)
        inputs (additional positional arguments, *optional*):
            Will be passed along to the Tokenizer `__init__()` method.
        config ([`PretrainedConfig`], *optional*):
            The configuration object used to determine the tokenizer class to instantiate.
        cache_dir (`str` or `os.PathLike`, *optional*):
            Path to a directory in which a downloaded pretrained model configuration should be cached if the
            standard cache should not be used.
        force_download (`bool`, *optional*, defaults to `False`):
            Whether or not to force the (re-)download of the model weights and configuration files, overriding
            the cached versions if they exist.
        resume_download:
            Deprecated and ignored. All downloads are now resumed by default when possible.
            Will be removed in v5 of Transformers.
        proxies (`Dict[str, str]`, *optional*):
            A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128',
            'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
        revision (`str`, *optional*, defaults to `"main"`):
            The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a
            git-based system for storing models and other artifacts on huggingface.co, so `revision` can be any
            identifier allowed by git.
        subfolder (`str`, *optional*):
            In case the relevant files are located inside a subfolder of the model repo on huggingface.co (e.g. for
            facebook/rag-token-base), specify it here.
        use_fast (`bool`, *optional*, defaults to `True`):
            Use a [fast Rust-based tokenizer](https://huggingface.co/docs/tokenizers/index) if it is supported for
            a given model. If a fast tokenizer is not available for a given model, a normal Python-based tokenizer
            is returned instead.
        tokenizer_type (`str`, *optional*):
            Tokenizer type to be loaded.
        trust_remote_code (`bool`, *optional*, defaults to `False`):
            Whether or not to allow for custom models defined on the Hub in their own modeling files. This option
            should only be set to `True` for repositories you trust and in which you have read the code, as it will
            execute code present on the Hub on your local machine.
        kwargs (additional keyword arguments, *optional*):
            Will be passed to the Tokenizer `__init__()` method. Can be used to set special tokens like
            `bos_token`, `eos_token`, `unk_token`, `sep_token`, `pad_token`, `cls_token`, `mask_token`,
            `additional_special_tokens`. See parameters in the `__init__()` for more details.

    Examples:

    ```python
    >>> from transformers import AutoTokenizer

    >>> # Download vocabulary from huggingface.co and cache.
    >>> tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-uncased")

    >>> # Download vocabulary from huggingface.co (user-uploaded) and cache.
    >>> tokenizer = AutoTokenizer.from_pretrained("dbmdz/bert-base-german-cased")

    >>> # If vocabulary files are in a directory (e.g. tokenizer was saved using *save_pretrained('./test/saved_model/')*)
    >>> # tokenizer = AutoTokenizer.from_pretrained("./test/bert_saved_model/")

    >>> # Download vocabulary from huggingface.co and define model-specific arguments
    >>> tokenizer = AutoTokenizer.from_pretrained("FacebookAI/roberta-base", add_prefix_space=True)
    ```"""
    use_auth_token = kwargs.pop("use_auth_token", None)
    if use_auth_token is not None:
        warnings.warn(
            "The `use_auth_token` argument is deprecated. Please use `token` instead.",
            FutureWarning,
        )
        if kwargs.get("token", None) is not None:
            raise ValueError(
                "`token` and `use_auth_token` are both specified. Please set only the argument `token`."
            )
        kwargs["token"] = use_auth_token

    config = kwargs.pop("config", None)
    kwargs["_from_auto"] = True

    use_fast = kwargs.pop("use_fast", True)
    tokenizer_type = kwargs.pop("tokenizer_type", None)
    trust_remote_code = kwargs.pop("trust_remote_code", None)
    gguf_file = kwargs.get("gguf_file", None)

    # First, let's see whether the tokenizer_type is passed so that we can leverage it
    if tokenizer_type is not None:
        tokenizer_class = None
        tokenizer_class_tuple = TOKENIZER_MAPPING_NAMES.get(tokenizer_type, None)

        if tokenizer_class_tuple is None:
            raise ValueError(
                f"Passed `tokenizer_type` {tokenizer_type} does not exist. `tokenizer_type` should be one of "
                f"{', '.join(c for c in TOKENIZER_MAPPING_NAMES.keys())}."
            )

        tokenizer_class_name, tokenizer_fast_class_name = tokenizer_class_tuple

        if use_fast:
            if tokenizer_fast_class_name is not None:
                tokenizer_class = tokenizer_class_from_name(tokenizer_fast_class_name)
            else:
                logger.warning(
                    "`use_fast` is set to `True` but the tokenizer class does not have a fast version. "
                    "Falling back to the slow version."
                )
        if tokenizer_class is None:
            tokenizer_class = tokenizer_class_from_name(tokenizer_class_name)

        if tokenizer_class is None:
            raise ValueError(f"Tokenizer class {tokenizer_class_name} is not currently imported.")

        return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)

    # Next, let's try to use the tokenizer_config file to get the tokenizer class.
    tokenizer_config = get_tokenizer_config(pretrained_model_name_or_path, **kwargs)
    if "_commit_hash" in tokenizer_config:
        kwargs["_commit_hash"] = tokenizer_config["_commit_hash"]
    config_tokenizer_class = tokenizer_config.get("tokenizer_class")
    tokenizer_auto_map = None
    if "auto_map" in tokenizer_config:
        if isinstance(tokenizer_config["auto_map"], (tuple, list)):
            # Legacy format for dynamic tokenizers
            tokenizer_auto_map = tokenizer_config["auto_map"]
        else:
            tokenizer_auto_map = tokenizer_config["auto_map"].get("AutoTokenizer", None)

    # If that did not work, let's try to use the config.
    if config_tokenizer_class is None:
        if not isinstance(config, PretrainedConfig):
            # if gguf_file:
            #     gguf_path = cached_file(pretrained_model_name_or_path, gguf_file, **kwargs)
            #     config_dict = load_gguf_checkpoint(gguf_path, return_tensors=False)["config"]
            #     config = AutoConfig.for_model(**config_dict)
            # else:
            config = AutoConfig.from_pretrained(
                pretrained_model_name_or_path, trust_remote_code=trust_remote_code, **kwargs
            )
        config_tokenizer_class = config.tokenizer_class
        if hasattr(config, "auto_map") and "AutoTokenizer" in config.auto_map:
            tokenizer_auto_map = config.auto_map["AutoTokenizer"]

    has_remote_code = tokenizer_auto_map is not None
    has_local_code = type(config) in TOKENIZER_MAPPING or (
        config_tokenizer_class is not None
        and (
            tokenizer_class_from_name(config_tokenizer_class) is not None
            or tokenizer_class_from_name(config_tokenizer_class + "Fast") is not None
        )
    )
    # trust_remote_code = resolve_trust_remote_code(
    #     trust_remote_code, pretrained_model_name_or_path, has_local_code, has_remote_code
    # )

    # if has_remote_code and trust_remote_code:
    #     if use_fast and tokenizer_auto_map[1] is not None:
    #         class_ref = tokenizer_auto_map[1]
    #     else:
    #         class_ref = tokenizer_auto_map[0]
    #     tokenizer_class = get_class_from_dynamic_module(class_ref, pretrained_model_name_or_path, **kwargs)
    #     _ = kwargs.pop("code_revision", None)
    #     if os.path.isdir(pretrained_model_name_or_path):
    #         tokenizer_class.register_for_auto_class()
    #     return tokenizer_class.from_pretrained(
    #         pretrained_model_name_or_path, *inputs, trust_remote_code=trust_remote_code, **kwargs
    #     )
    if config_tokenizer_class is not None:
        tokenizer_class = None
        if use_fast and not config_tokenizer_class.endswith("Fast"):
            tokenizer_class_candidate = f"{config_tokenizer_class}Fast"
            tokenizer_class = tokenizer_class_from_name(tokenizer_class_candidate)
        if tokenizer_class is None:
            tokenizer_class_candidate = config_tokenizer_class
            tokenizer_class = tokenizer_class_from_name(tokenizer_class_candidate)
        if tokenizer_class is None:
            raise ValueError(
                f"Tokenizer class {tokenizer_class_candidate} does not exist or is not currently imported."
            )
        return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)

    # Otherwise we have to be creative.
    # if model is an encoder decoder, the encoder tokenizer class is used by default
    if isinstance(config, EncoderDecoderConfig):
        if type(config.decoder) is not type(config.encoder):  # noqa: E721
            logger.warning(
                f"The encoder model config class: {config.encoder.__class__} is different from the decoder model "
                f"config class: {config.decoder.__class__}. It is not recommended to use the "
                "`AutoTokenizer.from_pretrained()` method in this case. Please use the encoder and decoder "
                "specific tokenizer classes."
            )
        config = config.encoder

    model_type = config_class_to_model_type(type(config).__name__)
    if model_type is not None:
        tokenizer_class_py, tokenizer_class_fast = TOKENIZER_MAPPING[type(config)]

        if tokenizer_class_fast and (use_fast or tokenizer_class_py is None):
            return tokenizer_class_fast.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
        else:
            if tokenizer_class_py is not None:
                return tokenizer_class_py.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
            else:
                raise ValueError(
                    "This tokenizer cannot be instantiated. Please make sure you have `sentencepiece` installed "
                    "in order to use this tokenizer."
                )

    raise ValueError(
        f"Unrecognized configuration class {config.__class__} to build an AutoTokenizer.\n"
        f"Model type should be one of {', '.join(c.__name__ for c in TOKENIZER_MAPPING.keys())}."
    )
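As the code above shows, an explicit config argument is consulted only when the tokenizer config file does not name a tokenizer class; passing one is nevertheless always valid and simply spares that fallback config lookup. A sketch:

>>> from transformers import AutoConfig, AutoTokenizer
>>> config = AutoConfig.from_pretrained("google-bert/bert-base-uncased")
>>> tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-uncased", config=config)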

mindnlp.transformers.models.auto.tokenization_auto.AutoTokenizer.register(config_class, slow_tokenizer_class=None, fast_tokenizer_class=None, exist_ok=False)

Register a new tokenizer in this mapping.

PARAMETER DESCRIPTION
config_class

The configuration corresponding to the model to register.

TYPE: [`PretrainedConfig`]

slow_tokenizer_class

The slow tokenizer to register.

TYPE: [`PretrainedTokenizer`], *optional* DEFAULT: None

fast_tokenizer_class

The fast tokenizer to register.

TYPE: [`PretrainedTokenizerFast`], *optional* DEFAULT: None

Source code in mindnlp\transformers\models\auto\tokenization_auto.py
def register(config_class, slow_tokenizer_class=None, fast_tokenizer_class=None, exist_ok=False):
    """
    Register a new tokenizer in this mapping.


    Args:
        config_class ([`PretrainedConfig`]):
            The configuration corresponding to the model to register.
        slow_tokenizer_class ([`PretrainedTokenizer`], *optional*):
            The slow tokenizer to register.
        fast_tokenizer_class ([`PretrainedTokenizerFast`], *optional*):
            The fast tokenizer to register.
    """
    if slow_tokenizer_class is None and fast_tokenizer_class is None:
        raise ValueError("You need to pass either a `slow_tokenizer_class` or a `fast_tokenizer_class`")
    if slow_tokenizer_class is not None and issubclass(slow_tokenizer_class, PreTrainedTokenizerFast):
        raise ValueError("You passed a fast tokenizer in the `slow_tokenizer_class`.")
    if fast_tokenizer_class is not None and issubclass(fast_tokenizer_class, PreTrainedTokenizer):
        raise ValueError("You passed a slow tokenizer in the `fast_tokenizer_class`.")

    if (
        slow_tokenizer_class is not None
        and fast_tokenizer_class is not None
        and issubclass(fast_tokenizer_class, PreTrainedTokenizerFast)
        and fast_tokenizer_class.slow_tokenizer_class != slow_tokenizer_class
    ):
        raise ValueError(
            "The fast tokenizer class you are passing has a `slow_tokenizer_class` attribute that is not "
            "consistent with the slow tokenizer class you passed (fast tokenizer has "
            f"{fast_tokenizer_class.slow_tokenizer_class} and you passed {slow_tokenizer_class}). Fix one of those "
            "so they match!"
        )

    # Avoid resetting a set slow/fast tokenizer if we are passing just the other ones.
    if config_class in TOKENIZER_MAPPING._extra_content:
        existing_slow, existing_fast = TOKENIZER_MAPPING[config_class]
        if slow_tokenizer_class is None:
            slow_tokenizer_class = existing_slow
        if fast_tokenizer_class is None:
            fast_tokenizer_class = existing_fast

    TOKENIZER_MAPPING.register(config_class, (slow_tokenizer_class, fast_tokenizer_class), exist_ok=exist_ok)
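
A usage sketch for the method above; CustomConfig and CustomTokenizer are hypothetical stand-ins for a real config/tokenizer pair, and only the registration call itself is exercised:

>>> from transformers import AutoTokenizer, PretrainedConfig, PreTrainedTokenizer
>>> class CustomConfig(PretrainedConfig):
...     model_type = "custom"  # hypothetical model type
...
>>> class CustomTokenizer(PreTrainedTokenizer):
...     pass  # a real tokenizer would also implement the vocabulary methods
...
>>> AutoTokenizer.register(CustomConfig, slow_tokenizer_class=CustomTokenizer)

Registering a config class that already has tokenizers attached raises unless exist_ok=True is passed through to the underlying mapping.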

mindnlp.transformers.models.auto.feature_extraction_auto.FEATURE_EXTRACTOR_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, FEATURE_EXTRACTOR_MAPPING_NAMES) module-attribute

mindnlp.transformers.models.auto.feature_extraction_auto.AutoFeatureExtractor

This is a generic feature extractor class that will be instantiated as one of the feature extractor classes of the library when created with the [AutoFeatureExtractor.from_pretrained] class method.

This class cannot be instantiated directly using __init__() (throws an error).

Source code in mindnlp\transformers\models\auto\feature_extraction_auto.py
class AutoFeatureExtractor:
    r"""
    This is a generic feature extractor class that will be instantiated as one of the feature extractor classes of the
    library when created with the [`AutoFeatureExtractor.from_pretrained`] class method.

    This class cannot be instantiated directly using `__init__()` (throws an error).
    """
    def __init__(self):
        """
        Initializes an instance of the AutoFeatureExtractor class.

        Args:
            self: An instance of the AutoFeatureExtractor class.

        Returns:
            None.

        Raises:
            EnvironmentError: This exception is raised with the message 'AutoFeatureExtractor is designed to be
                instantiated using the `AutoFeatureExtractor.from_pretrained(pretrained_model_name_or_path)` method.'
        """
        raise EnvironmentError(
            "AutoFeatureExtractor is designed to be instantiated "
            "using the `AutoFeatureExtractor.from_pretrained(pretrained_model_name_or_path)` method."
        )

    @classmethod
    @replace_list_option_in_docstrings(FEATURE_EXTRACTOR_MAPPING_NAMES)
    def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
        r"""
        Instantiate one of the feature extractor classes of the library from a pretrained model vocabulary.

        The feature extractor class to instantiate is selected based on the `model_type` property of the config object
        (either passed as an argument or loaded from `pretrained_model_name_or_path` if possible), or when it's
        missing, by falling back to using pattern matching on `pretrained_model_name_or_path`:

        List options

        Params:
            pretrained_model_name_or_path (`str` or `os.PathLike`):
                This can be either:

                - a string, the *model id* of a pretrained feature_extractor hosted inside a model repo on
                  hf-mirror.com.
                - a path to a *directory* containing a feature extractor file saved using the
                  [`~feature_extraction_utils.FeatureExtractionMixin.save_pretrained`] method, e.g.,
                  `./my_model_directory/`.
                - a path or url to a saved feature extractor JSON *file*, e.g.,
                  `./my_model_directory/preprocessor_config.json`.
            cache_dir (`str` or `os.PathLike`, *optional*):
                Path to a directory in which a downloaded pretrained model feature extractor should be cached if the
                standard cache should not be used.
            force_download (`bool`, *optional*, defaults to `False`):
                Whether or not to force the (re-)download of the feature extractor files, overriding the cached
                versions if they exist.
            resume_download (`bool`, *optional*, defaults to `False`):
                Whether or not to delete an incompletely received file. Attempts to resume the download if such a
                file exists.
            proxies (`Dict[str, str]`, *optional*):
                A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128',
                'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
            token (`str` or *bool*, *optional*):
                The token to use as HTTP bearer authorization for remote files. If `True`, will use the token generated
                when running `huggingface-cli login` (stored in `~/.huggingface`).
            revision (`str`, *optional*, defaults to `"main"`):
                The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a
                git-based system for storing models and other artifacts on hf-mirror.com, so `revision` can be any
                identifier allowed by git.
            return_unused_kwargs (`bool`, *optional*, defaults to `False`):
                If `False`, then this function returns just the final feature extractor object. If `True`, then this
                function returns a `Tuple(feature_extractor, unused_kwargs)` where *unused_kwargs* is a dictionary
                consisting of the key/value pairs whose keys are not feature extractor attributes: i.e., the part of
                `kwargs` which has not been used to update `feature_extractor` and is otherwise ignored.
            trust_remote_code (`bool`, *optional*, defaults to `False`):
                Whether or not to allow for custom models defined on the Hub in their own modeling files. This option
                should only be set to `True` for repositories you trust and in which you have read the code, as it will
                execute code present on the Hub on your local machine.
            kwargs (`Dict[str, Any]`, *optional*):
                The values in kwargs of any keys which are feature extractor attributes will be used to override the
                loaded values. Behavior concerning key/value pairs whose keys are *not* feature extractor attributes is
                controlled by the `return_unused_kwargs` keyword parameter.

        <Tip>

        Passing `token=True` is required when you want to use a private model.

        </Tip>

        Example:
            ```python
            >>> from transformers import AutoFeatureExtractor
            ...
            >>> # Download feature extractor from hf-mirror.com and cache.
            >>> feature_extractor = AutoFeatureExtractor.from_pretrained("facebook/wav2vec2-base-960h")
            ...
            >>> # If feature extractor files are in a directory (e.g. feature extractor was saved using *save_pretrained('./test/saved_model/')*)
            >>> # feature_extractor = AutoFeatureExtractor.from_pretrained("./test/saved_model/")
            ```
        """
        use_auth_token = kwargs.pop("use_auth_token", None)
        if use_auth_token is not None:
            warnings.warn(
                "The `use_auth_token` argument is deprecated. Please use `token` instead.",
                FutureWarning,
            )
            if kwargs.get("token", None) is not None:
                raise ValueError(
                    "`token` and `use_auth_token` are both specified. Please set only the argument `token`."
                )
            kwargs["token"] = use_auth_token

        config = kwargs.pop("config", None)
        kwargs["_from_auto"] = True

        config_dict, _ = FeatureExtractionMixin.get_feature_extractor_dict(pretrained_model_name_or_path, **kwargs)
        feature_extractor_class = config_dict.get("feature_extractor_type", None)
        feature_extractor_auto_map = None
        if "AutoFeatureExtractor" in config_dict.get("auto_map", {}):
            feature_extractor_auto_map = config_dict["auto_map"]["AutoFeatureExtractor"]

        # If we don't find the feature extractor class in the feature extractor config, let's try the model config.
        if feature_extractor_class is None and feature_extractor_auto_map is None:
            if not isinstance(config, PretrainedConfig):
                config = AutoConfig.from_pretrained(pretrained_model_name_or_path, **kwargs)
            # It could be in `config.feature_extractor_type`
            feature_extractor_class = getattr(config, "feature_extractor_type", None)
            if hasattr(config, "auto_map") and "AutoFeatureExtractor" in config.auto_map:
                feature_extractor_auto_map = config.auto_map["AutoFeatureExtractor"]

        if feature_extractor_class is not None:
            feature_extractor_class = feature_extractor_class_from_name(feature_extractor_class)

        if feature_extractor_class is not None:
            return feature_extractor_class.from_dict(config_dict, **kwargs)
        # Last try: we use the FEATURE_EXTRACTOR_MAPPING.
        if type(config) in FEATURE_EXTRACTOR_MAPPING:
            feature_extractor_class = FEATURE_EXTRACTOR_MAPPING[type(config)]
            return feature_extractor_class.from_dict(config_dict, **kwargs)

        raise ValueError(
            f"Unrecognized feature extractor in {pretrained_model_name_or_path}. Should have a "
            f"`feature_extractor_type` key in its {FEATURE_EXTRACTOR_NAME} of {CONFIG_NAME}, or one of the following "
            f"`model_type` keys in its {CONFIG_NAME}: {', '.join(c for c in FEATURE_EXTRACTOR_MAPPING_NAMES.keys())}"
        )

    @staticmethod
    def register(config_class, feature_extractor_class, exist_ok=False):
        """
        Register a new feature extractor for this class.

        Args:
            config_class ([`PretrainedConfig`]):
                The configuration corresponding to the model to register.
            feature_extractor_class ([`FeatureExtractorMixin`]): The feature extractor to register.
        """
        FEATURE_EXTRACTOR_MAPPING.register(config_class, feature_extractor_class, exist_ok=exist_ok)

mindnlp.transformers.models.auto.feature_extraction_auto.AutoFeatureExtractor.__init__()

Initializes an instance of the AutoFeatureExtractor class.

PARAMETER DESCRIPTION
self

An instance of the AutoFeatureExtractor class.

RETURNS DESCRIPTION

None.

RAISES DESCRIPTION
EnvironmentError

This exception is raised with the message 'AutoFeatureExtractor is designed to be instantiated using the AutoFeatureExtractor.from_pretrained(pretrained_model_name_or_path) method.'

Source code in mindnlp\transformers\models\auto\feature_extraction_auto.py
def __init__(self):
    """
    Initializes an instance of the AutoFeatureExtractor class.

    Args:
        self: An instance of the AutoFeatureExtractor class.

    Returns:
        None.

    Raises:
        EnvironmentError: This exception is raised with the message 'AutoFeatureExtractor is designed to be
            instantiated using the `AutoFeatureExtractor.from_pretrained(pretrained_model_name_or_path)` method.'
    """
    raise EnvironmentError(
        "AutoFeatureExtractor is designed to be instantiated "
        "using the `AutoFeatureExtractor.from_pretrained(pretrained_model_name_or_path)` method."
    )

mindnlp.transformers.models.auto.feature_extraction_auto.AutoFeatureExtractor.from_pretrained(pretrained_model_name_or_path, **kwargs) classmethod

Instantiate one of the feature extractor classes of the library from a pretrained model vocabulary.

The feature extractor class to instantiate is selected based on the model_type property of the config object (either passed as an argument or loaded from pretrained_model_name_or_path if possible), or when it's missing, by falling back to using pattern matching on pretrained_model_name_or_path:

List options

PARAMETER DESCRIPTION
pretrained_model_name_or_path

This can be either:

  • a string, the model id of a pretrained feature_extractor hosted inside a model repo on hf-mirror.com.
  • a path to a directory containing a feature extractor file saved using the [~feature_extraction_utils.FeatureExtractionMixin.save_pretrained] method, e.g., ./my_model_directory/.
  • a path or url to a saved feature extractor JSON file, e.g., ./my_model_directory/preprocessor_config.json.

TYPE: `str` or `os.PathLike`

cache_dir

Path to a directory in which a downloaded pretrained model feature extractor should be cached if the standard cache should not be used.

TYPE: `str` or `os.PathLike`, *optional*

force_download

Whether or not to force the (re-)download of the feature extractor files, overriding the cached versions if they exist.

TYPE: `bool`, *optional*, defaults to `False`

resume_download

Whether or not to delete an incompletely received file. Attempts to resume the download if such a file exists.

TYPE: `bool`, *optional*, defaults to `False`

proxies

A dictionary of proxy servers to use by protocol or endpoint, e.g., {'http': 'foo.bar:3128', 'http://hostname': 'foo.bar:4012'}. The proxies are used on each request.

TYPE: `Dict[str, str]`, *optional*

token

The token to use as HTTP bearer authorization for remote files. If True, will use the token generated when running huggingface-cli login (stored in ~/.huggingface).

TYPE: `str` or *bool*, *optional*

revision

The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a git-based system for storing models and other artifacts on hf-mirror.com, so revision can be any identifier allowed by git.

TYPE: `str`, *optional*, defaults to `"main"`

return_unused_kwargs

If False, then this function returns just the final feature extractor object. If True, then this function returns a Tuple(feature_extractor, unused_kwargs) where unused_kwargs is a dictionary consisting of the key/value pairs whose keys are not feature extractor attributes: i.e., the part of kwargs which has not been used to update feature_extractor and is otherwise ignored.

TYPE: `bool`, *optional*, defaults to `False`

trust_remote_code

Whether or not to allow for custom models defined on the Hub in their own modeling files. This option should only be set to True for repositories you trust and in which you have read the code, as it will execute code present on the Hub on your local machine.

TYPE: `bool`, *optional*, defaults to `False`

kwargs

The values in kwargs of any keys which are feature extractor attributes will be used to override the loaded values. Behavior concerning key/value pairs whose keys are not feature extractor attributes is controlled by the return_unused_kwargs keyword parameter.

TYPE: `Dict[str, Any]`, *optional* DEFAULT: {}

Passing token=True is required when you want to use a private model.

Example
>>> from transformers import AutoFeatureExtractor
...
>>> # Download feature extractor from hf-mirror.com and cache.
>>> feature_extractor = AutoFeatureExtractor.from_pretrained("facebook/wav2vec2-base-960h")
...
>>> # If feature extractor files are in a directory (e.g. feature extractor was saved using *save_pretrained('./test/saved_model/')*)
>>> # feature_extractor = AutoFeatureExtractor.from_pretrained("./test/saved_model/")
Source code in mindnlp\transformers\models\auto\feature_extraction_auto.py
@classmethod
@replace_list_option_in_docstrings(FEATURE_EXTRACTOR_MAPPING_NAMES)
def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
    r"""
    Instantiate one of the feature extractor classes of the library from a pretrained model vocabulary.

    The feature extractor class to instantiate is selected based on the `model_type` property of the config object
    (either passed as an argument or loaded from `pretrained_model_name_or_path` if possible), or when it's
    missing, by falling back to using pattern matching on `pretrained_model_name_or_path`:

    List options

    Params:
        pretrained_model_name_or_path (`str` or `os.PathLike`):
            This can be either:

            - a string, the *model id* of a pretrained feature_extractor hosted inside a model repo on
              hf-mirror.com.
            - a path to a *directory* containing a feature extractor file saved using the
              [`~feature_extraction_utils.FeatureExtractionMixin.save_pretrained`] method, e.g.,
              `./my_model_directory/`.
            - a path or url to a saved feature extractor JSON *file*, e.g.,
              `./my_model_directory/preprocessor_config.json`.
        cache_dir (`str` or `os.PathLike`, *optional*):
            Path to a directory in which a downloaded pretrained model feature extractor should be cached if the
            standard cache should not be used.
        force_download (`bool`, *optional*, defaults to `False`):
            Whether or not to force the (re-)download of the feature extractor files, overriding the cached
            versions if they exist.
        resume_download (`bool`, *optional*, defaults to `False`):
            Whether or not to delete an incompletely received file. Attempts to resume the download if such a
            file exists.
        proxies (`Dict[str, str]`, *optional*):
            A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128',
            'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
        token (`str` or *bool*, *optional*):
            The token to use as HTTP bearer authorization for remote files. If `True`, will use the token generated
            when running `huggingface-cli login` (stored in `~/.huggingface`).
        revision (`str`, *optional*, defaults to `"main"`):
            The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a
            git-based system for storing models and other artifacts on hf-mirror.com, so `revision` can be any
            identifier allowed by git.
        return_unused_kwargs (`bool`, *optional*, defaults to `False`):
            If `False`, then this function returns just the final feature extractor object. If `True`, then this
            function returns a `Tuple(feature_extractor, unused_kwargs)` where *unused_kwargs* is a dictionary
            consisting of the key/value pairs whose keys are not feature extractor attributes: i.e., the part of
            `kwargs` which has not been used to update `feature_extractor` and is otherwise ignored.
        trust_remote_code (`bool`, *optional*, defaults to `False`):
            Whether or not to allow for custom models defined on the Hub in their own modeling files. This option
            should only be set to `True` for repositories you trust and in which you have read the code, as it will
            execute code present on the Hub on your local machine.
        kwargs (`Dict[str, Any]`, *optional*):
            The values in kwargs of any keys which are feature extractor attributes will be used to override the
            loaded values. Behavior concerning key/value pairs whose keys are *not* feature extractor attributes is
            controlled by the `return_unused_kwargs` keyword parameter.

    <Tip>

    Passing `token=True` is required when you want to use a private model.

    </Tip>

    Example:
        ```python
        >>> from transformers import AutoFeatureExtractor
        ...
        >>> # Download feature extractor from hf-mirror.com and cache.
        >>> feature_extractor = AutoFeatureExtractor.from_pretrained("facebook/wav2vec2-base-960h")
        ...
        >>> # If feature extractor files are in a directory (e.g. feature extractor was saved using *save_pretrained('./test/saved_model/')*)
        >>> # feature_extractor = AutoFeatureExtractor.from_pretrained("./test/saved_model/")
        ```
    """
    use_auth_token = kwargs.pop("use_auth_token", None)
    if use_auth_token is not None:
        warnings.warn(
            "The `use_auth_token` argument is deprecated. Please use `token` instead.",
            FutureWarning,
        )
        if kwargs.get("token", None) is not None:
            raise ValueError(
                "`token` and `use_auth_token` are both specified. Please set only the argument `token`."
            )
        kwargs["token"] = use_auth_token

    config = kwargs.pop("config", None)
    kwargs["_from_auto"] = True

    config_dict, _ = FeatureExtractionMixin.get_feature_extractor_dict(pretrained_model_name_or_path, **kwargs)
    feature_extractor_class = config_dict.get("feature_extractor_type", None)
    feature_extractor_auto_map = None
    if "AutoFeatureExtractor" in config_dict.get("auto_map", {}):
        feature_extractor_auto_map = config_dict["auto_map"]["AutoFeatureExtractor"]

    # If we don't find the feature extractor class in the feature extractor config, let's try the model config.
    if feature_extractor_class is None and feature_extractor_auto_map is None:
        if not isinstance(config, PretrainedConfig):
            config = AutoConfig.from_pretrained(pretrained_model_name_or_path, **kwargs)
        # It could be in `config.feature_extractor_type`
        feature_extractor_class = getattr(config, "feature_extractor_type", None)
        if hasattr(config, "auto_map") and "AutoFeatureExtractor" in config.auto_map:
            feature_extractor_auto_map = config.auto_map["AutoFeatureExtractor"]

    if feature_extractor_class is not None:
        feature_extractor_class = feature_extractor_class_from_name(feature_extractor_class)

    if feature_extractor_class is not None:
        return feature_extractor_class.from_dict(config_dict, **kwargs)
    # Last try: we use the FEATURE_EXTRACTOR_MAPPING.
    if type(config) in FEATURE_EXTRACTOR_MAPPING:
        feature_extractor_class = FEATURE_EXTRACTOR_MAPPING[type(config)]
        return feature_extractor_class.from_dict(config_dict, **kwargs)

    raise ValueError(
        f"Unrecognized feature extractor in {pretrained_model_name_or_path}. Should have a "
        f"`feature_extractor_type` key in its {FEATURE_EXTRACTOR_NAME} of {CONFIG_NAME}, or one of the following "
        f"`model_type` keys in its {CONFIG_NAME}: {', '.join(c for c in FEATURE_EXTRACTOR_MAPPING_NAMES.keys())}"
    )
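
A short sketch of the return_unused_kwargs behaviour documented above, reusing the wav2vec2 checkpoint from the example; foo is a made-up key that is not a feature extractor attribute:

>>> from transformers import AutoFeatureExtractor
>>> feature_extractor, unused_kwargs = AutoFeatureExtractor.from_pretrained(
...     "facebook/wav2vec2-base-960h", return_unused_kwargs=True, foo=False
... )
>>> unused_kwargs
{'foo': False}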

mindnlp.transformers.models.auto.feature_extraction_auto.AutoFeatureExtractor.register(config_class, feature_extractor_class, exist_ok=False) staticmethod

Register a new feature extractor for this class.

PARAMETER DESCRIPTION
config_class

The configuration corresponding to the model to register.

TYPE: [`PretrainedConfig`]

feature_extractor_class

The feature extractor to register.

TYPE: [`FeatureExtractorMixin`]

Source code in mindnlp\transformers\models\auto\feature_extraction_auto.py
@staticmethod
def register(config_class, feature_extractor_class, exist_ok=False):
    """
    Register a new feature extractor for this class.

    Args:
        config_class ([`PretrainedConfig`]):
            The configuration corresponding to the model to register.
        feature_extractor_class ([`FeatureExtractorMixin`]): The feature extractor to register.
    """
    FEATURE_EXTRACTOR_MAPPING.register(config_class, feature_extractor_class, exist_ok=exist_ok)
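
A usage sketch mirroring the tokenizer case; CustomConfig and CustomFeatureExtractor are hypothetical placeholders, and the FeatureExtractionMixin import path is assumed to match the upstream transformers layout:

>>> from transformers import AutoFeatureExtractor, PretrainedConfig
>>> from transformers.feature_extraction_utils import FeatureExtractionMixin
>>> class CustomConfig(PretrainedConfig):
...     model_type = "custom"  # hypothetical model type
...
>>> class CustomFeatureExtractor(FeatureExtractionMixin):
...     pass  # a real extractor would implement the feature computation
...
>>> AutoFeatureExtractor.register(CustomConfig, CustomFeatureExtractor)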

mindnlp.transformers.models.auto.image_processing_auto.IMAGE_PROCESSOR_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, IMAGE_PROCESSOR_MAPPING_NAMES) module-attribute

mindnlp.transformers.models.auto.image_processing_auto.AutoImageProcessor

This is a generic image processor class that will be instantiated as one of the image processor classes of the library when created with the [AutoImageProcessor.from_pretrained] class method.

This class cannot be instantiated directly using __init__() (throws an error).

Source code in mindnlp\transformers\models\auto\image_processing_auto.py
class AutoImageProcessor:
    r"""
    This is a generic image processor class that will be instantiated as one of the image processor classes of the
    library when created with the [`AutoImageProcessor.from_pretrained`] class method.

    This class cannot be instantiated directly using `__init__()` (throws an error).
    """
    def __init__(self):
        """
        Initializes an instance of AutoImageProcessor.

        Args:
            self: The object itself.

        Returns:
            None.

        Raises:
            EnvironmentError:
                Raised when attempting to directly instantiate an AutoImageProcessor object.
                AutoImageProcessor is designed to be instantiated using the
                `AutoImageProcessor.from_pretrained(pretrained_model_name_or_path)` method.
        """
        raise EnvironmentError(
            "AutoImageProcessor is designed to be instantiated "
            "using the `AutoImageProcessor.from_pretrained(pretrained_model_name_or_path)` method."
        )

    @classmethod
    @replace_list_option_in_docstrings(IMAGE_PROCESSOR_MAPPING_NAMES)
    def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
        r"""
        Instantiate one of the image processor classes of the library from a pretrained model vocabulary.

        The image processor class to instantiate is selected based on the `model_type` property of the config object
        (either passed as an argument or loaded from `pretrained_model_name_or_path` if possible), or when it's
        missing, by falling back to using pattern matching on `pretrained_model_name_or_path`:

        List options

        Params:
            pretrained_model_name_or_path (`str` or `os.PathLike`):
                This can be either:

                - a string, the *model id* of a pretrained image_processor hosted inside a model repo on
                  hf-mirror.com.
                - a path to a *directory* containing an image processor file saved using the
                  [`~image_processing_utils.ImageProcessingMixin.save_pretrained`] method, e.g.,
                  `./my_model_directory/`.
                - a path or url to a saved image processor JSON *file*, e.g.,
                  `./my_model_directory/preprocessor_config.json`.
            cache_dir (`str` or `os.PathLike`, *optional*):
                Path to a directory in which a downloaded pretrained model image processor should be cached if the
                standard cache should not be used.
            force_download (`bool`, *optional*, defaults to `False`):
                Whether or not to force the (re-)download of the image processor files, overriding the cached
                versions if they exist.
            resume_download (`bool`, *optional*, defaults to `False`):
                Whether or not to delete an incompletely received file. Attempts to resume the download if such a
                file exists.
            proxies (`Dict[str, str]`, *optional*):
                A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128',
                'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
            token (`str` or *bool*, *optional*):
                The token to use as HTTP bearer authorization for remote files. If `True`, will use the token generated
                when running `huggingface-cli login` (stored in `~/.huggingface`).
            revision (`str`, *optional*, defaults to `"main"`):
                The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a
                git-based system for storing models and other artifacts on hf-mirror.com, so `revision` can be any
                identifier allowed by git.
            return_unused_kwargs (`bool`, *optional*, defaults to `False`):
                If `False`, then this function returns just the final image processor object. If `True`, then this
                function returns a `Tuple(image_processor, unused_kwargs)` where *unused_kwargs* is a dictionary
                consisting of the key/value pairs whose keys are not image processor attributes: i.e., the part of
                `kwargs` which has not been used to update `image_processor` and is otherwise ignored.
            trust_remote_code (`bool`, *optional*, defaults to `False`):
                Whether or not to allow for custom models defined on the Hub in their own modeling files. This option
                should only be set to `True` for repositories you trust and in which you have read the code, as it will
                execute code present on the Hub on your local machine.
            kwargs (`Dict[str, Any]`, *optional*):
                The values in kwargs of any keys which are image processor attributes will be used to override the
                loaded values. Behavior concerning key/value pairs whose keys are *not* image processor attributes is
                controlled by the `return_unused_kwargs` keyword parameter.

        <Tip>

        Passing `token=True` is required when you want to use a private model.

        </Tip>

        Example:
            ```python
            >>> from transformers import AutoImageProcessor
            ...
            >>> # Download image processor from hf-mirror.com and cache.
            >>> image_processor = AutoImageProcessor.from_pretrained("google/vit-base-patch16-224-in21k")
            ...
            >>> # If image processor files are in a directory (e.g. image processor was saved using *save_pretrained('./test/saved_model/')*)
            >>> # image_processor = AutoImageProcessor.from_pretrained("./test/saved_model/")
            ```
        """
        use_auth_token = kwargs.pop("use_auth_token", None)
        if use_auth_token is not None:
            warnings.warn(
                "The `use_auth_token` argument is deprecated. Please use `token` instead.",
                FutureWarning,
            )
            if kwargs.get("token", None) is not None:
                raise ValueError(
                    "`token` and `use_auth_token` are both specified. Please set only the argument `token`."
                )
            kwargs["token"] = use_auth_token

        config = kwargs.pop("config", None)
        kwargs["_from_auto"] = True

        config_dict, _ = ImageProcessingMixin.get_image_processor_dict(pretrained_model_name_or_path, **kwargs)
        image_processor_class = config_dict.get("image_processor_type", None)
        image_processor_auto_map = None
        if "AutoImageProcessor" in config_dict.get("auto_map", {}):
            image_processor_auto_map = config_dict["auto_map"]["AutoImageProcessor"]

        # If we still don't have the image processor class, check if we're loading from a previous feature extractor config
        # and if so, infer the image processor class from there.
        if image_processor_class is None and image_processor_auto_map is None:
            feature_extractor_class = config_dict.pop("feature_extractor_type", None)
            if feature_extractor_class is not None:
                logger.warning(
                    "Could not find image processor class in the image processor config or the model config. Loading "
                    "based on pattern matching with the model's feature extractor configuration. Please open a "
                    "PR/issue to update `preprocessor_config.json` to use `image_processor_type` instead of "
                    "`feature_extractor_type`. This warning will be removed in v4.40."
                )
                image_processor_class = feature_extractor_class.replace("FeatureExtractor", "ImageProcessor")
            if "AutoFeatureExtractor" in config_dict.get("auto_map", {}):
                feature_extractor_auto_map = config_dict["auto_map"]["AutoFeatureExtractor"]
                image_processor_auto_map = feature_extractor_auto_map.replace("FeatureExtractor", "ImageProcessor")
                logger.warning(
                    "Could not find image processor auto map in the image processor config or the model config. "
                    "Loading based on pattern matching with the model's feature extractor configuration. Please open a "
                    "PR/issue to update `preprocessor_config.json` to use `AutoImageProcessor` instead of "
                    "`AutoFeatureExtractor`. This warning will be removed in v4.40."
                )

        # If we don't find the image processor class in the image processor config, let's try the model config.
        if image_processor_class is None and image_processor_auto_map is None:
            if not isinstance(config, PretrainedConfig):
                config = AutoConfig.from_pretrained(pretrained_model_name_or_path, **kwargs)
            # It could be in `config.image_processor_type`
            image_processor_class = getattr(config, "image_processor_type", None)
            if hasattr(config, "auto_map") and "AutoImageProcessor" in config.auto_map:
                image_processor_auto_map = config.auto_map["AutoImageProcessor"]

        if image_processor_class is not None:
            image_processor_class = image_processor_class_from_name(image_processor_class)

        if image_processor_class is not None:
            return image_processor_class.from_dict(config_dict, **kwargs)
        # Last try: we use the IMAGE_PROCESSOR_MAPPING.
        if type(config) in IMAGE_PROCESSOR_MAPPING:
            image_processor_class = IMAGE_PROCESSOR_MAPPING[type(config)]
            return image_processor_class.from_dict(config_dict, **kwargs)

        raise ValueError(
            f"Unrecognized image processor in {pretrained_model_name_or_path}. Should have a "
            f"`image_processor_type` key in its {IMAGE_PROCESSOR_NAME} of {CONFIG_NAME}, or one of the following "
            f"`model_type` keys in its {CONFIG_NAME}: {', '.join(c for c in IMAGE_PROCESSOR_MAPPING_NAMES.keys())}"
        )

    @staticmethod
    def register(config_class, image_processor_class, exist_ok=False):
        """
        Register a new image processor for this class.

        Args:
            config_class ([`PretrainedConfig`]):
                The configuration corresponding to the model to register.
            image_processor_class ([`ImageProcessingMixin`]): The image processor to register.
        """
        IMAGE_PROCESSOR_MAPPING.register(config_class, image_processor_class, exist_ok=exist_ok)

mindnlp.transformers.models.auto.image_processing_auto.AutoImageProcessor.__init__()

Initializes an instance of AutoImageProcessor.

PARAMETER DESCRIPTION
self

The object itself.

RETURNS DESCRIPTION

None.

RAISES DESCRIPTION
EnvironmentError

Raised when attempting to directly instantiate an AutoImageProcessor object. AutoImageProcessor is designed to be instantiated using the AutoImageProcessor.from_pretrained(pretrained_model_name_or_path) method.

Source code in mindnlp\transformers\models\auto\image_processing_auto.py
def __init__(self):
    """
    Initializes an instance of AutoImageProcessor.

    Args:
        self: The object itself.

    Returns:
        None.

    Raises:
        EnvironmentError:
            Raised when attempting to directly instantiate an AutoImageProcessor object.
            AutoImageProcessor is designed to be instantiated using the
            `AutoImageProcessor.from_pretrained(pretrained_model_name_or_path)` method.
    """
    raise EnvironmentError(
        "AutoImageProcessor is designed to be instantiated "
        "using the `AutoImageProcessor.from_pretrained(pretrained_model_name_or_path)` method."
    )

mindnlp.transformers.models.auto.image_processing_auto.AutoImageProcessor.from_pretrained(pretrained_model_name_or_path, **kwargs) classmethod

Instantiate one of the image processor classes of the library from a pretrained model vocabulary.

The image processor class to instantiate is selected based on the model_type property of the config object (either passed as an argument or loaded from pretrained_model_name_or_path if possible), or when it's missing, by falling back to using pattern matching on pretrained_model_name_or_path:

List options

PARAMETER DESCRIPTION
pretrained_model_name_or_path

This can be either:

  • a string, the model id of a pretrained image_processor hosted inside a model repo on hf-mirror.com.
  • a path to a directory containing an image processor file saved using the [~image_processing_utils.ImageProcessingMixin.save_pretrained] method, e.g., ./my_model_directory/.
  • a path or url to a saved image processor JSON file, e.g., ./my_model_directory/preprocessor_config.json.

TYPE: `str` or `os.PathLike`

cache_dir

Path to a directory in which a downloaded pretrained model image processor should be cached if the standard cache should not be used.

TYPE: `str` or `os.PathLike`, *optional*

force_download

Whether or not to force the (re-)download of the image processor files, overriding the cached versions if they exist.

TYPE: `bool`, *optional*, defaults to `False`

resume_download

Whether or not to delete an incompletely received file. Attempts to resume the download if such a file exists.

TYPE: `bool`, *optional*, defaults to `False`

proxies

A dictionary of proxy servers to use by protocol or endpoint, e.g., {'http': 'foo.bar:3128', 'http://hostname': 'foo.bar:4012'}. The proxies are used on each request.

TYPE: `Dict[str, str]`, *optional*

token

The token to use as HTTP bearer authorization for remote files. If True, will use the token generated when running huggingface-cli login (stored in ~/.huggingface).

TYPE: `str` or *bool*, *optional*

revision

The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a git-based system for storing models and other artifacts on hf-mirror.com, so revision can be any identifier allowed by git.

TYPE: `str`, *optional*, defaults to `"main"`

return_unused_kwargs

If False, then this function returns just the final image processor object. If True, then this function returns a Tuple(image_processor, unused_kwargs) where unused_kwargs is a dictionary consisting of the key/value pairs whose keys are not image processor attributes: i.e., the part of kwargs which has not been used to update image_processor and is otherwise ignored.

TYPE: `bool`, *optional*, defaults to `False`

trust_remote_code

Whether or not to allow for custom models defined on the Hub in their own modeling files. This option should only be set to True for repositories you trust and in which you have read the code, as it will execute code present on the Hub on your local machine.

TYPE: `bool`, *optional*, defaults to `False`

kwargs

The values in kwargs of any keys which are image processor attributes will be used to override the loaded values. Behavior concerning key/value pairs whose keys are not image processor attributes is controlled by the return_unused_kwargs keyword parameter.

TYPE: `Dict[str, Any]`, *optional* DEFAULT: {}

Passing token=True is required when you want to use a private model.

Example
>>> from transformers import AutoImageProcessor
...
>>> # Download image processor from hf-mirror.com and cache.
>>> image_processor = AutoImageProcessor.from_pretrained("google/vit-base-patch16-224-in21k")
...
>>> # If image processor files are in a directory (e.g. image processor was saved using *save_pretrained('./test/saved_model/')*)
>>> # image_processor = AutoImageProcessor.from_pretrained("./test/saved_model/")
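
A follow-on sketch showing the loaded processor in use; it assumes Pillow is installed and feeds a blank image purely for illustration:

>>> from PIL import Image
>>> image_processor = AutoImageProcessor.from_pretrained("google/vit-base-patch16-224-in21k")
>>> inputs = image_processor(images=Image.new("RGB", (224, 224)), return_tensors="np")
>>> inputs["pixel_values"].shape  # (1, 3, 224, 224) for this checkpoint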
Source code in mindnlp\transformers\models\auto\image_processing_auto.py
@classmethod
@replace_list_option_in_docstrings(IMAGE_PROCESSOR_MAPPING_NAMES)
def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
    r"""
    Instantiate one of the image processor classes of the library from a pretrained model vocabulary.

    The image processor class to instantiate is selected based on the `model_type` property of the config object
    (either passed as an argument or loaded from `pretrained_model_name_or_path` if possible), or when it's
    missing, by falling back to using pattern matching on `pretrained_model_name_or_path`:

    List options

    Params:
        pretrained_model_name_or_path (`str` or `os.PathLike`):
            This can be either:

            - a string, the *model id* of a pretrained image_processor hosted inside a model repo on
              hf-mirror.com.
            - a path to a *directory* containing an image processor file saved using the
              [`~image_processing_utils.ImageProcessingMixin.save_pretrained`] method, e.g.,
              `./my_model_directory/`.
            - a path or url to a saved image processor JSON *file*, e.g.,
              `./my_model_directory/preprocessor_config.json`.
        cache_dir (`str` or `os.PathLike`, *optional*):
            Path to a directory in which a downloaded pretrained model image processor should be cached if the
            standard cache should not be used.
        force_download (`bool`, *optional*, defaults to `False`):
            Whether or not to force a (re-)download of the image processor files, overriding the cached versions
            if they exist.
        resume_download (`bool`, *optional*, defaults to `False`):
            Whether or not to delete an incompletely received file. Attempts to resume the download if such a file
            exists.
        proxies (`Dict[str, str]`, *optional*):
            A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128',
            'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
        token (`str` or *bool*, *optional*):
            The token to use as HTTP bearer authorization for remote files. If `True`, will use the token generated
            when running `huggingface-cli login` (stored in `~/.huggingface`).
        revision (`str`, *optional*, defaults to `"main"`):
            The specific model version to use. It can be a branch name, a tag name, or a commit id. Since we use a
            git-based system for storing models and other artifacts on hf-mirror.com, `revision` can be any
            identifier allowed by git.
        return_unused_kwargs (`bool`, *optional*, defaults to `False`):
            If `False`, then this function returns just the final image processor object. If `True`, then this
            function returns a `Tuple(image_processor, unused_kwargs)` where *unused_kwargs* is a dictionary
            consisting of the key/value pairs whose keys are not image processor attributes: i.e., the part of
            `kwargs` which has not been used to update `image_processor` and is otherwise ignored.
        trust_remote_code (`bool`, *optional*, defaults to `False`):
            Whether or not to allow for custom models defined on the Hub in their own modeling files. This option
            should only be set to `True` for repositories you trust and in which you have read the code, as it will
            execute code present on the Hub on your local machine.
        kwargs (`Dict[str, Any]`, *optional*):
            The values in kwargs of any keys which are image processor attributes will be used to override the
            loaded values. Behavior concerning key/value pairs whose keys are *not* image processor attributes is
            controlled by the `return_unused_kwargs` keyword parameter.

    <Tip>

    Passing `token=True` is required when you want to use a private model.

    </Tip>

    Example:
        ```python
        >>> from transformers import AutoImageProcessor
        ...
        >>> # Download image processor from hf-mirror.com and cache.
        >>> image_processor = AutoImageProcessor.from_pretrained("google/vit-base-patch16-224-in21k")
        ...
        >>> # If image processor files are in a directory (e.g. image processor was saved using *save_pretrained('./test/saved_model/')*)
        >>> # image_processor = AutoImageProcessor.from_pretrained("./test/saved_model/")
        ```
    """
    use_auth_token = kwargs.pop("use_auth_token", None)
    if use_auth_token is not None:
        warnings.warn(
            "The `use_auth_token` argument is deprecated. Please use `token` instead.",
            FutureWarning,
        )
        if kwargs.get("token", None) is not None:
            raise ValueError(
                "`token` and `use_auth_token` are both specified. Please set only the argument `token`."
            )
        kwargs["token"] = use_auth_token

    config = kwargs.pop("config", None)
    kwargs["_from_auto"] = True

    config_dict, _ = ImageProcessingMixin.get_image_processor_dict(pretrained_model_name_or_path, **kwargs)
    image_processor_class = config_dict.get("image_processor_type", None)
    image_processor_auto_map = None
    if "AutoImageProcessor" in config_dict.get("auto_map", {}):
        image_processor_auto_map = config_dict["auto_map"]["AutoImageProcessor"]

    # If we still don't have the image processor class, check if we're loading from a previous feature extractor config
    # and if so, infer the image processor class from there.
    if image_processor_class is None and image_processor_auto_map is None:
        feature_extractor_class = config_dict.pop("feature_extractor_type", None)
        if feature_extractor_class is not None:
            logger.warning(
                "Could not find image processor class in the image processor config or the model config. Loading "
                "based on pattern matching with the model's feature extractor configuration. Please open a "
                "PR/issue to update `preprocessor_config.json` to use `image_processor_type` instead of "
                "`feature_extractor_type`. This warning will be removed in v4.40."
            )
            image_processor_class = feature_extractor_class.replace("FeatureExtractor", "ImageProcessor")
        if "AutoFeatureExtractor" in config_dict.get("auto_map", {}):
            feature_extractor_auto_map = config_dict["auto_map"]["AutoFeatureExtractor"]
            image_processor_auto_map = feature_extractor_auto_map.replace("FeatureExtractor", "ImageProcessor")
            logger.warning(
                "Could not find image processor auto map in the image processor config or the model config. "
                "Loading based on pattern matching with the model's feature extractor configuration. Please open a "
                "PR/issue to update `preprocessor_config.json` to use `AutoImageProcessor` instead of "
                "`AutoFeatureExtractor`. This warning will be removed in v4.40."
            )

    # If we don't find the image processor class in the image processor config, let's try the model config.
    if image_processor_class is None and image_processor_auto_map is None:
        if not isinstance(config, PretrainedConfig):
            config = AutoConfig.from_pretrained(pretrained_model_name_or_path, **kwargs)
        # It could be in `config.image_processor_type`
        image_processor_class = getattr(config, "image_processor_type", None)
        if hasattr(config, "auto_map") and "AutoImageProcessor" in config.auto_map:
            image_processor_auto_map = config.auto_map["AutoImageProcessor"]

    if image_processor_class is not None:
        image_processor_class = image_processor_class_from_name(image_processor_class)

    if image_processor_class is not None:
        return image_processor_class.from_dict(config_dict, **kwargs)
    # Last try: we use the IMAGE_PROCESSOR_MAPPING.
    if type(config) in IMAGE_PROCESSOR_MAPPING:
        image_processor_class = IMAGE_PROCESSOR_MAPPING[type(config)]
        return image_processor_class.from_dict(config_dict, **kwargs)

    raise ValueError(
        f"Unrecognized image processor in {pretrained_model_name_or_path}. Should have a "
        f"`image_processor_type` key in its {IMAGE_PROCESSOR_NAME} of {CONFIG_NAME}, or one of the following "
        f"`model_type` keys in its {CONFIG_NAME}: {', '.join(c for c in IMAGE_PROCESSOR_MAPPING_NAMES.keys())}"
    )
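
A hedged sketch of how the extra keyword arguments interact with `return_unused_kwargs`, assuming the ViT checkpoint from the example above is reachable (`do_resize` is a real attribute of ViT image processors; `unknown_kwarg` is an illustrative name):

>>> from transformers import AutoImageProcessor
...
>>> # Known attributes are applied to the processor; anything else is
>>> # handed back when return_unused_kwargs=True.
>>> image_processor, unused = AutoImageProcessor.from_pretrained(
...     "google/vit-base-patch16-224-in21k",
...     do_resize=False,
...     unknown_kwarg=42,
...     return_unused_kwargs=True,
... )
>>> image_processor.do_resize
False
>>> "unknown_kwarg" in unused
True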

mindnlp.transformers.models.auto.image_processing_auto.AutoImageProcessor.register(config_class, image_processor_class, exist_ok=False) staticmethod

Register a new image processor for this class.

PARAMETER DESCRIPTION
config_class

The configuration corresponding to the model to register.

TYPE: [`PretrainedConfig`]

image_processor_class

The image processor to register.

TYPE: [`ImageProcessingMixin`]

Source code in mindnlp\transformers\models\auto\image_processing_auto.py
@staticmethod
def register(config_class, image_processor_class, exist_ok=False):
    """
    Register a new image processor for this class.

    Args:
        config_class ([`PretrainedConfig`]):
            The configuration corresponding to the model to register.
        image_processor_class ([`ImageProcessingMixin`]): The image processor to register.
    """
    IMAGE_PROCESSOR_MAPPING.register(config_class, image_processor_class, exist_ok=exist_ok)
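
A minimal sketch of pairing a custom configuration with a custom image processor via `register`; both classes below are hypothetical, and `BaseImageProcessor` is assumed to be importable from `image_processing_utils` as in recent transformers releases:

>>> from transformers import AutoConfig, AutoImageProcessor, PretrainedConfig
>>> from transformers.image_processing_utils import BaseImageProcessor
...
>>> class CustomConfig(PretrainedConfig):  # hypothetical config class
...     model_type = "custom-vision-model"
...
>>> class CustomImageProcessor(BaseImageProcessor):  # hypothetical processor class
...     pass
...
>>> AutoConfig.register("custom-vision-model", CustomConfig)
>>> AutoImageProcessor.register(CustomConfig, CustomImageProcessor)
>>> # AutoImageProcessor now resolves CustomConfig to CustomImageProcessor.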

mindnlp.transformers.models.auto.processing_auto.PROCESSOR_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, PROCESSOR_MAPPING_NAMES) module-attribute

mindnlp.transformers.models.auto.processing_auto.AutoProcessor

This is a generic processor class that will be instantiated as one of the processor classes of the library when created with the [AutoProcessor.from_pretrained] class method.

This class cannot be instantiated directly using __init__() (throws an error).

Source code in mindnlp\transformers\models\auto\processing_auto.py
class AutoProcessor:
    r"""
    This is a generic processor class that will be instantiated as one of the processor classes of the library when
    created with the [`AutoProcessor.from_pretrained`] class method.

    This class cannot be instantiated directly using `__init__()` (throws an error).
    """

    def __init__(self):
        """
        Class: AutoProcessor

        __init__(self)
            Initializes a new instance of the AutoProcessor class.

        Args:
            self (object): The instance of the AutoProcessor class.

        Returns:
            None.

        Raises:
            EnvironmentError: This method raises an EnvironmentError with the message 'AutoProcessor is designed to be
            instantiated using the `AutoProcessor.from_pretrained(pretrained_model_name_or_path)` method.'

        """
        raise EnvironmentError(
            "AutoProcessor is designed to be instantiated "
            "using the `AutoProcessor.from_pretrained(pretrained_model_name_or_path)` method."
        )

    @classmethod
    @replace_list_option_in_docstrings(PROCESSOR_MAPPING_NAMES)
    def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
        r"""
        Instantiate one of the processor classes of the library from a pretrained model vocabulary.

        The processor class to instantiate is selected based on the `model_type` property of the config object (either
        passed as an argument or loaded from `pretrained_model_name_or_path` if possible):

        List options

        Params:
            pretrained_model_name_or_path (`str` or `os.PathLike`):
                This can be either:

                - a string, the *model id* of a pretrained processor hosted inside a model repo on
                  hf-mirror.com.
                - a path to a *directory* containing processor files saved using the `save_pretrained()` method,
                  e.g., `./my_model_directory/`.
            cache_dir (`str` or `os.PathLike`, *optional*):
                Path to a directory in which a downloaded pretrained model processor should be cached if the
                standard cache should not be used.
            force_download (`bool`, *optional*, defaults to `False`):
                Whether or not to force a (re-)download of the processor files, overriding the cached versions
                if they exist.
            resume_download (`bool`, *optional*, defaults to `False`):
                Whether or not to delete an incompletely received file. Attempts to resume the download if such a
                file exists.
            proxies (`Dict[str, str]`, *optional*):
                A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128',
                'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
            token (`str` or *bool*, *optional*):
                The token to use as HTTP bearer authorization for remote files. If `True`, will use the token generated
                when running `huggingface-cli login` (stored in `~/.huggingface`).
            revision (`str`, *optional*, defaults to `"main"`):
                The specific model version to use. It can be a branch name, a tag name, or a commit id. Since we use a
                git-based system for storing models and other artifacts on hf-mirror.com, `revision` can be any
                identifier allowed by git.
            return_unused_kwargs (`bool`, *optional*, defaults to `False`):
                If `False`, then this function returns just the final processor object. If `True`, then this
                function returns a `Tuple(processor, unused_kwargs)` where *unused_kwargs* is a dictionary
                consisting of the key/value pairs whose keys are not processor attributes: i.e., the part of
                `kwargs` which has not been used to update `processor` and is otherwise ignored.
            trust_remote_code (`bool`, *optional*, defaults to `False`):
                Whether or not to allow for custom models defined on the Hub in their own modeling files. This option
                should only be set to `True` for repositories you trust and in which you have read the code, as it will
                execute code present on the Hub on your local machine.
            kwargs (`Dict[str, Any]`, *optional*):
                The values in kwargs of any keys which are processor attributes will be used to override the
                loaded values. Behavior concerning key/value pairs whose keys are *not* processor attributes is
                controlled by the `return_unused_kwargs` keyword parameter.

        <Tip>

        Passing `token=True` is required when you want to use a private model.

        </Tip>

        Example:
            ```python
            >>> from transformers import AutoProcessor
            ...
            >>> # Download processor from hf-mirror.com and cache.
            >>> processor = AutoProcessor.from_pretrained("facebook/wav2vec2-base-960h")
            ...
            >>> # If processor files are in a directory (e.g. processor was saved using *save_pretrained('./test/saved_model/')*)
            >>> # processor = AutoProcessor.from_pretrained("./test/saved_model/")
            ```
        """
        use_auth_token = kwargs.pop("use_auth_token", None)
        if use_auth_token is not None:
            warnings.warn(
                "The `use_auth_token` argument is deprecated. Please use `token` instead.",
                FutureWarning,
            )
            if kwargs.get("token", None) is not None:
                raise ValueError(
                    "`token` and `use_auth_token` are both specified. Please set only the argument `token`."
                )
            kwargs["token"] = use_auth_token

        config = kwargs.pop("config", None)
        trust_remote_code = kwargs.pop("trust_remote_code", None)
        kwargs["_from_auto"] = True

        processor_class = None

        # First, let's see if we have a processor or preprocessor config.
        # Filter the kwargs for `get_file_from_repo`.
        get_file_from_repo_kwargs = {
            key: kwargs[key]
            for key in inspect.signature(get_file_from_repo).parameters.keys()
            if key in kwargs
        }

        # Let's start by checking whether the processor class is saved in a processor config
        processor_config_file = get_file_from_repo(
            pretrained_model_name_or_path, PROCESSOR_NAME, **get_file_from_repo_kwargs
        )
        if processor_config_file is not None:
            config_dict, _ = ProcessorMixin.get_processor_dict(
                pretrained_model_name_or_path, **kwargs
            )
            processor_class = config_dict.get("processor_class", None)

        if processor_class is None:
            # If not found, let's check whether the processor class is saved in an image processor config
            preprocessor_config_file = get_file_from_repo(
                pretrained_model_name_or_path,
                FEATURE_EXTRACTOR_NAME,
                **get_file_from_repo_kwargs,
            )
            if preprocessor_config_file is not None:
                config_dict, _ = ImageProcessingMixin.get_image_processor_dict(
                    pretrained_model_name_or_path, **kwargs
                )
                processor_class = config_dict.get("processor_class", None)

            # If not found, let's check whether the processor class is saved in a feature extractor config
            if preprocessor_config_file is not None and processor_class is None:
                config_dict, _ = FeatureExtractionMixin.get_feature_extractor_dict(
                    pretrained_model_name_or_path, **kwargs
                )
                processor_class = config_dict.get("processor_class", None)

        if processor_class is None:
            # Next, let's check whether the processor class is saved in a tokenizer
            tokenizer_config_file = get_file_from_repo(
                pretrained_model_name_or_path,
                TOKENIZER_CONFIG_FILE,
                **get_file_from_repo_kwargs,
            )
            if tokenizer_config_file is not None:
                with open(tokenizer_config_file, encoding="utf-8") as reader:
                    config_dict = json.load(reader)

                processor_class = config_dict.get("processor_class", None)

        if processor_class is None:
            # Otherwise, load config, if it can be loaded.
            if not isinstance(config, PretrainedConfig):
                config = AutoConfig.from_pretrained(
                    pretrained_model_name_or_path,
                    trust_remote_code=trust_remote_code,
                    **kwargs,
                )

            # And check if the config contains the processor class.
            processor_class = getattr(config, "processor_class", None)

        if processor_class is not None:
            processor_class = processor_class_from_name(processor_class)

        if processor_class is not None:
            return processor_class.from_pretrained(
                pretrained_model_name_or_path,
                trust_remote_code=trust_remote_code,
                **kwargs,
            )
        # Last try: we use the PROCESSOR_MAPPING.
        if type(config) in PROCESSOR_MAPPING:
            return PROCESSOR_MAPPING[type(config)].from_pretrained(
                pretrained_model_name_or_path, **kwargs
            )

        # At this stage, there doesn't seem to be a `Processor` class available for this model, so let's try a
        # tokenizer.
        try:
            return AutoTokenizer.from_pretrained(
                pretrained_model_name_or_path,
                trust_remote_code=trust_remote_code,
                **kwargs,
            )
        except Exception:
            try:
                return AutoImageProcessor.from_pretrained(
                    pretrained_model_name_or_path,
                    trust_remote_code=trust_remote_code,
                    **kwargs,
                )
            except Exception:
                pass

            try:
                return AutoFeatureExtractor.from_pretrained(
                    pretrained_model_name_or_path,
                    trust_remote_code=trust_remote_code,
                    **kwargs,
                )
            except Exception:
                pass

        raise ValueError(
            f"Unrecognized processing class in {pretrained_model_name_or_path}. Can't instantiate a processor, a "
            "tokenizer, an image processor or a feature extractor for this model. Make sure the repository contains "
            "the files of at least one of those processing classes."
        )

    @staticmethod
    def register(config_class, processor_class, exist_ok=False):
        """
        Register a new processor for this class.

        Args:
            config_class ([`PretrainedConfig`]):
                The configuration corresponding to the model to register.
            processor_class ([`ProcessorMixin`]): The processor to register.
        """
        PROCESSOR_MAPPING.register(config_class, processor_class, exist_ok=exist_ok)

mindnlp.transformers.models.auto.processing_auto.AutoProcessor.__init__()

__init__(self): Initializes a new instance of the AutoProcessor class.

PARAMETER DESCRIPTION
self

The instance of the AutoProcessor class.

TYPE: object

RETURNS DESCRIPTION

None.

RAISES DESCRIPTION
EnvironmentError

This method raises an EnvironmentError with the message 'AutoProcessor is designed to be instantiated using the `AutoProcessor.from_pretrained(pretrained_model_name_or_path)` method.'

Source code in mindnlp\transformers\models\auto\processing_auto.py
def __init__(self):
    """
    Class: AutoProcessor

    __init__(self)
        Initializes a new instance of the AutoProcessor class.

    Args:
        self (object): The instance of the AutoProcessor class.

    Returns:
        None.

    Raises:
        EnvironmentError: This method raises an EnvironmentError with the message 'AutoProcessor is designed to be
        instantiated using the `AutoProcessor.from_pretrained(pretrained_model_name_or_path)` method.'

    """
    raise EnvironmentError(
        "AutoProcessor is designed to be instantiated "
        "using the `AutoProcessor.from_pretrained(pretrained_model_name_or_path)` method."
    )
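
Since `__init__` unconditionally raises, direct construction fails immediately; a quick sketch of the observable behavior (`EnvironmentError` is an alias of `OSError`):

>>> from transformers import AutoProcessor
...
>>> try:
...     AutoProcessor()
... except EnvironmentError:
...     print("use AutoProcessor.from_pretrained(...) instead")
use AutoProcessor.from_pretrained(...) instead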

mindnlp.transformers.models.auto.processing_auto.AutoProcessor.from_pretrained(pretrained_model_name_or_path, **kwargs) classmethod

Instantiate one of the processor classes of the library from a pretrained model vocabulary.

The processor class to instantiate is selected based on the model_type property of the config object (either passed as an argument or loaded from pretrained_model_name_or_path if possible):

List options

PARAMETER DESCRIPTION
pretrained_model_name_or_path

This can be either:

  • a string, the model id of a pretrained processor hosted inside a model repo on hf-mirror.com.
  • a path to a directory containing processor files saved using the save_pretrained() method, e.g., ./my_model_directory/.

TYPE: `str` or `os.PathLike`

cache_dir

Path to a directory in which a downloaded pretrained model processor should be cached if the standard cache should not be used.

TYPE: `str` or `os.PathLike`, *optional*

force_download

Whether or not to force a (re-)download of the processor files, overriding the cached versions if they exist.

TYPE: `bool`, *optional*, defaults to `False`

resume_download

Whether or not to delete an incompletely received file. Attempts to resume the download if such a file exists.

TYPE: `bool`, *optional*, defaults to `False`

proxies

A dictionary of proxy servers to use by protocol or endpoint, e.g., {'http': 'foo.bar:3128', 'http://hostname': 'foo.bar:4012'}. The proxies are used on each request.

TYPE: `Dict[str, str]`, *optional*

token

The token to use as HTTP bearer authorization for remote files. If True, will use the token generated when running huggingface-cli login (stored in ~/.huggingface).

TYPE: `str` or *bool*, *optional*

revision

The specific model version to use. It can be a branch name, a tag name, or a commit id. Since we use a git-based system for storing models and other artifacts on hf-mirror.com, revision can be any identifier allowed by git.

TYPE: `str`, *optional*, defaults to `"main"`

return_unused_kwargs

If False, then this function returns just the final processor object. If True, then this function returns a Tuple(processor, unused_kwargs) where unused_kwargs is a dictionary consisting of the key/value pairs whose keys are not processor attributes: i.e., the part of kwargs which has not been used to update processor and is otherwise ignored.

TYPE: `bool`, *optional*, defaults to `False`

trust_remote_code

Whether or not to allow for custom models defined on the Hub in their own modeling files. This option should only be set to True for repositories you trust and in which you have read the code, as it will execute code present on the Hub on your local machine.

TYPE: `bool`, *optional*, defaults to `False`

kwargs

The values in kwargs of any keys which are processor attributes will be used to override the loaded values. Behavior concerning key/value pairs whose keys are not processor attributes is controlled by the return_unused_kwargs keyword parameter.

TYPE: `Dict[str, Any]`, *optional* DEFAULT: {}

Passing token=True is required when you want to use a private model.

Example
>>> from transformers import AutoProcessor
...
>>> # Download processor from hf-mirror.com and cache.
>>> processor = AutoProcessor.from_pretrained("facebook/wav2vec2-base-960h")
...
>>> # If processor files are in a directory (e.g. processor was saved using *save_pretrained('./test/saved_model/')*)
>>> # processor = AutoProcessor.from_pretrained("./test/saved_model/")
Source code in mindnlp\transformers\models\auto\processing_auto.py
@classmethod
@replace_list_option_in_docstrings(PROCESSOR_MAPPING_NAMES)
def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
    r"""
    Instantiate one of the processor classes of the library from a pretrained model vocabulary.

    The processor class to instantiate is selected based on the `model_type` property of the config object (either
    passed as an argument or loaded from `pretrained_model_name_or_path` if possible):

    List options

    Params:
        pretrained_model_name_or_path (`str` or `os.PathLike`):
            This can be either:

            - a string, the *model id* of a pretrained processor hosted inside a model repo on
              hf-mirror.com.
            - a path to a *directory* containing processor files saved using the `save_pretrained()` method,
              e.g., `./my_model_directory/`.
        cache_dir (`str` or `os.PathLike`, *optional*):
            Path to a directory in which a downloaded pretrained model processor should be cached if the
            standard cache should not be used.
        force_download (`bool`, *optional*, defaults to `False`):
            Whether or not to force a (re-)download of the processor files, overriding the cached versions
            if they exist.
        resume_download (`bool`, *optional*, defaults to `False`):
            Whether or not to delete an incompletely received file. Attempts to resume the download if such a file
            exists.
        proxies (`Dict[str, str]`, *optional*):
            A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128',
            'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
        token (`str` or *bool*, *optional*):
            The token to use as HTTP bearer authorization for remote files. If `True`, will use the token generated
            when running `huggingface-cli login` (stored in `~/.huggingface`).
        revision (`str`, *optional*, defaults to `"main"`):
            The specific model version to use. It can be a branch name, a tag name, or a commit id. Since we use a
            git-based system for storing models and other artifacts on hf-mirror.com, `revision` can be any
            identifier allowed by git.
        return_unused_kwargs (`bool`, *optional*, defaults to `False`):
            If `False`, then this function returns just the final processor object. If `True`, then this
            function returns a `Tuple(processor, unused_kwargs)` where *unused_kwargs* is a dictionary
            consisting of the key/value pairs whose keys are not processor attributes: i.e., the part of
            `kwargs` which has not been used to update `processor` and is otherwise ignored.
        trust_remote_code (`bool`, *optional*, defaults to `False`):
            Whether or not to allow for custom models defined on the Hub in their own modeling files. This option
            should only be set to `True` for repositories you trust and in which you have read the code, as it will
            execute code present on the Hub on your local machine.
        kwargs (`Dict[str, Any]`, *optional*):
            The values in kwargs of any keys which are processor attributes will be used to override the
            loaded values. Behavior concerning key/value pairs whose keys are *not* processor attributes is
            controlled by the `return_unused_kwargs` keyword parameter.

    <Tip>

    Passing `token=True` is required when you want to use a private model.

    </Tip>

    Example:
        ```python
        >>> from transformers import AutoProcessor
        ...
        >>> # Download processor from hf-mirror.com and cache.
        >>> processor = AutoProcessor.from_pretrained("facebook/wav2vec2-base-960h")
        ...
        >>> # If processor files are in a directory (e.g. processor was saved using *save_pretrained('./test/saved_model/')*)
        >>> # processor = AutoProcessor.from_pretrained("./test/saved_model/")
        ```
    """
    use_auth_token = kwargs.pop("use_auth_token", None)
    if use_auth_token is not None:
        warnings.warn(
            "The `use_auth_token` argument is deprecated. Please use `token` instead.",
            FutureWarning,
        )
        if kwargs.get("token", None) is not None:
            raise ValueError(
                "`token` and `use_auth_token` are both specified. Please set only the argument `token`."
            )
        kwargs["token"] = use_auth_token

    config = kwargs.pop("config", None)
    trust_remote_code = kwargs.pop("trust_remote_code", None)
    kwargs["_from_auto"] = True

    processor_class = None

    # First, let's see if we have a processor or preprocessor config.
    # Filter the kwargs for `get_file_from_repo`.
    get_file_from_repo_kwargs = {
        key: kwargs[key]
        for key in inspect.signature(get_file_from_repo).parameters.keys()
        if key in kwargs
    }

    # Let's start by checking whether the processor class is saved in a processor config
    processor_config_file = get_file_from_repo(
        pretrained_model_name_or_path, PROCESSOR_NAME, **get_file_from_repo_kwargs
    )
    if processor_config_file is not None:
        config_dict, _ = ProcessorMixin.get_processor_dict(
            pretrained_model_name_or_path, **kwargs
        )
        processor_class = config_dict.get("processor_class", None)

    if processor_class is None:
        # If not found, let's check whether the processor class is saved in an image processor config
        preprocessor_config_file = get_file_from_repo(
            pretrained_model_name_or_path,
            FEATURE_EXTRACTOR_NAME,
            **get_file_from_repo_kwargs,
        )
        if preprocessor_config_file is not None:
            config_dict, _ = ImageProcessingMixin.get_image_processor_dict(
                pretrained_model_name_or_path, **kwargs
            )
            processor_class = config_dict.get("processor_class", None)

        # If not found, let's check whether the processor class is saved in a feature extractor config
        if preprocessor_config_file is not None and processor_class is None:
            config_dict, _ = FeatureExtractionMixin.get_feature_extractor_dict(
                pretrained_model_name_or_path, **kwargs
            )
            processor_class = config_dict.get("processor_class", None)

    if processor_class is None:
        # Next, let's check whether the processor class is saved in a tokenizer
        tokenizer_config_file = get_file_from_repo(
            pretrained_model_name_or_path,
            TOKENIZER_CONFIG_FILE,
            **get_file_from_repo_kwargs,
        )
        if tokenizer_config_file is not None:
            with open(tokenizer_config_file, encoding="utf-8") as reader:
                config_dict = json.load(reader)

            processor_class = config_dict.get("processor_class", None)

    if processor_class is None:
        # Otherwise, load config, if it can be loaded.
        if not isinstance(config, PretrainedConfig):
            config = AutoConfig.from_pretrained(
                pretrained_model_name_or_path,
                trust_remote_code=trust_remote_code,
                **kwargs,
            )

        # And check if the config contains the processor class.
        processor_class = getattr(config, "processor_class", None)

    if processor_class is not None:
        processor_class = processor_class_from_name(processor_class)

    if processor_class is not None:
        return processor_class.from_pretrained(
            pretrained_model_name_or_path,
            trust_remote_code=trust_remote_code,
            **kwargs,
        )
    # Last try: we use the PROCESSOR_MAPPING.
    if type(config) in PROCESSOR_MAPPING:
        return PROCESSOR_MAPPING[type(config)].from_pretrained(
            pretrained_model_name_or_path, **kwargs
        )

    # At this stage, there doesn't seem to be a `Processor` class available for this model, so let's try a
    # tokenizer.
    try:
        return AutoTokenizer.from_pretrained(
            pretrained_model_name_or_path,
            trust_remote_code=trust_remote_code,
            **kwargs,
        )
    except Exception:
        try:
            return AutoImageProcessor.from_pretrained(
                pretrained_model_name_or_path,
                trust_remote_code=trust_remote_code,
                **kwargs,
            )
        except Exception:
            pass

        try:
            return AutoFeatureExtractor.from_pretrained(
                pretrained_model_name_or_path,
                trust_remote_code=trust_remote_code,
                **kwargs,
            )
        except Exception:
            pass

    raise ValueError(
        f"Unrecognized processing class in {pretrained_model_name_or_path}. Can't instantiate a processor, a "
        "tokenizer, an image processor or a feature extractor for this model. Make sure the repository contains "
        "the files of at least one of those processing classes."
    )
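
Because of the fallback chain at the end of `from_pretrained` (processor config, preprocessor config, tokenizer config, model config, then `AutoTokenizer`, `AutoImageProcessor`, and `AutoFeatureExtractor` in turn), the returned object is not necessarily a processor. A hedged sketch, assuming a text-only BERT checkpoint is reachable; the exact class returned depends on which files the repository contains:

>>> from transformers import AutoProcessor
...
>>> # A text-only checkpoint has no processor config, so the tokenizer
>>> # fallback is what resolves here.
>>> processor = AutoProcessor.from_pretrained("bert-base-uncased")
>>> # `processor` is a tokenizer instance, e.g. BertTokenizerFast.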

mindnlp.transformers.models.auto.processing_auto.AutoProcessor.register(config_class, processor_class, exist_ok=False) staticmethod

Register a new processor for this class.

PARAMETER DESCRIPTION
config_class

The configuration corresponding to the model to register.

TYPE: [`PretrainedConfig`]

processor_class

The processor to register.

TYPE: [`ProcessorMixin`]

Source code in mindnlp\transformers\models\auto\processing_auto.py
@staticmethod
def register(config_class, processor_class, exist_ok=False):
    """
    Register a new processor for this class.

    Args:
        config_class ([`PretrainedConfig`]):
            The configuration corresponding to the model to register.
        processor_class ([`ProcessorMixin`]): The processor to register.
    """
    PROCESSOR_MAPPING.register(config_class, processor_class, exist_ok=exist_ok)
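
A minimal sketch of registering a custom processor; the two classes are hypothetical, and `attributes = []` merely keeps the bare subclass constructible for illustration:

>>> from transformers import AutoConfig, AutoProcessor, PretrainedConfig, ProcessorMixin
...
>>> class CustomConfig(PretrainedConfig):  # hypothetical config class
...     model_type = "custom-multimodal-model"
...
>>> class CustomProcessor(ProcessorMixin):  # hypothetical processor class
...     attributes = []
...
>>> AutoConfig.register("custom-multimodal-model", CustomConfig)
>>> AutoProcessor.register(CustomConfig, CustomProcessor)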

mindnlp.transformers.models.auto.modeling_auto.MODEL_FOR_AUDIO_FRAME_CLASSIFICATION_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FOR_AUDIO_FRAME_CLASSIFICATION_MAPPING_NAMES) module-attribute

mindnlp.transformers.models.auto.modeling_auto.MODEL_FOR_AUDIO_XVECTOR_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FOR_AUDIO_XVECTOR_MAPPING_NAMES) module-attribute

mindnlp.transformers.models.auto.modeling_auto.MODEL_FOR_BACKBONE_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FOR_BACKBONE_MAPPING_NAMES) module-attribute

mindnlp.transformers.models.auto.modeling_auto.MODEL_FOR_CAUSAL_LM_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FOR_CAUSAL_LM_MAPPING_NAMES) module-attribute

mindnlp.transformers.models.auto.modeling_auto.MODEL_FOR_CTC_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FOR_CTC_MAPPING_NAMES) module-attribute

mindnlp.transformers.models.auto.modeling_auto.MODEL_FOR_DEPTH_ESTIMATION_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FOR_DEPTH_ESTIMATION_MAPPING_NAMES) module-attribute

mindnlp.transformers.models.auto.modeling_auto.MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING_NAMES) module-attribute

mindnlp.transformers.models.auto.modeling_auto.MODEL_FOR_IMAGE_TO_IMAGE_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FOR_IMAGE_TO_IMAGE_MAPPING_NAMES) module-attribute

mindnlp.transformers.models.auto.modeling_auto.MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING_NAMES) module-attribute

mindnlp.transformers.models.auto.modeling_auto.MODEL_FOR_MASK_GENERATION_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FOR_MASK_GENERATION_MAPPING_NAMES) module-attribute

mindnlp.transformers.models.auto.modeling_auto.MODEL_FOR_MASKED_LM_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FOR_MASKED_LM_MAPPING_NAMES) module-attribute

mindnlp.transformers.models.auto.modeling_auto.MODEL_FOR_MULTIPLE_CHOICE_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FOR_MULTIPLE_CHOICE_MAPPING_NAMES) module-attribute

mindnlp.transformers.models.auto.modeling_auto.MODEL_FOR_NEXT_SENTENCE_PREDICTION_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FOR_NEXT_SENTENCE_PREDICTION_MAPPING_NAMES) module-attribute

mindnlp.transformers.models.auto.modeling_auto.MODEL_FOR_OBJECT_DETECTION_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FOR_OBJECT_DETECTION_MAPPING_NAMES) module-attribute

mindnlp.transformers.models.auto.modeling_auto.MODEL_FOR_PRETRAINING_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FOR_PRETRAINING_MAPPING_NAMES) module-attribute

mindnlp.transformers.models.auto.modeling_auto.MODEL_FOR_QUESTION_ANSWERING_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FOR_QUESTION_ANSWERING_MAPPING_NAMES) module-attribute

mindnlp.transformers.models.auto.modeling_auto.MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING_NAMES) module-attribute

mindnlp.transformers.models.auto.modeling_auto.MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING_NAMES) module-attribute

mindnlp.transformers.models.auto.modeling_auto.MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING_NAMES) module-attribute

mindnlp.transformers.models.auto.modeling_auto.MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING_NAMES) module-attribute

mindnlp.transformers.models.auto.modeling_auto.MODEL_FOR_TEXT_ENCODING_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FOR_TEXT_ENCODING_MAPPING_NAMES) module-attribute

mindnlp.transformers.models.auto.modeling_auto.MODEL_FOR_TIME_SERIES_CLASSIFICATION_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FOR_TIME_SERIES_CLASSIFICATION_MAPPING_NAMES) module-attribute

mindnlp.transformers.models.auto.modeling_auto.MODEL_FOR_TIME_SERIES_REGRESSION_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FOR_TIME_SERIES_REGRESSION_MAPPING_NAMES) module-attribute

mindnlp.transformers.models.auto.modeling_auto.MODEL_FOR_TEXT_TO_SPECTROGRAM_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FOR_TEXT_TO_SPECTROGRAM_MAPPING_NAMES) module-attribute

mindnlp.transformers.models.auto.modeling_auto.MODEL_FOR_TEXT_TO_WAVEFORM_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FOR_TEXT_TO_WAVEFORM_MAPPING_NAMES) module-attribute

mindnlp.transformers.models.auto.modeling_auto.MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING_NAMES) module-attribute

mindnlp.transformers.models.auto.modeling_auto.MODEL_FOR_UNIVERSAL_SEGMENTATION_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FOR_UNIVERSAL_SEGMENTATION_MAPPING_NAMES) module-attribute

mindnlp.transformers.models.auto.modeling_auto.MODEL_FOR_VIDEO_CLASSIFICATION_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FOR_VIDEO_CLASSIFICATION_MAPPING_NAMES) module-attribute

mindnlp.transformers.models.auto.modeling_auto.MODEL_FOR_VISION_2_SEQ_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FOR_VISION_2_SEQ_MAPPING_NAMES) module-attribute

mindnlp.transformers.models.auto.modeling_auto.MODEL_FOR_VISUAL_QUESTION_ANSWERING_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FOR_VISUAL_QUESTION_ANSWERING_MAPPING_NAMES) module-attribute

mindnlp.transformers.models.auto.modeling_auto.MODEL_FOR_ZERO_SHOT_IMAGE_CLASSIFICATION_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FOR_ZERO_SHOT_IMAGE_CLASSIFICATION_MAPPING_NAMES) module-attribute

mindnlp.transformers.models.auto.modeling_auto.MODEL_FOR_ZERO_SHOT_OBJECT_DETECTION_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FOR_ZERO_SHOT_OBJECT_DETECTION_MAPPING_NAMES) module-attribute

mindnlp.transformers.models.auto.modeling_auto.MODEL_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_MAPPING_NAMES) module-attribute

mindnlp.transformers.models.auto.modeling_auto.MODEL_WITH_LM_HEAD_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_WITH_LM_HEAD_MAPPING_NAMES) module-attribute
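
Each of these `_LazyAutoMapping` objects behaves like a read-only dictionary keyed by config class, importing the concrete model classes only on first access. A sketch of a membership check, assuming the GPT-2 entries mirror upstream transformers:

>>> from mindnlp.transformers import AutoConfig
>>> from mindnlp.transformers.models.auto.modeling_auto import MODEL_FOR_CAUSAL_LM_MAPPING
...
>>> config = AutoConfig.from_pretrained("gpt2")
>>> type(config) in MODEL_FOR_CAUSAL_LM_MAPPING
True
>>> # Indexing by the config class yields the task-specific model class,
>>> # e.g. GPT2LMHeadModel for causal language modeling.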

mindnlp.transformers.models.auto.modeling_auto.AutoBackbone

Bases: _BaseAutoBackboneClass

Source code in mindnlp\transformers\models\auto\modeling_auto.py
class AutoBackbone(_BaseAutoBackboneClass):
    _model_mapping = MODEL_FOR_BACKBONE_MAPPING
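
A hedged sketch of the backbone entry point, assuming a ResNet checkpoint with backbone support as in upstream transformers (`out_features` selects which stages the backbone returns):

>>> from transformers import AutoBackbone
...
>>> # Load a vision backbone and request features from its last stage.
>>> backbone = AutoBackbone.from_pretrained("microsoft/resnet-50", out_features=["stage4"])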

mindnlp.transformers.models.auto.modeling_auto.AutoModel = auto_class_update(AutoModel) module-attribute

mindnlp.transformers.models.auto.modeling_auto.AutoModelForAudioClassification = auto_class_update(AutoModelForAudioClassification, head_doc='audio classification') module-attribute

mindnlp.transformers.models.auto.modeling_auto.AutoModelForAudioFrameClassification = auto_class_update(AutoModelForAudioFrameClassification, head_doc='audio frame (token) classification') module-attribute

mindnlp.transformers.models.auto.modeling_auto.AutoModelForAudioXVector = auto_class_update(AutoModelForAudioXVector, head_doc='audio retrieval via x-vector') module-attribute

mindnlp.transformers.models.auto.modeling_auto.AutoModelForCausalLM = auto_class_update(AutoModelForCausalLM, head_doc='causal language modeling') module-attribute

mindnlp.transformers.models.auto.modeling_auto.AutoModelForCTC = auto_class_update(AutoModelForCTC, head_doc='connectionist temporal classification') module-attribute

mindnlp.transformers.models.auto.modeling_auto.AutoModelForDepthEstimation = auto_class_update(AutoModelForDepthEstimation, head_doc='depth estimation') module-attribute

mindnlp.transformers.models.auto.modeling_auto.AutoModelForDocumentQuestionAnswering = auto_class_update(AutoModelForDocumentQuestionAnswering, head_doc='document question answering', checkpoint_for_example='impira/layoutlm-document-qa", revision="52e01b3') module-attribute

mindnlp.transformers.models.auto.modeling_auto.AutoModelForImageToImage

Bases: _BaseAutoModelClass

Source code in mindnlp\transformers\models\auto\modeling_auto.py
class AutoModelForImageToImage(_BaseAutoModelClass):
    _model_mapping = MODEL_FOR_IMAGE_TO_IMAGE_MAPPING

mindnlp.transformers.models.auto.modeling_auto.AutoModelForInstanceSegmentation = auto_class_update(AutoModelForInstanceSegmentation, head_doc='instance segmentation') module-attribute

mindnlp.transformers.models.auto.modeling_auto.AutoModelForMaskedLM = auto_class_update(AutoModelForMaskedLM, head_doc='masked language modeling') module-attribute

mindnlp.transformers.models.auto.modeling_auto.AutoModelForMaskGeneration

Bases: _BaseAutoModelClass

Source code in mindnlp\transformers\models\auto\modeling_auto.py
class AutoModelForMaskGeneration(_BaseAutoModelClass):
    _model_mapping = MODEL_FOR_MASK_GENERATION_MAPPING

mindnlp.transformers.models.auto.modeling_auto.AutoModelForMultipleChoice = auto_class_update(AutoModelForMultipleChoice, head_doc='multiple choice') module-attribute

mindnlp.transformers.models.auto.modeling_auto.AutoModelForNextSentencePrediction = auto_class_update(AutoModelForNextSentencePrediction, head_doc='next sentence prediction') module-attribute

mindnlp.transformers.models.auto.modeling_auto.AutoModelForObjectDetection = auto_class_update(AutoModelForObjectDetection, head_doc='object detection') module-attribute

mindnlp.transformers.models.auto.modeling_auto.AutoModelForPreTraining = auto_class_update(AutoModelForPreTraining, head_doc='pretraining') module-attribute

mindnlp.transformers.models.auto.modeling_auto.AutoModelForQuestionAnswering = auto_class_update(AutoModelForQuestionAnswering, head_doc='question answering') module-attribute

mindnlp.transformers.models.auto.modeling_auto.AutoModelForSeq2SeqLM = auto_class_update(AutoModelForSeq2SeqLM, head_doc='sequence-to-sequence language modeling', checkpoint_for_example='google-t5/t5-base') module-attribute

mindnlp.transformers.models.auto.modeling_auto.AutoModelForSequenceClassification = auto_class_update(AutoModelForSequenceClassification, head_doc='sequence classification') module-attribute

mindnlp.transformers.models.auto.modeling_auto.AutoModelForSpeechSeq2Seq = auto_class_update(AutoModelForSpeechSeq2Seq, head_doc='sequence-to-sequence speech-to-text modeling') module-attribute

mindnlp.transformers.models.auto.modeling_auto.AutoModelForTableQuestionAnswering = auto_class_update(AutoModelForTableQuestionAnswering, head_doc='table question answering', checkpoint_for_example='google/tapas-base-finetuned-wtq') module-attribute

mindnlp.transformers.models.auto.modeling_auto.AutoModelForTextEncoding

Bases: _BaseAutoModelClass

Source code in mindnlp\transformers\models\auto\modeling_auto.py
class AutoModelForTextEncoding(_BaseAutoModelClass):
    _model_mapping = MODEL_FOR_TEXT_ENCODING_MAPPING

mindnlp.transformers.models.auto.modeling_auto.AutoModelForTextToSpectrogram

Bases: _BaseAutoModelClass

Source code in mindnlp\transformers\models\auto\modeling_auto.py
class AutoModelForTextToSpectrogram(_BaseAutoModelClass):
    _model_mapping = MODEL_FOR_TEXT_TO_SPECTROGRAM_MAPPING

mindnlp.transformers.models.auto.modeling_auto.AutoModelForTextToWaveform

Bases: _BaseAutoModelClass

Source code in mindnlp\transformers\models\auto\modeling_auto.py
class AutoModelForTextToWaveform(_BaseAutoModelClass):
    _model_mapping = MODEL_FOR_TEXT_TO_WAVEFORM_MAPPING

mindnlp.transformers.models.auto.modeling_auto.AutoModelForTokenClassification = auto_class_update(AutoModelForTokenClassification, head_doc='token classification') module-attribute

mindnlp.transformers.models.auto.modeling_auto.AutoModelForUniversalSegmentation = auto_class_update(AutoModelForUniversalSegmentation, head_doc='universal image segmentation') module-attribute

mindnlp.transformers.models.auto.modeling_auto.AutoModelForVideoClassification = auto_class_update(AutoModelForVideoClassification, head_doc='video classification') module-attribute

mindnlp.transformers.models.auto.modeling_auto.AutoModelForVision2Seq = auto_class_update(AutoModelForVision2Seq, head_doc='vision-to-text modeling') module-attribute

mindnlp.transformers.models.auto.modeling_auto.AutoModelForVisualQuestionAnswering = auto_class_update(AutoModelForVisualQuestionAnswering, head_doc='visual question answering', checkpoint_for_example='dandelin/vilt-b32-finetuned-vqa') module-attribute

mindnlp.transformers.models.auto.modeling_auto.AutoModelForZeroShotImageClassification = auto_class_update(AutoModelForZeroShotImageClassification, head_doc='zero-shot image classification') module-attribute

mindnlp.transformers.models.auto.modeling_auto.AutoModelForZeroShotObjectDetection = auto_class_update(AutoModelForZeroShotObjectDetection, head_doc='zero-shot object detection') module-attribute

mindnlp.transformers.models.auto.modeling_auto.AutoModelWithLMHead

Bases: _AutoModelWithLMHead

Source code in mindnlp\transformers\models\auto\modeling_auto.py
class AutoModelWithLMHead(_AutoModelWithLMHead):
    @classmethod
    def from_config(cls, config):
        warnings.warn(
            "The class `AutoModelWithLMHead` is deprecated. Please use "
            "`AutoModelForCausalLM` for causal language models, `AutoModelForMaskedLM` for masked language models and "
            "`AutoModelForSeq2SeqLM` for encoder-decoder models.",
            FutureWarning,
        )
        return super().from_config(config)

    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
        warnings.warn(
            "The class `AutoModelWithLMHead` is deprecated. Please use "
            "`AutoModelForCausalLM` for causal language models, `AutoModelForMaskedLM` for masked language models and "
            "`AutoModelForSeq2SeqLM` for encoder-decoder models.",
            FutureWarning,
        )
        return super().from_pretrained(pretrained_model_name_or_path, *model_args, **kwargs)
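
A sketch of migrating off the deprecated class: choose the replacement that matches the architecture (causal, masked, or encoder-decoder); `gpt2` below is only an illustrative causal checkpoint:

>>> from transformers import AutoModelForCausalLM
...
>>> # Instead of the deprecated AutoModelWithLMHead.from_pretrained("gpt2"):
>>> model = AutoModelForCausalLM.from_pretrained("gpt2")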