
    h                     "   d Z ddlZddlZddlZddlZddlmZ ddlmZm	Z	m
Z
 ddlmZ ddlmZ ddlmZmZ dd	lmZ dd
lmZ ddlmZ ddlmZmZmZmZmZmZ ddlm Z  ddl!m"Z" ddl#m$Z$m%Z%m&Z&m'Z'm(Z(  e       rddl)m*Z* ndZ* ejV                  e,      Z- ee.e/e	e.   e	e.   f   f   g dd e       rdndffd e       rdnd e       rdndffdd e       rdndffdd e       rdndffdd e       rdndffd d e       rd!ndffd"d e       rdndffd#d$ e       rd%nd e       rd&ndffd'd(d e       rdndffd) e       rd*nddffd+d,d- e       rd.nd e       rd/ndffd0d1 e       rd2ndffd3d4d e       rd5ndffd6d7d8d e       rdndffd9d: e       rd;ndffd<d e       rd=ndffd>d e       rd5ndffd?d@ e       rdAndffdBd e       rdndffdCdD e       rdEnd e       rdFndffdGdH e       rdnd e       rdndffdId e       rdndffdJd@ e       rdAndffdKd e       rdndffdLd e       rdndffdMdN e       rdOnd e       rdPndffdQdR e       rdSndffdTd e       rd!ndffdUd e       rd!ndffdVd e       rdndffdWdX e       rdYndffdZd[ e       rd\ndffd] e       rd^nd e       rd_ndffd`dad e       rd5ndffdbdcddd@ e       rdAndffded: e       rd;ndffdfdg e       rdhndffdi e       rdjnd e       rdkndffdl e       rdnd e       rdndffdm e       rdnd e       rdndffdn e       rdnd e       rdndffdo e       rdnd e       rdndffdpdq e       rdnd e       rdndffdrds e       rdtndffdudv e       rdwndffdxdy e       rdzndffd{d: e       rd;ndffd|d e       rdndffd}d e       rdndffd~d e       rdndffd e       rdnddffdd e       rd:nd e       rd;ndffdd e       rd5ndffdd e       rdndffd e       rdnddffddd e       rd;ndffdd e       rdndffddd e       rdndffd e       rdnd e       rdndffd e       rdnd e       rdndffd e       rdnd e       rdndffd e       rdnd e       rdndffd e       rdnd e       rdndffd e       rdnd e       rdndffdd e       rdndffdd e       rd5ndffdd e       rd5ndffdd e       rd5ndffdd e       rd5ndffdd e       rd5ndffd e       rdnddffdd: e       rd;ndffdd: e       rd;ndffdd: e       rd;ndffdd e       rdndffddd e       rd5ndffdd: e       rd;ndffddddddd e       rdndffdd e       rdndffdd e       rd5ndffdd e       rdndffddd@ e       rdAndffdd e       rdndffdd e       rdndffdd e       rdndffdd: e       rd;ndffdd: e       rd;ndffddX e       rdYndffd e       rdnd e       rdndffdd e       rdndffd e       rdnd e       rdndffdd e       rdnd e       rdndffdd e       rd5ndffdd e       rdndffdd e       rdndffdd e       rdndffdd e       rdndffdd e       rdndffdd e       rdndffd e       rdnd e       rdndffd e       rdnd e       rdndffd e       rdnd e       rdndffdd e       rdndffdd e       rdndffdd e       rdndffdd e       rdndffdd e       rdndffd e       rdnd e       rdndffddd e       rdndffd e       rdnddffdd e       rdndffdd e       rdndffd e       rdnddffd e       rdnd e       rdndffd e       rdnd e       rdndffdd@ e       rdAndffdd e       rdndffdd e       rdndffdd e       rd:nd e       rd;ndffd e       rdn
 e       rdnd e       r	 e       sdndffd e       rdn
 e       rdnd e       r	 e       sdndffdd e       rdndffd e       rdnddffdd e       rdndffdd e       rdndffdd e       rd5ndffd d e       rd5ndffdd e       rd5ndffdd e       rdndffdd e       rdndffdd@ e       rdAndffd e       rdnd e       rd	ndffd
d e       rdndffdd e       rdndffdd e       rdndffddd e       rd5ndffdd e       rdndffd e       rdnd e       rdndffd e       rdnd e       rdndffd e       rdnd e       rdndffdd e       rdndffdd e       rdndffdd e       rd;ndffdd e       rdndffdd e       rdndffdd e       rdndffdd e       rdndffd d: e       rd;ndffd!d e       rdndffd"d e       rdndffd#d e       rdndffd$d% e       rd1nd e       rd2ndffd& e       rd1nd e       rd2ndffd'd( e       rdnd e       rdndffd)dR e       rdSndffd*d e       rdndffd+d e       rdndffd,d-d e       rdndffd.d e       rdn
 e       rd5ndffd/ e       rd0nddffd1d2d e       rdndffd3dX e       rdYndffd4dX e       rdYndffd5dX e       rdYndffd6dX e       rdYndffd7dX e       rdYndffd8dX e       rdYndffd9dX e       rdYndffd:dX e       rdYndffd;dX e       rdYndffd<dX e       rdYndffd=dX e       rdYndffd>dX e       rdYndffd?d@dA e       rdBndffdC e       rdnd e       rdndffdD e       rdEnd e       rdFndffdG e       rdHnd e       rdIndffdJdK e       rdLndffdMd@ e       rdAndffdNd@ e       rdAndffdOdPdQ e       rdRndffdSd e       rdndffdT e       rdUnd e       rdVndffdW e       rdUnd e       rdVndffdX e       rdnd e       rdndffdY e       rdZnddffd[ e       rdnd e       rdndffd\d e       rd5ndffd] e       rd^nddffd_d` e       rdanddffdbdcdd e       rdendffdfd e       rdndffdgd: e       rd;ndffdh e       rdnd e       rdndffdi e       rdnd e       rdndffdj e       rdnd e       rdndffdkdldmdnd e       rdndffdo e       rdpnd e       rdqndffdr e       rdnd e       rdndffdsd e       rdndffdtd e       rdndffdud e       rdndffdvd e       rdndffdwdx e       rdn
 e       rdnd e       r	 e       sdndffdydzd{d|d}d~ e       rdndffdd e       rdndffd e       rdnd e       rdndffdd e       rdnddffd e       rdnd e       rdndffd e       rdnd e       rdndffd e       rdnd e       rdndffdd e       rdndffd e       rdnd e       rdndffd e       rdnd e       rdndffd e       rdnd e       rdndffd e       rdnd e       rdndff      Z0 e"e$e0      Z1 e$jd                         D  ci c]  \  } }|| 
 c}} Z3de.de
e4e   df   fdZ5	 	 	 	 	 	 	 	 dde
e.ejl                  e.   f   de	e
e.ejl                  e.   f      de7de	e7   de	e8e.e.f      de	e
e7e.f      de	e.   de7de.de8e.ef   fdZ9 G d d      Z:ddgZ;yc c}} w (  zAuto Tokenizer class.    N)OrderedDict)AnyOptionalUnion)is_mistral_common_available   )PretrainedConfig)get_class_from_dynamic_moduleresolve_trust_remote_code)load_gguf_checkpoint)PreTrainedTokenizer)TOKENIZER_CONFIG_FILE)cached_fileextract_commit_hashis_g2p_en_availableis_sentencepiece_availableis_tokenizers_availablelogging   )EncoderDecoderConfig   )_LazyAutoMapping)CONFIG_MAPPING_NAMES
AutoConfigconfig_class_to_model_typemodel_type_to_module_name!replace_list_option_in_docstrings)PreTrainedTokenizerFastaimv2CLIPTokenizerCLIPTokenizerFastalbertAlbertTokenizerAlbertTokenizerFastalignBertTokenizerBertTokenizerFastarceeLlamaTokenizerLlamaTokenizerFastaria
aya_visionCohereTokenizerFastbark)bart)BartTokenizerBartTokenizerFastbarthezBarthezTokenizerBarthezTokenizerFast)bartpho)BartphoTokenizerNbertzbert-generationBertGenerationTokenizer)zbert-japanese)BertJapaneseTokenizerN)bertweet)BertweetTokenizerNbig_birdBigBirdTokenizerBigBirdTokenizerFastbigbird_pegasusPegasusTokenizerPegasusTokenizerFast)biogpt)BioGptTokenizerNbitnetr   )
blenderbot)BlenderbotTokenizerBlenderbotTokenizerFast)zblenderbot-small)BlenderbotSmallTokenizerNblipzblip-2GPT2TokenizerGPT2TokenizerFastbloomBloomTokenizerFastbltbridgetowerRobertaTokenizerRobertaTokenizerFastbros)byt5)ByT5TokenizerN	camembertCamembertTokenizerCamembertTokenizerFast)canine)CanineTokenizerN	chameleonchinese_clipclapclipclipseg)clvp)ClvpTokenizerN
code_llamaCodeLlamaTokenizerCodeLlamaTokenizerFastcodegenCodeGenTokenizerCodeGenTokenizerFastcoherecohere2colpalicolqwen2Qwen2TokenizerQwen2TokenizerFastconvbertConvBertTokenizerConvBertTokenizerFastcpmCpmTokenizerCpmTokenizerFast)cpmant)CpmAntTokenizerNcsm)ctrl)CTRLTokenizerN)zdata2vec-audioWav2Vec2CTCTokenizerNzdata2vec-textdbrxdebertaDebertaTokenizerDebertaTokenizerFastz
deberta-v2DebertaV2TokenizerDebertaV2TokenizerFastdeepseek_v2deepseek_v3deepseek_vldeepseek_vl_hybrid)dia)DiaTokenizerN	diffllama
distilbertDistilBertTokenizerDistilBertTokenizerFastdprDPRQuestionEncoderTokenizerDPRQuestionEncoderTokenizerFastelectraElectraTokenizerElectraTokenizerFastemu3ernieernie4_5ernie4_5_moeernie_mErnieMTokenizer)esm)EsmTokenizerNexaone4falconfalcon_mambaGPTNeoXTokenizerFastfastspeech2_conformerFastSpeech2ConformerTokenizer)flaubert)FlaubertTokenizerN	flex_olmofnetFNetTokenizerFNetTokenizerFast)fsmt)FSMTTokenizerNfunnelFunnelTokenizerFunnelTokenizerFastgemmaGemmaTokenizerGemmaTokenizerFastgemma2gemma3gemma3_textgemma3ngemma3n_textgitglmglm4glm4_moeglm4v	glm4v_moezgpt-sw3GPTSw3Tokenizergpt2gpt_bigcodegpt_neogpt_neox)gpt_neox_japanese)GPTNeoXJapaneseTokenizerNgpt_ossgptj)zgptsan-japanese)GPTSanJapaneseTokenizerN)graniterJ   N)
granitemoer   )granitemoehybridr   )granitemoesharedr   zgrounding-dinogroupvitheliumherbertHerbertTokenizerHerbertTokenizerFast)hubertrx   ibertideficsidefics2idefics3instructblipinstructblipvideointernvljambajanusjetmoe)jukebox)JukeboxTokenizerNzkosmos-2XLMRobertaTokenizerXLMRobertaTokenizerFastz
kosmos-2.5layoutlmLayoutLMTokenizerLayoutLMTokenizerFast
layoutlmv2LayoutLMv2TokenizerLayoutLMv2TokenizerFast
layoutlmv3LayoutLMv3TokenizerLayoutLMv3TokenizerFast	layoutxlmLayoutXLMTokenizerLayoutXLMTokenizerFastledLEDTokenizerLEDTokenizerFastliltllamallama4llama4_textllava
llava_nextllava_next_videollava_onevision
longformerLongformerTokenizerLongformerTokenizerFastlongt5T5TokenizerT5TokenizerFast)luke)LukeTokenizerNlxmertLxmertTokenizerLxmertTokenizerFastm2m_100M2M100Tokenizermambamamba2marianMarianTokenizermbartMBartTokenizerMBartTokenizerFastmbart50MBart50TokenizerMBart50TokenizerFastmegazmegatron-bert
metaclip_2)zmgp-str)MgpstrTokenizerNminimaxmistralMistralCommonTokenizermixtralmllamamlukeMLukeTokenizerzmm-grounding-dino
mobilebertMobileBertTokenizerMobileBertTokenizerFast
modernbert	moonshinemoshimpnetMPNetTokenizerMPNetTokenizerFastmptmramt5MT5TokenizerMT5TokenizerFastmusicgenmusicgen_melodymvpMvpTokenizerMvpTokenizerFast)myt5)MyT5TokenizerNnemotronnezhanllbNllbTokenizerNllbTokenizerFastznllb-moenystromformerolmoolmo2olmo3olmoezomdet-turbo	oneformerz
openai-gptOpenAIGPTTokenizerOpenAIGPTTokenizerFastoptowlv2owlvit	paligemma)parakeet)ParakeetCTCTokenizerNpegasus	pegasus_x)	perceiver)PerceiverTokenizerN	persimmonphiphi3phimoe)phobert)PhobertTokenizerN
pix2structpixtralplbartPLBartTokenizer)
prophetnet)ProphetNetTokenizerNqdqbertqwen2qwen2_5_omni
qwen2_5_vlqwen2_audio	qwen2_moeqwen2_vlqwen3	qwen3_moe
qwen3_nextqwen3_omni_moeqwen3_vlqwen3_vl_moe)rag)RagTokenizerNrealmRealmTokenizerRealmTokenizerFastrecurrent_gemmareformerReformerTokenizerReformerTokenizerFastrembertRemBertTokenizerRemBertTokenizerFast	retribertRetriBertTokenizerRetriBertTokenizerFastrobertazroberta-prelayernorm)roc_bert)RoCBertTokenizerNroformerRoFormerTokenizerRoFormerTokenizerFastrwkvseamless_m4tSeamlessM4TTokenizerSeamlessM4TTokenizerFastseamless_m4t_v2shieldgemma2siglipSiglipTokenizersiglip2smollm3speech_to_textSpeech2TextTokenizer)speech_to_text_2)Speech2Text2TokenizerNspeecht5SpeechT5Tokenizer)splinter)SplinterTokenizerSplinterTokenizerFastsqueezebertSqueezeBertTokenizerSqueezeBertTokenizerFaststablelm
starcoder2switch_transformerst5t5gemma)tapas)TapasTokenizerN)tapex)TapexTokenizerN)z
transfo-xl)TransfoXLTokenizerNtvpudopUdopTokenizerUdopTokenizerFastumt5video_llavaviltvipllavavisual_bert)vits)VitsTokenizerNvoxtral)wav2vec2rx   )zwav2vec2-bertrx   )zwav2vec2-conformerrx   )wav2vec2_phoneme)Wav2Vec2PhonemeCTCTokenizerNwhisperWhisperTokenizerWhisperTokenizerFastxclipxglmXGLMTokenizerXGLMTokenizerFast)xlm)XLMTokenizerNzxlm-prophetnetXLMProphetNetTokenizerzxlm-robertazxlm-roberta-xlxlnetXLNetTokenizerXLNetTokenizerFastxlstmxmodyosozambazamba2
class_namereturnc                    | dk(  rt         S t        j                         D ]\  \  }}| |v st        |      }|dv r| dk(  rt	        j
                  dd      }nt	        j
                  d| d      }	 t        ||       c S  t        j                  j                         D ]  }|D ]  }t        |dd       | k(  s|c c S  ! t	        j
                  d      }t        ||       rt        ||       S y # t        $ r Y w xY w)	Nr   )r  r  	ministralr  z.tokenization_mistral_commontransformers.ztransformers.models__name__)r   TOKENIZER_MAPPING_NAMESitemsr   	importlibimport_modulegetattrAttributeErrorTOKENIZER_MAPPING_extra_contentvalueshasattr)r  module_name
tokenizersmodule	tokenizermain_modules         i/var/www/html/aiagenthome/venv/lib/python3.12/site-packages/transformers/models/auto/tokenization_auto.pytokenizer_class_from_namer    s
   ..&&#:#@#@#BZ#3K@KAAjTlFl"001OQ_`"001[M1BDYZvz22 $C (66==?
#Iy*d3zA   $ @ )).9K{J'{J// " s   0C--	C98C9pretrained_model_name_or_path	cache_dirforce_downloadresume_downloadproxiestokenrevisionlocal_files_only	subfolderc	                    |	j                  dd      }
|
)t        j                  dt               |t	        d      |
}|	j                  d      }t        | t        ||||||||ddd|      }|t        j                  d       i S t        ||      }t        |d	
      5 }t        j                  |      }ddd       |d<   |S # 1 sw Y   xY w)a  
    Loads the tokenizer configuration from a pretrained model tokenizer configuration.

    Args:
        pretrained_model_name_or_path (`str` or `os.PathLike`):
            This can be either:

            - a string, the *model id* of a pretrained model configuration hosted inside a model repo on
              huggingface.co.
            - a path to a *directory* containing a configuration file saved using the
              [`~PreTrainedTokenizer.save_pretrained`] method, e.g., `./my_model_directory/`.

        cache_dir (`str` or `os.PathLike`, *optional*):
            Path to a directory in which a downloaded pretrained model configuration should be cached if the standard
            cache should not be used.
        force_download (`bool`, *optional*, defaults to `False`):
            Whether or not to force to (re-)download the configuration files and override the cached versions if they
            exist.
        resume_download:
            Deprecated and ignored. All downloads are now resumed by default when possible.
            Will be removed in v5 of Transformers.
        proxies (`dict[str, str]`, *optional*):
            A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128',
            'http://hostname': 'foo.bar:4012'}.` The proxies are used on each request.
        token (`str` or *bool*, *optional*):
            The token to use as HTTP bearer authorization for remote files. If `True`, will use the token generated
            when running `hf auth login` (stored in `~/.huggingface`).
        revision (`str`, *optional*, defaults to `"main"`):
            The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a
            git-based system for storing models and other artifacts on huggingface.co, so `revision` can be any
            identifier allowed by git.
        local_files_only (`bool`, *optional*, defaults to `False`):
            If `True`, will only try to load the tokenizer configuration from local files.
        subfolder (`str`, *optional*, defaults to `""`):
            In case the tokenizer config is located inside a subfolder of the model repo on huggingface.co, you can
            specify the folder name here.

    <Tip>

    Passing `token=True` is required when you want to use a private model.

    </Tip>

    Returns:
        `dict`: The configuration of the tokenizer.

    Examples:

    ```python
    # Download configuration from huggingface.co and cache.
    tokenizer_config = get_tokenizer_config("google-bert/bert-base-uncased")
    # This model does not have a tokenizer config so the result will be an empty dict.
    tokenizer_config = get_tokenizer_config("FacebookAI/xlm-roberta-base")

    # Save a pretrained tokenizer locally and you can reload its config
    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-cased")
    tokenizer.save_pretrained("tokenizer-test")
    tokenizer_config = get_tokenizer_config("tokenizer-test")
    ```use_auth_tokenNrThe `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. Please use `token` instead.V`token` and `use_auth_token` are both specified. Please set only the argument `token`._commit_hashF)r  r  r  r  r  r  r  r   _raise_exceptions_for_gated_repo%_raise_exceptions_for_missing_entries'_raise_exceptions_for_connection_errorsr  z\Could not locate the tokenizer configuration file, will try to use the model config instead.zutf-8)encoding)popwarningswarnFutureWarning
ValueErrorgetr   r   loggerinfor   openjsonload)r  r  r  r  r  r  r  r  r  kwargsr  commit_hashresolved_config_filereaderresults                  r  get_tokenizer_configr  5  s    R ZZ 0$7N! A	
 uvv**^,K&%%'))..305   #rs	%&:KHK	"W	56" 
6(F>M 
6	5s   CCc                   N    e Zd ZdZd Ze ee      d               Ze	dd       Z
y)AutoTokenizera  
    This is a generic tokenizer class that will be instantiated as one of the tokenizer classes of the library when
    created with the [`AutoTokenizer.from_pretrained`] class method.

    This class cannot be instantiated directly using `__init__()` (throws an error).
    c                     t        d      )Nz}AutoTokenizer is designed to be instantiated using the `AutoTokenizer.from_pretrained(pretrained_model_name_or_path)` method.)OSError)selfs    r  __init__zAutoTokenizer.__init__  s    _
 	
    c           	      B	   |j                  dd      }|;t        j                  dt               |j	                  d      t        d      ||d<   |j                  dd      }d|d<   |j                  d	d      }|j                  d
d      }|j                  dd      }|j	                  d      }	|d}
t        j	                  |d      }|,t        d| ddj                  d t        D               d      |\  }}|r#|t        |      }
nt        j                  d       |
t        |      }
|
t        d| d       |
j                  |g|i |S t        |fi |}d|v r|d   |d<   |j	                  d      }d}d|v r4t        |d   t        t        f      r|d   }n|d   j	                  dd      }|t        |t               sM|	r3t#        ||	fi |}t%        |d      d   }t'        j(                  d(i |}nt'        j                  |fd|i|}|j*                  }t-        |d      rd|j.                  v r|j.                  d   }|du}t1        |      t2        v xs% |duxr t        |      duxs t        |dz         du}|r<|r|d   |d   }n|d   }d|v r|j5                  d      d   }nd}t7        |||||      }|rI|rGt9        |fi |}
|j                  dd      }|
j;                           |
j                  |g|d|i|S |[d}
|r!|j=                  d      s| d}t        |      }
|
|}t        |      }
|
t        d d        |
j                  |g|i |S t        |t>              rzt1        |j@                        t1        |jB                        urDt        j                  d!|jB                  jD                   d"|j@                  jD                   d#       |jB                  }tG        t1        |      jH                        }|Tt2        t1        |         \  }}|r|s| |j                  |g|i |S | |j                  |g|i |S t        d$      t        d%|jD                   d&dj                  d' t2        D               d      ))a]  
        Instantiate one of the tokenizer classes of the library from a pretrained model vocabulary.

        The tokenizer class to instantiate is selected based on the `model_type` property of the config object (either
        passed as an argument or loaded from `pretrained_model_name_or_path` if possible), or when it's missing, by
        falling back to using pattern matching on `pretrained_model_name_or_path`:

        List options

        Params:
            pretrained_model_name_or_path (`str` or `os.PathLike`):
                Can be either:

                    - A string, the *model id* of a predefined tokenizer hosted inside a model repo on huggingface.co.
                    - A path to a *directory* containing vocabulary files required by the tokenizer, for instance saved
                      using the [`~PreTrainedTokenizer.save_pretrained`] method, e.g., `./my_model_directory/`.
                    - A path or url to a single saved vocabulary file if and only if the tokenizer only requires a
                      single vocabulary file (like Bert or XLNet), e.g.: `./my_model_directory/vocab.txt`. (Not
                      applicable to all derived classes)
            inputs (additional positional arguments, *optional*):
                Will be passed along to the Tokenizer `__init__()` method.
            config ([`PretrainedConfig`], *optional*)
                The configuration object used to determine the tokenizer class to instantiate.
            cache_dir (`str` or `os.PathLike`, *optional*):
                Path to a directory in which a downloaded pretrained model configuration should be cached if the
                standard cache should not be used.
            force_download (`bool`, *optional*, defaults to `False`):
                Whether or not to force the (re-)download the model weights and configuration files and override the
                cached versions if they exist.
            resume_download:
                Deprecated and ignored. All downloads are now resumed by default when possible.
                Will be removed in v5 of Transformers.
            proxies (`dict[str, str]`, *optional*):
                A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128',
                'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
            revision (`str`, *optional*, defaults to `"main"`):
                The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a
                git-based system for storing models and other artifacts on huggingface.co, so `revision` can be any
                identifier allowed by git.
            subfolder (`str`, *optional*):
                In case the relevant files are located inside a subfolder of the model repo on huggingface.co (e.g. for
                facebook/rag-token-base), specify it here.
            use_fast (`bool`, *optional*, defaults to `True`):
                Use a [fast Rust-based tokenizer](https://huggingface.co/docs/tokenizers/index) if it is supported for
                a given model. If a fast tokenizer is not available for a given model, a normal Python-based tokenizer
                is returned instead.
            tokenizer_type (`str`, *optional*):
                Tokenizer type to be loaded.
            trust_remote_code (`bool`, *optional*, defaults to `False`):
                Whether or not to allow for custom models defined on the Hub in their own modeling files. This option
                should only be set to `True` for repositories you trust and in which you have read the code, as it will
                execute code present on the Hub on your local machine.
            kwargs (additional keyword arguments, *optional*):
                Will be passed to the Tokenizer `__init__()` method. Can be used to set special tokens like
                `bos_token`, `eos_token`, `unk_token`, `sep_token`, `pad_token`, `cls_token`, `mask_token`,
                `additional_special_tokens`. See parameters in the `__init__()` for more details.

        Examples:

        ```python
        >>> from transformers import AutoTokenizer

        >>> # Download vocabulary from huggingface.co and cache.
        >>> tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-uncased")

        >>> # Download vocabulary from huggingface.co (user-uploaded) and cache.
        >>> tokenizer = AutoTokenizer.from_pretrained("dbmdz/bert-base-german-cased")

        >>> # If vocabulary files are in a directory (e.g. tokenizer was saved using *save_pretrained('./test/saved_model/')*)
        >>> # tokenizer = AutoTokenizer.from_pretrained("./test/bert_saved_model/")

        >>> # Download vocabulary from huggingface.co and define model-specific arguments
        >>> tokenizer = AutoTokenizer.from_pretrained("FacebookAI/roberta-base", add_prefix_space=True)
        ```r  Nr  r  r  configT
_from_autouse_fasttokenizer_typetrust_remote_code	gguf_filezPassed `tokenizer_type` z3 does not exist. `tokenizer_type` should be one of z, c              3       K   | ]  }|  y wN .0cs     r  	<genexpr>z0AutoTokenizer.from_pretrained.<locals>.<genexpr>  s      D,Cq,Cs   r  zt`use_fast` is set to `True` but the tokenizer class does not have a fast version.  Falling back to the slow version.zTokenizer class z is not currently imported.r  tokenizer_classauto_mapr  F)return_tensorsFastr   r   z--code_revisionz- does not exist or is not currently imported.z The encoder model config class: z3 is different from the decoder model config class: z. It is not recommended to use the `AutoTokenizer.from_pretrained()` method in this case. Please use the encoder and decoder specific tokenizer classes.zzThis tokenizer cannot be instantiated. Please make sure you have `sentencepiece` installed in order to use this tokenizer.z!Unrecognized configuration class z8 to build an AutoTokenizer.
Model type should be one of c              3   4   K   | ]  }|j                     y wr  )r  r  s     r  r  z0AutoTokenizer.from_pretrained.<locals>.<genexpr>  s     4[IZAQZZIZs   r  )%r  r  r  r  r  r  r  joinr  r  warningfrom_pretrainedr  
isinstancetuplelistr	   r   r   r   	for_modelr  r  r  typer  splitr   r
   register_for_auto_classendswithr   decoderencoder	__class__r   r  )clsr  inputsr  r  r  r  r  r  r  r  tokenizer_class_tupletokenizer_class_nametokenizer_fast_class_nametokenizer_configconfig_tokenizer_classtokenizer_auto_map	gguf_pathconfig_dicthas_remote_codehas_local_code	class_refupstream_repo_tokenizer_class_candidate
model_typetokenizer_class_pytokenizer_class_fasts                               r  r  zAutoTokenizer.from_pretrained  s   Z  $4d;%MM E zz'". l  -F7OHd+#|::j$/$4d;"JJ':DAJJ{+	 %"O$;$?$?PT$U!$, .~.>>qyy D,C DDEQH 
 ?T; ";,8&?@Y&ZONN= &";<P"Q& #34H3IId!eff2?223PdSYd]cdd 00MXQWX--%5n%EF>"!1!5!56G!H!))*:6F%5j%A"%5j%A%E%EoW[%\" ")f&67 +,I9 _X^ _I"6yQV"WX`"aK'11@K@F'775IZ^dF &,%;%;"vz*&///Q%+___%E",D8f):: 
"$. )*@AM Z,-Cf-LMUYY	 	 .q1=.q1	.q1	y  ) 5a 8 $ 9!#@.Racp! 0;IGdohnoO

?D1A3352?22-06J[_e  $/"O 6 ? ? G/E.Fd,K)";<U"V&,B)";<U"V& &'@&AAno  3?223PdSYd]cdd f23FNN#4+??6v~~7O7O6P Q%%+^^%=%=$> ?22 ^^F/V0E0EF
!7Hf7V4 4#5G5O;+;;<Ym\bmflmm%1=-==>[o^dohnoo$: 
 /0@0@/A B++/994[IZ4[+[*\\]_
 	
r  Nc                    ||t        d      |t        |t              rt        d      |t        |t              rt        d      |=|;t        |t              r+|j                  |k7  rt        d|j                   d| d      | t
        j                  v rt
        |    \  }}||}||}t
        j                  | ||f|       y)	a  
        Register a new tokenizer in this mapping.


        Args:
            config_class ([`PretrainedConfig`]):
                The configuration corresponding to the model to register.
            slow_tokenizer_class ([`PretrainedTokenizer`], *optional*):
                The slow tokenizer to register.
            fast_tokenizer_class ([`PretrainedTokenizerFast`], *optional*):
                The fast tokenizer to register.
        NzKYou need to pass either a `slow_tokenizer_class` or a `fast_tokenizer_classz:You passed a fast tokenizer in the `slow_tokenizer_class`.z:You passed a slow tokenizer in the `fast_tokenizer_class`.zThe fast tokenizer class you are passing has a `slow_tokenizer_class` attribute that is not consistent with the slow tokenizer class you passed (fast tokenizer has z and you passed z!. Fix one of those so they match!)exist_ok)r  
issubclassr   r   slow_tokenizer_classr  r  register)config_classr'  fast_tokenizer_classr%  existing_slowexisting_fasts         r  r(  zAutoTokenizer.register  s     ',@,Hjkk+
;OQh0iYZZ+
;OQd0eYZZ !,$0/1HI$99=QQ['<<==MNbMc d!!  ,;;;+<\+J(M=#+'4$#+'4$""<2FH\1]hp"qr  )NNF)r  
__module____qualname____doc__r  classmethodr   r  r  staticmethodr(  r  r  r  r  r    sH    
 &'>?`
 @ `
D )r )rr  r  r  )NFNNNNF )<r/  r  r  osr  collectionsr   typingr   r   r   transformers.utils.import_utilsr   configuration_utilsr	   dynamic_module_utilsr
   r   modeling_gguf_pytorch_utilsr   tokenization_utilsr   tokenization_utils_baser   utilsr   r   r   r   r   r   encoder_decoderr   auto_factoryr   configuration_autor   r   r   r   r   tokenization_utils_fastr   
get_loggerr  r  strr  r  r  r  CONFIG_TO_TYPEr
  r  PathLikebooldictr  r  __all__)kvs   00r  <module>rJ     s#      	  # ' ' G 3 \ ? 5 <  3 *  B" 
		H	% P+c5#1M+N&NOS'>'@#d	
S %?%A!t)@)B%	
S 
?;R;T$7Z^_`S  
#=T=V%9\`ab!S" 
"<S<U$8[_`a#S$ 
7N7P3VZ[\%S& 
/:Q:S#6Y]^_'S( 	9)S, &@&B"*A*C&	
+S8 	09S: 
/:Q:S#6Y]^_;S< 
:T:V6\`bfgh=S> 	;?S@ 	2ASD &@&B"*A*C&	
CSP 
/KbKd1GjnopQSR 	.SST 
D7N7P3VZ[\USV 	KWSX 	AYSZ 
/:Q:S#6Y]^_[S\ 
O<S<U%8[_`a]S^ 
41H1J-PTUV_S` 
4K4M0SWXYaSb 
+G^G`-CfjklcSd 
/:Q:S#6Y]^_eSf 	*gSj (B(D$$,C,E(4	
iSv 	.wSz $>$@ d(?(A$t	
ySF 
/BYB[+>aefgGSJ "*A*C&	
ISX '>'@#d	
WSf '>'@#d	
eSr 	*sSv (B(D$$,C,E(4	
uSB 
'CZC\)?bfghCSD 
D3J3L/RVWXESF 
T4K4M0SWXYGSH 
%?V?X';^bcdISJ 
&@W@Y(<_cdeKSL 
)F]F_+BeijkMSP "<">D&=&?"T	
OS\ 	.]S^ 
4K4M0SWXY_S` 	*aSb 	;cSd 
-I`Ib/EhlmneSf 
/:Q:S#6Y]^_gSh 
'CZC\)?bfghiSl (B(D$$,C,E(4	
kSz $>$@ d(?(A$t	
ySH $>$@ d(?(A$t	
GSV $>$@ d(?(A$t	
USd !$>$@ d(?(A$t	
cSp 	(qSt $>$@ d(?(A$t	
sS@ 
-LcLe/HkopqASD -5L5N1TX	
CSP 
'CZC\)?bfghQSR 
/:Q:S#6Y]^_SST 
?;R;T$7Z^_`USV 
d4K4M0SWXYWSX 
$8O8Q 4W[\]YSZ 
*D*F&DRVWX[S\ 	(]S` #:#<$'>'@#d	
_Sl 
D7N7P3VZ[\mSn 
$:Q:S 6Y]^_oSr $0C0E,4QUV	
qSx 	2ySz 
t4K4M0SWXY{S| 
/:Q:S#6Y]^_}S~ 	*S@ 
%@W@Y'<_cdeASD $>$@ d(?(A$t	
CSR $>$@ d(?(A$t	
QS` $>$@ d(?(A$t	
_Sn $>$@ d(?(A$t	
mS| $>$@ d(?(A$t	
{SJ $>$@ d(?(A$t	
ISV 
9P9R"5X\]^WSX 
4K4M0SWXYYSZ 
$5L5N1TXYZ[S\ 
d9P9R5X\]^]S^ 
46M6O2UYZ[_S` 
t:Q:S6Y]^_aSb 
*D*F&DRVWXcSd 
/:Q:S#6Y]^_eSf 
AXAZ*=`defgSh 
_=T=V&9\`abiSj 
d6M6O2UYZ[kSl 	BmSn 
T8O8Q4W[\]oSp 
/:Q:S#6Y]^_qSr 	?sSt 	-uSv 	0wSx 	6ySz 	6{S| 
OD[D]-@cghi}S~ 
o>U>W':]abcS@ 
D7N7P3VZ[\ASB 
'CZC\)?bfghCSD 	3ESF 
%AXAZ'=`defGSH 
T3J3L/RVWXISJ 
&@W@Y(<_cdeKSL 
&@W@Y(<_cdeMSN 
/BYB[+>aefgOSP 
G^G`0CfjklQSR 
&@W@Y(<_cdeSSV $>$@ d(?(A$t	
USb 
41H1J-PTUVcSf $>$@ d(?(A$t	
eSr 	0sSv )C)E%4-D-F)D	
uSB	 
;R;T7Z^_`C	SD	 
)F]F_+BeijkE	SF	 
-LcLe/HkopqG	SH	 
-LcLe/HkopqI	SJ	 
+I`Ib-EhlmnK	SL	 
7N7P!3VZ[\M	SN	 
'F]F_)BeijkO	SR	 $>$@ d(?(A$t	
Q	S`	 $>$@ d(?(A$t	
_	Sn	 $>$@ d(?(A$t	
m	Sz	 
#=T=V%9\`ab{	S|	 
(BYB[*>aefg}	S~	 
.H_Ha0Dgklm	S@
 
-G^G`/CfjklA
SB
 
-LcLe/HkopqC
SF
 !;!=4%<%>!D	
E
SR
 	*S
ST
 
%@W@Y'<_cdeU
SV
 
*D*F&DRVWXW
SX
 
43J3L/RVWXY
SZ
 
D4K4M0SWXY[
S\
 
)C)E%4QUVW]
S`
 $>$@ d(?(A$t	
_
Sn
 &@&B"*A*C&	
m
Sz
 
$@W@Y&<_cde{
S|
 
?CZC\,?bfgh}
S@ %-D-F)D	

SL 	/MSP #=#?T'>'@#d	
OS^  /0 )*D*F&D(?(AJeJg$mq		
]Sp  /0 )*D*F&D(?(AJeJg$mq		
oS@ 
$>U>W&:]abcASB 
'A'C#tTUCSD 
G^G`0CfjklESF 
-LcLe/HkopqGSH 
;R;T7Z^_`ISJ 
t:Q:S6Y]^_KSL 
46M6O2UYZ[MSN 
#=T=V%9\`abOSP 
1H1J-PTUVQSR 
#?V?X%;^bcdSSV "<">D&=&?"T	
USb 
m:Q:S%6Y]^_cSd 
]AXAZ,=`defeSf 
7N7P!3VZ[\gSh 	*iSj 
d9P9R5X\]^kSl 
?;R;T$7Z^_`mSp #=#?T'>'@#d	
oS~ #=#?T'>'@#d	
}SL %?%A!t)@)B%	
KSX 
$2I2K.QUVWYSZ 
43J3L/RVWX[S\ 
40G0I,tTU]S^ 
43J3L/RVWX_Sb 5L5N1TXY	
aSh 
?V?X(;^bcdiSl !?V?X#;^bc	
kSr 
9P9R"5X\]^sSt 
?;R;T$7Z^_`uSv 
O<S<U%8[_`awSx 
'AXAZ)=`defySz 	5{S~ &@&B"*A*C&	
}SL &@&B"*A*C&	
KSX	
YSh $>$@ d(?(A$t	
gSt 
#?V?X%;^bcduSv 
"<S<U$8[_`awSx 
$>U>W&:]abcySz 	0{S| 
<S<U'8[_`a}S@ .0 )3J3L/RV		
SP 
)C)E%4QUVWQSR 	6SST 
_=T=V&9\`abUSX  (?(A$t	
WSd 
*D[D],@cghieSf 
(BYB[*>aefggSh 
)CZC\+?bfghiSl  (?(A$t	
kSx 
&@W@Y(<_cdeyS|  (?(A$t	
{SJ  (?(A$t	
ISX  (?(A$t	
WSd 
,F]F_.BeijkeSf 
&@W@Y(<_cdegSh 
*D[D],@cghiiSj 	(kSl 
#=T=V%9\`abmSp $>$@ d(?(A$t	
oS~ 'A'C#+B+D'$	
}SL &@&B"*A*C&	
KSX 
+I`Ib-EhlmnYSZ 
'CZC\)?bfgh[S^ #;R;T!7Z^_	
]Sd 	1eSf 
)F]F_+BeijkgSh 
$2I2K.QUVWiSl *D*F&D.E.G*T	
kSz *D*F&D.E.G*T	
ySH $>$@ d(?(A$t	
GST 
)C)E%4QUVWUSX $>$@ d(?(A$t	
WSd 
T8O8Q4W[\]eSf 
6P6R2X\^bcdgSh 	>iSj 
-G-I)tUYZ[kSl 	EmSp #CZC\%?bfg	
oSv 
d6M6O2UYZ[wSx 
@W@Y)<_cdeyS| "!;!=4%<%>!D	
{SJ !;!=4%<%>!D	
ISX $>$@ d(?(A$t	
WSd 	,eSf 	,gSh 	5iSj 
9P9R"5X\]^kSn #=#?T'>'@#d	
mS| !;!=4%<%>!D	
{SH 
)CZC\+?bfghISJ 
/:Q:S#6Y]^_KSL 
&@W@Y(<_cdeMSN 
AXAZ*=`defOSP 	*QST  /0 )*D*F&D(?(AJeJg$mq		
SSd 	5eSf 	:gSh 	?iSj 	DkSl 
'CZC\)?bfghmSn 
?;R;T$7Z^_`oSr #=#?T'>'@#d	
qS~ 	(S@ 
8R8T4Z^`defASD )C)E%4-D-F)D	
CSR )C)E%4-D-F)D	
QS` $>$@ d(?(A$t	
_Sl 
43J3L/RVWXmSp )C)E%4-D-F)D	
oS~ %?%A!t)@)B%	
}SL $>$@ d(?(A$t	
KSZ $>$@ d(?(A$t	
YSU n %%9;RS #=#7#=#=#?@#?41a!Q$#?@# %S	42H @ 9= &*(,(,""l#(bkk#.>)>#?lc2;;s#3345l l d^	l
 d38n%l E$)$%l sml l l 
#s(^l^\r \r~ 
0_ As   B6AE