ó
    <±h[  ã                   óJ   • S SK Jr  S SKJr  S SKJr  S SKJrJr  S\S\	4S jr
g)	é    )ÚPath)ÚAny)ÚTikTokenConverter)ÚTIKTOKEN_VOCAB_FILEÚTOKENIZER_FILEÚencodingÚ
output_dirc                 óì  • [        U5      nUR                  SS9  US-  [        -  nU[        -  n[	        UR                  5       5      n[	        UR                  5       5      n SSKJn  SSKJ	n  [        U [        5      (       a  U" U 5      n U" U R                  U5        [        X@R                  U R                   S9R#                  5       nUR%                  U5        g	! [         a    [        S5      ef = f)
a—  
Converts given `tiktoken` encoding to `PretrainedTokenizerFast` and saves the configuration of converted tokenizer
on disk.

Args:
    encoding (`str` or `tiktoken.Encoding`):
        Tokenizer from `tiktoken` library. If `encoding` is `str`, the tokenizer will be loaded with
        `tiktoken.get_encoding(encoding)`.
    output_dir (`str`):
        Save path for converted tokenizer configuration file.
T)Úexist_okÚtiktokenr   )Úget_encoding)Údump_tiktoken_bpezY`tiktoken` is required to save a `tiktoken` file. Install it with `pip install tiktoken`.)Ú
vocab_fileÚpatternÚadditional_special_tokensN)r   Úmkdirr   r   ÚstrÚabsoluter   r   Útiktoken.loadr   Ú
isinstanceÚ_mergeable_ranksÚImportErrorÚ
ValueErrorr   Ú_pat_strÚ_special_tokensÚ	convertedÚsave)	r   r	   Ú	save_fileÚtokenizer_fileÚsave_file_absoluteÚoutput_file_absoluter   r   Ú	tokenizers	            ÚZ/var/www/html/shao/venv/lib/python3.13/site-packages/transformers/integrations/tiktoken.pyÚconvert_tiktoken_to_fastr$      së   € ô jÓ!€JØ×Ñ˜dÐÑ#à˜ZÑ'Ô*=Ñ=€IØ¤.Ñ0€Nä˜Y×/Ñ/Ó1Ó2ÐÜ˜~×6Ñ6Ó8Ó9Ðð	vÝ)Ý3äh¤×$Ñ$Ù# HÓ-ˆHá˜(×3Ñ3Ð5GÔHô "Ø%×/@Ñ/@Ð\d×\tÑ\tñçiƒkð ð ‡NNÐ'Õ(øô ó vÜÐtÓuÐuðvús   Á#<C ÃC3N)Úpathlibr   Útypingr   Ú#transformers.convert_slow_tokenizerr   Ú$transformers.tokenization_utils_fastr   r   r   r$   © ó    r#   Ú<module>r+      s$   ðÝ Ý å Aß Tð#) sð #)¸õ #)r*   