
    <ho                        S r SSKJr  SSKrSSKJrJrJrJrJ	r	  SSK
JrJr  SSKJr  SSKJrJrJrJrJr  SS	KJr  SS
KJr  \R2                  " \5      r0 SSSS._SSSSSSSSSSSS.
_SSSSSSSSSSSS.
_SSSSSSSSSSSS.
_SSSSSSSSSSSSSS ._S!SSSSSSSSSSS.
_S"SSSSSSSSSSSSS ._S#SSSSSSSSSSS.
_S$S%S&S'S(S)._S*SSSSSSSSSSS.
_S+S,SS-SS.S/._S0S1S2S3S4S5S6SS.S7S8SS9._S:SSSSSSSS;SS<.	_S=S,S>S?S@S-S.SA._SBSSSSSSSCSD._SESSSS.SSFSGSHSSI.	_SJSSSSSSSSSKSS.
_SSSSSSSSSSSLSSM.SSSSSSSSSSSLSSM.SN.ErSOSPSQSRSSS%S&S'S(STSU.
SVSS%S&S'S(SW.SX.rSY r " SZ S[5      r " S\ S]\5      r  " S^ S_\5      r! " S` Sa\5      r" " Sb Sc\5      r# " Sd Se\5      r$ " Sf Sg\5      r%0 S\ _S\!_Sh\!_S!\!_Si\!_S*\"_S+\#_S#\#_S:\#_S=\#_SB\#_S0\$_SE\#_SJ\#_Sj\%_Sk\%_r&Sl\4Sm jr'g)nz
Integration with GGML / The file is copied and adapted from https://github.com/99991/pygguf
with extra methods beings exposed
    )arrayN)	Tokenizerdecodersnormalizerspre_tokenizers
processors)BPEUnigram   )
AddedToken)GemmaConverterGPT2ConverterLlamaConverterQwen2ConverterT5Converter)logging)tqdmgeneral
model_type_model_name_or_path)architecturenamellamamax_position_embeddingsnum_hidden_layersintermediate_sizehidden_sizehead_dim
rope_thetanum_attention_headsnum_key_value_headsrms_norm_eps
vocab_size)
context_lengthblock_countfeed_forward_lengthembedding_lengthrope.dimension_countrope.freq_baseattention.head_countattention.head_count_kv attention.layer_norm_rms_epsilonr#   mistralqwen2qwen2moenum_expertsnum_experts_per_tok)r$   r%   r&   r'   r(   r)   r*   r+   r,   r#   expert_countexpert_used_countqwen3qwen3moefalcon	tokenizerbos_token_ideos_token_idunk_token_idpad_token_id)ggml.bos_token_idggml.eos_token_idggml.unknown_token_idggml.padding_token_idphi3bloomn_layern_headlayer_norm_epsilon)r%   r'   r*   r#   attention.layer_norm_epsilont5n_positions
num_layersd_ffd_modeld_kv	num_headsrelative_attention_num_bucketsdecoder_start_token_id)r$   r%   r&   r'   attention.key_lengthr*   r+   rE   z attention.relative_buckets_countrN   r#   stablelmlayer_norm_eps)	r$   r%   r&   r'   r(   r*   r+   rE   r#   gpt2n_ctxn_embdr&   )r%   r$   r'   r&   r*   rE   
starcoder2norm_epsilon)r%   r$   r'   r&   r*   r+   rE   mambaconv_kernel
state_sizetime_step_rank)	r#   r$   r'   r,   r%   zssm.conv_kernelzssm.state_sizezssm.time_step_rankzssm.inner_sizenemotronnorm_epssliding_window)r$   r%   r&   r'   r(   r)   rO   r*   r+   r,   zattention.sliding_windowr#   )gemma2gemma3tokenizer_typetokensscores
token_typemergesadd_prefix_space)

ggml.modelzggml.tokenszggml.scoreszggml.token_typezggml.mergesr<   r=   r>   r?   zggml.add_space_prefixchat_template)rg   rf   r<   r=   r>   r?   )r7   tokenizer_configc                    [        U[        5      (       d  U/n[        U5      S:X  a  US   nS nOUS   S:w  a  [        S5      eUu  pUS;   a  [	        U S   5      n U $ US;   a  [        U S   5      n U $ US;   a  [        U S   5      n U $ US;   a3  [        S	[        U 5      5      R                  5       R                  5       n U $ US
;   a  [        X5      n U $ )N   r   	   zPReceived multiple types, therefore expected the first type to indicate an array.)r   rj   r            
      )      )   )   B)rk   )
isinstancelistlen
ValueErrorintfloatboolr   tobytesdecode_gguf_parse_value)_value	data_typearray_data_types      V/var/www/html/shao/venv/lib/python3.13/site-packages/transformers/integrations/ggml.pyr   r     s    i&&K	
9~aL	Q<1opp%."	..VAY M 
g	vay! M 
c	fQi
 M	 
c	sDL)113::< M 
c	"6;M    c                       \ rS rSrS rSrg)GGUFTokenizerSkeletoni.  c                   ^ UR                  5        H  u  p#[        XU5        M     [        U S5      (       Gd2  [        U S5      (       a  [        U S5      (       d  [        S5      eU R                  nU R
                  n[        U5       VVs0 sH
  u  pgXuU   _M     snnm[        R                  S5        / n[        TR                  5       5       Hl  u  p/ n[        S[        U	5      5       H,  nU	S U XS  pX;   d  M  X;   d  M  UR                  XU
45        M.     [        UU4S jSS	9nUR                  U5        Mn     [        US
 SS	9nU Vs/ sH  oS   US   4PM     nnXl        O}U R                   V	s/ sH  n	[!        U	R#                  S5      5      PM     sn	U l        [        U S5      (       d3  [        [        U R                  5      5       Vs/ sH  nS PM     snU l        [        U S5      (       d  / U l        [        U S5      (       d  S U l        [        U S5      (       a   U R&                  c  U R(                  U l        g g g s  snnf s  snf s  sn	f s  snf )Nrd   ra   rb   z\tokens and scores need to be passed for a LLaMa tokenizer without merges to be instantiated.z:Merges were not in checkpoint, building merges on the fly.rj   c                 $   > TU S      TU S      4$ )Nr   rj    )xvocabs    r   <lambda>0GGUFTokenizerSkeleton.__init__.<locals>.<lambda>D  s    U1Q4[%!+4Nr   T)keyreversec                     U S   $ )Nr   r   )vals    r   r   r   F  s    CFr   r    added_tokensr:   unknown_token_id)itemssetattrhasattrry   ra   rb   	enumerateloggerwarningr   rangerx   appendsortedextendrd   tuplesplitr   r:   r   )selfdict_kvra   rb   itrd   mergepiece_scorelocalindexpiece_lpiece_rr   _r   s                    @r   __init__GGUFTokenizerSkeleton.__init__/  s   KKMDADQ " tX&&4**'$2I2I r  [[F[[F.7.?@.?daQq	\.?@ENNWXF&*5;;=&9""1c%j1E',Ve}eFmW(W->g%DE 2 u*NX\]e$ ': F(:DIF289&31vs1v&&F9 K@DLu5S!12LDK4**-23t{{3C-DE-Dt-DEt^,, "Dt^,, $D 4+,,1B1B1J $ 5 5D 2K,7 A : MEs   I
I9#II)r   rd   rb   r:   N)__name__
__module____qualname____firstlineno__r   __static_attributes__r   r   r   r   r   .  s    '6r   r   c                   8    \ rS rSrS rS rS rS rS rS r	Sr
g	)
GGUFLlamaConverteriY  c                     [        U5      U l        U R                  U l        0 U l        [	        U R                  SS5      S:g  U l        g )Nr`   r   )r   protooriginal_tokenizeradditional_kwargsgetattris_llama_3_tokenizerr   tokenizer_dicts     r   r   GGUFLlamaConverter.__init__Z  s>    *>:
"&**!#$+DJJ8H'$RV]$]!r   c                 T    [        [        UR                  UR                  5      5      $ Nrw   zipra   rb   r   r   s     r   r   GGUFLlamaConverter.vocab`      Cell344r   c                     UR                   $ r   rd   r   s     r   rd   GGUFLlamaConverter.mergesc      ||r   c                    U R                  U R                  5      nU R                  U R                  5      n[        U5       VVVs0 sH
  u  nu  pVXT_M     nnnnUR                  b  UR
                  UR                     OS n[        USS 5      b  UR
                  UR                     OS n	[        USS 5      b  UR
                  UR                     OS n
[        [        UUUSSS95      n/ n[        U R                  S5      (       dX  Ub  UR                  [        USSS95        U	b  UR                  [        U	SSS95        U
b  UR                  [        U
SSS95        O~[        R                  " [        R                  " U R                  R                   5      S:H  5      S	   nU H4  nUR                  [        U R                  R
                  U   SSS95        M6     [#        U5      S	:w  a  UR%                  U5        [#        U R                  R&                  5      S	:w  a>  UR)                  U R                  R&                   Vs/ sH  n[        USSS9PM     sn5        XR*                  S
'   XR*                  S'   XR*                  S'   U R,                  (       a>  S U R*                  S'   SU R*                  S'   SU R*                  S'   SU R.                  l        U$ s  snnnf s  snf )Nr8   r9   T)	unk_tokenfuse_unkbyte_fallbackrc   F
normalizedspecialrl   r   r   	eos_token	bos_tokenre   clean_up_tokenization_spaceslegacy)r   r   rd   r   r:   ra   r   r8   r   r	   r   r   r   npwherer   rc   rx   add_special_tokensr   
add_tokensr   r   r   r   )r   r   vocab_scoresrd   r   word_score	bpe_vocabr   r   r   r7   special_tokensspecial_tokens_idxidxadded_tokens                   r   r7   GGUFLlamaConverter.tokenizerf  s   zz$**-TZZ(6?6MN6M!2NTTW6M	N8=8J8J8VELL!3!34\`	8?~W[8\8hELL!3!34nr	8?~W[8\8hELL!3!34nr	#"
	 tzz<00$%%juVZ&[\$%%juVZ&[\$%%juVZ&[\ "$"((4::3H3H*IQ*N!OPQ!R)%%j1B1B31GTYcg&hi * ~!#((8tzz&&'1,  ]a]g]g]t]tu]tkKE5I]tu /8{+.7{+.7{+$$9=D""#56EID""#AB/4D""8,-2D##*k ON vs   KK c                 D   [         R                  " 5       [         R                  " 5       [         R                  " SS5      /nU R                  (       a  U[         R
                  " SSSS9/-  nU(       a  U[         R                  " SSS9/-  n[         R                  " U5      $ )N   ▁r   FTre   trim_offsets	use_regexrj   contentleft)r   ByteFallbackFuseReplacer   	ByteLevelStripSequencer   replacementre   sequences       r   decoderGGUFLlamaConverter.decoder  s    !!#MMOUC(
 $$++UQVbfghhH!<==H  **r   c                    U R                  U R                  5      nU R                  U R                  5      nUb  X!l        SnSn[        U R                  S5      (       a  U R                  R
                  nU R                  X45      nUb  XQl        U R                  X45      Ul        U R                  5       nU(       a  Xal        U R                  (       a6  [        R                  " SSSS9Ul        [        R                  " / 5      Ul        U$ )Nr   Tre   Fr   )r7   r   
normalizerr   r   re   pre_tokenizerr   post_processorr   r   r   r   r   )r   r7   r   r   re   r   r   s          r   	convertedGGUFLlamaConverter.converted  s    NN4::.	 __TZZ0
!#- 4**,>??#66GG**;I$&3# LLG	,,.'5$ $$&4&>&>!&Ud'I#
 $/#7#7#;I r   )r   r   r   r   N)r   r   r   r   r   r   rd   r7   r   r   r   r   r   r   r   r   Y  s"    ^58t+!r   r   c                   6   ^  \ rS rSrS rS\4U 4S jjrSrU =r$ )GGUFQwen2Converteri  c                 2    [        U5      U l        0 U l        g r   r   r   r   r   s     r   r   GGUFQwen2Converter.__init__      "7"G!#r   returnc           
         > [        U R                  R                  5       VVs0 sH  u  pX!_M	     nnnU R                  R                  n[        TU ]  X45      nUR                  [        SSSS9[        SSSS9[        SSSS9/5        U$ s  snnf )N<|endoftext|>FTr   z<|im_start|>z
<|im_end|>)r   r   ra   rd   superr   r   r   r   r   r   r   rd   r7   	__class__s         r   r   GGUFQwen2Converter.converted  s    (1$2I2I2P2P(QR(QWQ(QR((//G%e4	$$?udK>eTJ<E4H	
  Ss   B
r   r   	r   r   r   r   r   r   r   r   __classcell__r  s   @r   r   r     s    $9  r   r   c                   @    \ rS rSrS rS rS rS rS rS\	4S jr
S	rg
)GGUFPhi3Converteri  c                 T    [        U5      U l        U R                  U l        0 U l        g r   r   r   r   r   r   s     r   r   GGUFPhi3Converter.__init__  s"    *>:
"&**!#r   c                 T    [        [        UR                  UR                  5      5      $ r   r   r   s     r   r   GGUFPhi3Converter.vocab  r   r   c                     UR                   $ r   r   r   s     r   rd   GGUFPhi3Converter.merges  r   r   c                    U R                  U R                  5      nU R                  U R                  5      n[        U5       VVVs0 sH
  u  nu  pVXT_M     nnnn[	        [        Xs5      5      nUR                  [        SSSSSS9[        SSSS9[        SSSSS9[        S	SSSS9[        S
SSSS9[        SSSSS9[        SSSSS9[        SSSSS9[        SSSSS9[        SSSSS9[        SSSSS9[        SSSSS9/5        UR                  b  UR                  UR                     OS U R                  S'   UR                  b  UR                  UR                     OS U R                  S'   UR                  b  UR                  UR                     OS U R                  S'   UR                  b  UR                  UR                     OS U R                  S'   U$ s  snnnf )N</s>TF)rstriplstripr   r   r  r   z<|assistant|>)r  r   r   z<|placeholder1|>z<|placeholder2|>z<|placeholder3|>z<|placeholder4|>z
<|system|>z<|end|>z<|placeholder5|>z<|placeholder6|>z<|user|>r   r   r   	pad_token)r   r   rd   r   r   r	   r   r   r:   ra   r   r9   r8   r;   )	r   r   r   rd   r   r   r   r   r7   s	            r   r7   GGUFPhi3Converter.tokenizer  s   zz$**-TZZ(6?6MN6M!2NTTW6M	Nc)45	$$6$uX\]?udK?4ESWX-duVZ[-duVZ[-duVZ[-duVZ[<PTU9TeTR-duVZ[-duVZ[:dudS	
$ 160B0B0NELL++,TX 	{+ 160B0B0NELL++,TX 	{+ 160B0B0NELL++,TX 	{+ 160B0B0NELL++,TX 	{+ E Os   Gc                     [         R                  " 5       [         R                  " 5       [         R                  " US5      /nU(       a  U[         R                  " SSS9/-  n[         R
                  " U5      $ )Nr   rj   r   )r   r   r   r   r   r   r   s       r   r   GGUFPhi3Converter.decoder  s\    !!#MMO[#.
 !<==H  **r   r  c                     U R                  U R                  5      nSnSn[        U R                  S5      (       a  U R                  R                  nU R                  X#5      Ul        U$ )Nr   Tre   )r7   r   r   r   re   r   )r   r7   r   re   s       r   r   GGUFPhi3Converter.converted$  s\    NN4::.	4**,>??#66GG LLG	r   r   r   r   N)r   r   r   r   r   r   rd   r7   r   r   r   r   r   r   r   r  r    s(    $
5%N	+
9 
r   r  c                   6   ^  \ rS rSrS rS\4U 4S jjrSrU =r$ )GGUFGPTConverteri1  c                 2    [        U5      U l        0 U l        g r   r   r   s     r   r   GGUFGPTConverter.__init__2  r  r   r  c                    > [        U R                  R                  5       VVs0 sH  u  pX!_M	     nnnU R                  R                  n[        TU ]  X45      nU$ s  snnf r   )r   r   ra   rd   r  r   r  s         r   r   GGUFGPTConverter.converted6  sZ    (1$2I2I2P2P(QR(QWQ(QR((//G%e4	 Ss   Ar	  r
  r  s   @r   r"  r"  1  s    $9  r   r"  c                   :    \ rS rSrS rS rS rS rS\4S jr	Sr
g	)
GGUFT5Converteri=  c                     S/US'   [        U5      U l        [        U R                  R                  5       VVs0 sH  u  p#X2_M	     snnU l        U R                  U l        0 U l        g s  snnf Nz
dummy textrd   )r   r   r   ra   token2idr   r   )r   r   r   r   s       r   r   GGUFT5Converter.__init__>  s_    $0>x *>:
*3DJJ4E4E*FG*F$!*FG"&**!# Hs   A(c                 T    [        [        UR                  UR                  5      5      $ r   r   r   s     r   r   GGUFT5Converter.vocabG  r   r   c                    [        U R                  SS5      (       ae  / n[        U R                  SS5      (       a  U[        R                  " SS9/-  nU[        R                  " SSS9/-  n[        R
                  " U5      $ g )Nr   Tre   r   )prependr   )patternr   )r   r   r   Prependr   r   )r   r   r   s      r   r   GGUFT5Converter.normalizerJ  sx    4**Hd;;Ht..0BDII[00?@@,,S%HIIH''11r   c                 V    [         R                  " SS// SQSU R                  S   4/S9$ )N$Ar  )r5  r  z$Br  )singlepairr   )r   TemplateProcessingr+  )r   s    r   r   GGUFT5Converter.post_processorS  s5    ,,&>-v./
 	
r   r  c                    U R                  U R                  5      n[        [        UU R                  R                  SS95      nU R                  U R                  5      nUb  X2l        SnSn[        U R                  S5      (       a  U R                  R                  nU R                  XE5      nUb  Xbl	        U R                  XE5      Ul
        U R                  5       nU(       a  Xrl        U$ )NFunk_idr   r   Tre   )r   r   r   r
   r:   r   r   r   re   r   r   r   )r   r   r7   r   r   re   r   r   s           r   r   GGUFT5Converter.converted\  s    zz$**-zz..#
	 __TZZ0
!#- 4**,>??#66GG**;I$&3# LLG	,,.'5$r   )r   r   r   r+  N)r   r   r   r   r   r   r   r   r   r   r   r   r   r   r(  r(  =  s"    $5
9 r   r(  c                   :    \ rS rSrS rS rS rS rS\4S jr	Sr
g	)
GGUFGemmaConverteri|  c                 `    S/US'   [        U5      U l        U R                  U l        0 U l        g r*  r  r   s     r   r   GGUFGemmaConverter.__init__}  s.    $0>x *>:
"&**!#r   c                 V   [        [        UR                  UR                  5      5      n/ nU Hw  u  pEUS:X  a  UR	                  SU45        M   SU;   a?  [        UR                  5       5      S:X  a"  S[        U5      -  nUR	                  Xe45        Me  UR	                  XE45        My     U$ )Nz<0x09>	r   r   r   )rw   r   ra   rb   r   rx   strip)r   r   original_vocabupdated_vocabtokenscoreunderscoress          r   r   GGUFGemmaConverter.vocab  s    c%,,=>*LE $$dE]3#ekkm"4"9#c%j0$$k%9:$$e^4 + r   c                 0    [         R                  " SS5      $ )Nr   r   )r   r   r   s     r   r   GGUFGemmaConverter.normalizer  s    ""3..r   c                     [         R                  " SS5      [         R                  " 5       [         R                  " 5       /nU(       a  U[         R                  " SSS9/-  n[         R
                  " U5      $ )Nr   r   rj   r   )r   r   r   r   r   r   r   s       r   r   GGUFGemmaConverter.decoder  s\    UC(!!#MMO
 !<==H  **r   r  c                    U R                  U R                  5      n[        [        UU R                  R                  U R
                  S95      nU R                  U R                  5      nUb  X2l        SnSn[        U R                  S5      (       a  U R                  R                  nU R                  XE5      Ul
        U R                  XE5      nUb  Xbl        U$ )Nr;  r   Tre   )r   r   r   r
   r:   handle_byte_fallbackr   r   r   re   r   r   )r   r   r7   r   r   re   r   s          r   r   GGUFGemmaConverter.converted  s    zz$**-zz.."77
	 __TZZ0
!#- 4**,>??#66GG LLG	**;I$&3#r   r   N)r   r   r   r   r   r   r   r   r   r   r   r   r   r   r?  r?  |  s"    $/	+9 r   r?  	qwen2_moe	qwen3_moer^   gemma3_textr  c                 ^    U n[         U   " U5      nUR                  5       nXCR                  4$ )a  
Utilities to convert a slow tokenizer instance in a fast tokenizer instance.

Args:
    architecture (`str`): The model architecture derived from gguf file.
    transformer_tokenizer ([`~tokenization_utils_base.PreTrainedTokenizer`]):
        Instance of a slow tokenizer to convert in the backend tokenizer for
        [`~tokenization_utils_base.PreTrainedTokenizerFast`].

Return:
    A instance of [`~tokenizers.Tokenizer`] to be used as the backend tokenizer of a
    [`~tokenization_utils_base.PreTrainedTokenizerFast`]
)GGUF_TO_FAST_CONVERTERSr   r   )r   r   tokenizer_class_name	converterfast_tokenizers        r   convert_gguf_tokenizerrZ    s7     ('(<=nMI((*N6666r   )(__doc__r   numpyr   
tokenizersr   r   r   r   r   tokenizers.modelsr	   r
    r   convert_slow_tokenizerr   r   r   r   r   utilsr   utils.loggingr   
get_loggerr   r   GGUF_CONFIG_MAPPINGGGUF_TOKENIZER_MAPPINGr   r   r   r   r  r"  r(  r?  rV  rZ  r   r   r   <module>rf     s   
   S S *  o o    
		H	%X$%X
 3*2) *& 5#8,:"X$ 3*2) *& 5#8,:"%X> 3*2) $& 5#8,:"?XV 3*2) $& 5#8,:"%2WXr 3*2) $& 5#8,:"sXJ 3*2) $& 5#8,:"%2KXf 3*2) $& 5#8,:"gX~ ++!/!/	XJ 3*2) $& 5#8,:"KXb  ) ("(<cXp 	'#%% & +#8(<,L":"qXJ 3*2) $ 5#8(8"
KX`  !$4 ((<aXp *3)2 5#8(6qXB "3),@*(&.-
CXX 3*2) $& 5#8,6"YXr 4*2) $& !+ 5#8,:$4"" 4*2) $& !+ 5#8,:$4"QX x ''++!/!/!3 )"++!/!/ 00(6 (6Vv vr (H HV	} 	<k <~> >B # 	
 #       " 	/       %! (7I 7r   