
    <hv                        S SK r S SKJr  S SKJr  SSKJr  SSKJrJ	r	  \	R                  " \5      r\" 5       (       a  S SKrS r   S"S\\   S	\S
   S\\   S\S\4   4S jjr   S"S\\   S	\S
   S\\   S\S\4   4S jjr   S"S\\   S	\S
   S\\   S\S\4   4S jjr S#S\S	S
S\\   S\S\4   4S jjr S#S\S	S
S\\   S\S\4   4S jjr S#S\S	S
S\\   S\S\4   4S jjr\\\\\\S.r  S$S\S\S\S\\   S\\   4
S jjrS#S\S\\   4S jjrS#S\S\\   4S jjrS#S\S\\   4S jjrS#S\S\\   4S jjrS#S\S\\   4S jjr S#S\S\\   4S  jjr!\\\\\ \!S.r"S#S\S\\   4S! jjr#g)%    Nwraps)Optional   )PretrainedConfig)is_torch_availableloggingc                 D   ^ ^^ S mS m[        T 5      UUU 4S j5       nU$ )aD  
Decorator function to update the RoPE parameters in the forward pass, if the model is using a dynamic RoPE
(i.e. a RoPE implementation that may recompute its frequencies in the forward pass).

Args:
    rope_forward (Callable):
        The forward pass of the RoPE implementation.

Returns:
    The decorated forward pass.
c                    [         R                  " U5      S-   n[        U R                  S5      (       a  U R                  R                  nOU R                  R
                  nX4:  aR  [        U S5      (       d%  U R                  U R                  X$S-   S9u  U l        nU R                  SU R                  SS9  gU R                  R                  U5      U l	        U R                  SU R                  SS9  g)	zbLongrope uses long factor if sequence is larger than original pretraining length, short otherwise.r    original_max_position_embeddingslong_inv_freqseq_leninv_freqF
persistentN)torchmaxhasattrconfigr   max_position_embeddingsrope_init_fnr   register_bufferoriginal_inv_freqto)selfposition_idsdevicer   r   _s         X/var/www/html/shao/venv/lib/python3.13/site-packages/transformers/modeling_rope_utils.pylongrope_frequency_update6dynamic_rope_update.<locals>.longrope_frequency_update+   s    ))L)A-4;; BCC/3{{/[/[,/3{{/R/R,5411(,(9(9KKTU1U ): )%"A   T-?-?E R &*%;%;%>%>v%FD"  T-C-CPU V    c                    [         R                  " U5      S-   nX0R                  :  a8  U R                  U R                  X#S9u  o@l        U R                  SUSS9  X0l        X0R                  :  ah  U R                  U R                  :  aM  U R                  R                  U5      U l        U R                  SU R                  SS9  U R                  U l        ggg)z
dynamic RoPE layers should recompute `inv_freq` in the following situations:
1 - growing beyond the cached sequence length (allow scaling)
2 - the current sequence length is in the original scale (avoid losing precision with small sequences)
r   r   r   Fr   N)
r   r   max_seq_len_cachedr   r   attention_scalingr   original_max_seq_lenr   r   )r   r   r   r   r   s        r    dynamic_frequency_update5dynamic_rope_update.<locals>.dynamic_frequency_update>   s     ))L)A-,,,/3/@/@f/@/f,H,  X% H&-#...43J3JTMfMf3f &*%;%;%>%>v%FD"  T-C-CPU V&*&?&?D# 4g.r#   c                    > SU R                   ;   a  T" XUR                  S9  O!U R                   S:X  a  T" XUR                  S9  T" XU5      $ )Ndynamic)r   longrope)	rope_typer   )r   xr   r(   r!   rope_forwards      r    wrapper$dynamic_rope_update.<locals>.wrapperQ   sD    &$TI^^z)%dJD\22r#   r   )r/   r0   r(   r!   s   ` @@r    dynamic_rope_updater2      s/    W&@& <3 3 Nr#   r   r   ztorch.devicer   returnztorch.Tensorc           	      j   U R                   n[        U S5      (       a  U R                  OSn[        U SS5      =(       d    U R                  U R
                  -  n[        XT-  5      nSnSU[        R                  " SUS[        R                  S9R                  U[        R                  S9U-  -  -  nX4$ )	aH  
Computes the inverse frequencies according to the original RoPE implementation
Args:
    config ([`~transformers.PretrainedConfig`]):
        The model configuration.
    device (`torch.device`):
        The device to use for initialization of the inverse frequencies.
    seq_len (`int`, *optional*):
        The current sequence length. Unused for this type of RoPE.
Returns:
    Tuple of (`torch.Tensor`, `float`), containing the inverse frequencies for the RoPE embeddings and the
    post-processing scaling factor applied to the computed cos/sin (unused in this type of RoPE).
partial_rotary_factor      ?head_dimNr      dtyper   r:   )
rope_thetar   r5   getattrhidden_sizenum_attention_headsintr   arangeint64r   float)	r   r   r   baser5   r7   dimattention_factorr   s	            r     _compute_default_rope_parametersrG   \   s    $ D<CFLc<d<dF88jmvz40dF4F4F&JdJd4dH
h.
/C du||AsAU[[ILLTZbgbmbmLnqttuvH%%r#   c                 J    U R                   S   n[        XU5      u  pEXC-  nXE4$ )aX  
Computes the inverse frequencies with linear scaling. Credits to the Reddit user /u/kaiokendev
Args:
    config ([`~transformers.PretrainedConfig`]):
        The model configuration.
    device (`torch.device`):
        The device to use for initialization of the inverse frequencies.
    seq_len (`int`, *optional*):
        The current sequence length. Unused for this type of RoPE.
Returns:
    Tuple of (`torch.Tensor`, `float`), containing the inverse frequencies for the RoPE embeddings and the
    post-processing scaling factor applied to the computed cos/sin (unused in this type of RoPE).
factor)rope_scalingrG   )r   r   r   rI   r   rF   s         r    '_compute_linear_scaling_rope_parametersrK   z   s:    $   *F "B&RY!ZH
 H%%r#   c           	         U R                   n[        U S5      (       a  U R                  OSn[        U SU R                  U R
                  -  5      n[        XT-  5      nU R                  nU R                  S   nSn	Uc  UnOi[        U[        R                  5      (       a?  [        R                  " U[        R                  " XrR                  UR                  S95      nO[!        X'5      nX8U-  U-  US-
  -
  XfS-
  -  -  -  nSU[        R"                  " SUS[        R$                  S	9R'                  U[        R(                  S
9U-  -  -  n
X4$ )aw  
Computes the inverse frequencies with NTK scaling. Credits to the Reddit users /u/bloc97 and /u/emozilla
Args:
    config ([`~transformers.PretrainedConfig`]):
        The model configuration.
    device (`torch.device`):
        The device to use for initialization of the inverse frequencies.
    seq_len (`int`, *optional*):
        The current sequence length, used to update the dynamic RoPE at inference time.
Returns:
    Tuple of (`torch.Tensor`, `float`), containing the inverse frequencies for the RoPE embeddings and the
    post-processing scaling factor applied to the computed cos/sin (unused in this type of RoPE).
r5   r6   r7   rI   r:   r   r   r8   r   r9   r;   )r<   r   r5   r=   r>   r?   r@   r   rJ   
isinstancer   Tensormaximumtensorr:   r   r   rA   rB   r   rC   )r   r   r   rD   r5   r7   rE   r   rI   rF   r   s              r    _compute_dynamic_ntk_parametersrR      s@   & D<CFLc<d<dF88jmvz6+=+=A[A[+[\H
h.
/C$<<  *F )	GU\\	*	*--LL0gnn]

 g7 W$'>>6A:NTWab[bTcddDdu||AsAU[[ILLTZbgbmbmLnqttuvH%%r#   c                 j  ^ U R                   n[        U S5      (       a  U R                  OSn[        U SU R                  U R
                  -  5      n[        XT-  5      nU R                  S   nU R                  R                  S5      nU R                  R                  S5      n	U R                  R                  S5      n
SU R                  ;   a  U R                  S   nU R                  U-  nOU R                  nSS
 jnUc1  U	(       a"  U
(       a  [        U" Xy5      U" Xz5      -  5      nOU" U5      nU R                  R                  S5      =(       d    SnU R                  R                  S5      =(       d    S	nS mU4S jnS nU[        R                  " SUS5      R                  U[        R                  S9U-  -  nSU-  nSUU-  -  nU R                  R                  SS5      nU" XXcUU5      u  nnS	U" UUUS-  5      R                  U[        R                  S9-
  nUS	U-
  -  UU-  -   nUU4$ )a]  
Computes the inverse frequencies with NTK scaling. Please refer to the
[original paper](https://huggingface.co/papers/2309.00071)
Args:
    config ([`~transformers.PretrainedConfig`]):
        The model configuration.
    device (`torch.device`):
        The device to use for initialization of the inverse frequencies.
    seq_len (`int`, *optional*):
        The current sequence length. Unused for this type of RoPE.
Returns:
    Tuple of (`torch.Tensor`, `float`), containing the inverse frequencies for the RoPE embeddings and the
    post-processing scaling factor applied to the computed cos/sin.
r5   r6   r7   rI   rF   mscalemscale_all_dimr   r   c                 N    U S::  a  gSU-  [         R                  " U 5      -  S-   $ )Nr   r6   g?)mathlog)scalerT   s     r    
get_mscale,_compute_yarn_parameters.<locals>.get_mscale   s(    A:V|dhhuo-33r#   	beta_fast    	beta_slowc                     U[         R                  " X0S-  [         R                  -  -  5      -  S[         R                  " U5      -  -  $ )zPInverse dimension formula to find the dimension based on the number of rotationsr8   )rW   rX   pi)num_rotationsrE   rD   r   s       r    find_correction_dim5_compute_yarn_parameters.<locals>.find_correction_dim   s@    dhh6!:Kdgg:UVWW\]`d`h`him`n\noor#   c                    > T" XX45      nT" XX45      nU(       a,  [         R                  " U5      n[         R                  " U5      n[        US5      [	        XrS-
  5      4$ )z.Find dimension range bounds based on rotationsr   r   )rW   floorceilr   min)	low_rothigh_rotrE   rD   r   truncatelowhighrb   s	           r    find_correction_range7_compute_yarn_parameters.<locals>.find_correction_range   sR    !'N"8$P

3#99T?D3{CAg...r#   c                     X:X  a  US-  n[         R                  " U[         R                  S9U -
  X-
  -  n[         R                  " USS5      nU$ )NgMbP?r9   r   r   )r   rA   float32clamp)rg   r   rE   linear_func	ramp_funcs        r    linear_ramp_factor4_compute_yarn_parameters.<locals>.linear_ramp_factor  sH    :5LC||Cu}}=C	RKKQ2	r#   r   r8   r;   rj   T)r   )r<   r   r5   r=   r>   r?   r@   rJ   getr   rC   r   rA   r   )r   r   r   rD   r5   r7   rE   rI   rF   rT   rU   r   rZ   r\   r^   rm   rt   	pos_freqsinv_freq_extrapolationinv_freq_interpolationrj   rk   rl   inv_freq_extrapolation_factorr   rb   s                            @r    _compute_yarn_parametersr{      sI   $ D<CFLc<d<dF88jmvz6+=+=A[A[+[\H
h.
/C  *F**../AB  $$X.F((,,-=>N
 *V-@-@@+1+>+>?a+b(//2RR+1+I+I(4 n$Z%?*VBd%de)&1 ##''4:I##''49Ip/ aa03363UX[[\I 9_ FY$67""&&z48H%iCGgiqrIC %&(:3cQh(O(R(RZ`hmhshs(R(t$t!!&C"CD
 #@
@	A  %%%r#   c                    U R                   n[        U S5      (       a  U R                  OSn[        U SU R                  U R
                  -  5      n[        XT-  5      nU R                  S   nU R                  S   nU R                  R                  S5      n	U R                  R                  S5      n
[        U S5      (       a&  U R                  nU R                  U R                  -  n	OU R                  nU
cM  U	S::  a  Sn
OD[        R                  " S	[        R                  " U	5      [        R                  " U5      -  -   5      n
U(       a*  X+:  a%  [        R                  " U[        R                   US
9nO$[        R                  " U[        R                   US
9n[        R"                  " SUS[        R$                  US
9R'                  5       U-  nSXU-  -  -  nX4$ )aJ  
Computes the inverse frequencies with LongRoPE scaling. Please refer to the
[original implementation](https://github.com/microsoft/LongRoPE)
Args:
    config ([`~transformers.PretrainedConfig`]):
        The model configuration.
    device (`torch.device`):
        The device to use for initialization of the inverse frequencies.
    seq_len (`int`, *optional*):
        The current sequence length.
Returns:
    Tuple of (`torch.Tensor`, `float`), containing the inverse frequencies for the RoPE embeddings and the
    post-processing scaling factor applied to the computed cos/sin.
r5   r6   r7   long_factorshort_factorrI   rF   r   r   rM   r   r8   )r<   r   r5   r=   r>   r?   r@   rJ   rv   r   r   rW   sqrtrX   r   rQ   rp   rA   rB   rC   )r   r   r   rD   r5   r7   rE   r}   r~   rI   rF   r   ext_factorsinv_freq_shaper   s                  r    _compute_longrope_parametersr   "  s   $ D<CFLc<d<dF88jmvz6+=+=A[A[+[\H
h.
/C%%m4K&&~6L  $$X.F**../AB
 v9::+1+R+R(//&2Y2YY+1+I+I( S="#yyTXXf-=Ii@j-j)jk 7=ll;emmFSll<u}}VT\\!S!5;;vNTTVY\\Nk.$889H%%r#   c                    [        XU5      u  p4U R                  S   nU R                  S   nU R                  S   nU R                  S   nX-  n	X-  n
S[        R                  -  U-  n[        R
                  " X:  X5-  U5      nX-  U-
  Xv-
  -  nSU-
  U-  U-  X-  -   nX:  ) X:  ) -  n[        R
                  " XU5      nX4$ )a  
Computes the inverse frequencies for llama 3.1.

Args:
    config ([`~transformers.PretrainedConfig`]):
        The model configuration.
    device (`torch.device`):
        The device to use for initialization of the inverse frequencies.
    seq_len (`int`, *optional*):
        The current sequence length. Unused for this type of RoPE.
Returns:
    Tuple of (`torch.Tensor`, `float`), containing the inverse frequencies for the RoPE embeddings and the
    post-processing scaling factor applied to the computed cos/sin.
rI   low_freq_factorhigh_freq_factorr   r8   r   )rG   rJ   rW   r`   r   where)r   r   r   r   rF   rI   r   r   old_context_lenlow_freq_wavelenhigh_freq_wavelenwaveleninv_freq_llamasmooth_factorsmoothed_inv_freqis_medium_freqs                   r    _compute_llama3_parametersr   X  s    $ "B&RY!ZH  *F))*;<O**+=>))*LMO&8':$''kH$G [[!;X=NPXYN$.@EUEghM]*n<vEHff238R6SSN[[NSN++r#   )defaultlinearr+   yarnr,   llama3r-   received_keysrequired_keysoptional_keysignore_keysc                     SU;   a  US1-  nUR                  S5        Ub  X-  nX!-
  nU(       a  [        SU  SU 35      eUb  X-
  U-
  nOX-
  nU(       a  [        R                  SU  SU 35        gg)zYCompare the received keys in `config.rope_scaling` against the expected and optional keystyper-   Nz9Missing required keys in `rope_scaling` for 'rope_type'='z': z5Unrecognized keys in `rope_scaling` for 'rope_type'=')addKeyErrorloggerwarning)r-   r   r   r   r   missing_keysunused_keyss          r    _check_received_keysr     s     &!+& $ 0LRS\R]]`am`nopp #3mC#3NykY\]h\ijk r#   c                     U R                   nUR                  SUR                  SS 5      5      nS1n[        UR                  5       5      n[	        X5XAS9  g )Nr-   r   r   )rJ   rv   setkeysr   )r   r   rJ   r-   r   r   s         r    !_validate_default_rope_parametersr     sP    &&L  l.>.>vt.LMI MM))+,M=Zr#   c                 &   U R                   nUR                  SUR                  SS 5      5      nSS1n[        UR                  5       5      n[	        X5XAS9  US   nUb  [        U[        5      (       a  US:  a  [        R                  SU 35        g g )Nr-   r   rI   r   r6   8`rope_scaling`'s factor field must be a float >= 1, got 	rJ   rv   r   r   r   rN   rC   r   r   )r   r   rJ   r-   r   r   rI   s          r    (_validate_linear_scaling_rope_parametersr     s    &&L  l.>.>vt.LMI (+M))+,M=Z(#F~Z66&3,QRXQYZ[ ;Gr#   c                 .   U R                   nUR                  SUR                  SS 5      5      nSS1nS1n[        UR                  5       5      n[	        X6XEUS9  US   nUb  [        U[        5      (       a  US:  a  [        R                  SU 35        g g )Nr-   r   rI   r   r   r6   r   r   )r   r   rJ   r-   r   r   r   rI   s           r    )_validate_dynamic_scaling_rope_parametersr     s    &&L  l.>.>vt.LMI (+M78M))+,M=]hi(#F~Z66&3,QRXQYZ[ ;Gr#   c                 *   U R                   nUR                  SUR                  SS 5      5      nSS1n1 Skn[        UR                  5       5      n[	        X6XEUS9  US   nUb  [        U[        5      (       a  US:  a  [        R                  SU 35        UR                  S5      nUb3  [        U[        5      (       a  US	:  a  [        R                  S
U 35        UR                  S5      n	U	b-  [        U	[        5      (       d  [        R                  SU	 35        UR                  S5      n
U
b-  [        U
[        5      (       d  [        R                  SU
 35        U	=(       d    SU
=(       d    S:  a  [        R                  SU	 SU
 S35        g g )Nr-   r   rI   >   rT   rj   r\   r^   rU   rF   r   r   r6   r   rF   r   L`rope_scaling`'s attention_factor field must be a float greater than 0, got r\   z6`rope_scaling`'s beta_fast field must be a float, got r^   z6`rope_scaling`'s beta_slow field must be a float, got r]   r   zO`rope_scaling`'s beta_fast field must be greater than beta_slow, got beta_fast=z( (defaults to 32 if None) and beta_slow=z (defaults to 1 if None)r   )r   r   rJ   r-   r   r   r   rI   rF   r\   r^   s              r    _validate_yarn_parametersr     s   &&L  l.>.>vt.LMI (+MM ))+,M=]hi(#F~Z66&3,QRXQYZ[#''(:;#Z8H%-P-PTdghThZ[kZlm	
   -IZ	5%A%AOPY{[\  -IZ	5%A%AOPY{[\RIN+]^g]h i66?[@XZ	
 ,r#   c                 *   U R                   nUR                  SUR                  SS 5      5      n1 Skn1 Skn[        UR                  5       5      n[	        X6XEUS9  [        U S5      (       a  U R                  OSn[        U SU R                  U R                  -  5      n[        X-  5      n	UR                  S	5      n
[        U
[        5      (       d/  [        S
 U
 5       5      (       a  [        R                  SU
 35        [!        U
5      U	S-  :w  a'  [        R                  SU	S-   S[!        U
5       35        UR                  S5      n[        U[        5      (       d/  [        S U 5       5      (       a  [        R                  SU 35        [!        U5      U	S-  :w  a'  [        R                  SU	S-   S[!        U5       35        [        U S5      (       a  [        R#                  S5        g UR                  S5      nUc  [        R                  S5        O3[        U[$        5      (       a  US:  a  [        R                  SU 35        UR                  S5      nUb5  [        U[$        5      (       a  US:  a  [        R                  SU 35        g g g )Nr-   r   >   r-   r}   r~   >   rI   rF   r   r   r5   r6   r7   r~   c              3   L   #    U H  n[        U[        [        45      v   M     g 7fNrN   r@   rC   .0r.   s     r    	<genexpr>0_validate_longrope_parameters.<locals>.<genexpr>  s!     1dWcRS*Qe2M2MWc   "$zC`rope_scaling`'s short_factor field must be a list of numbers, got r8   z5`rope_scaling`'s short_factor field must have length z, got r}   c              3   L   #    U H  n[        U[        [        45      v   M     g 7fr   r   r   s     r    r   r     s!     0bVaQRAU|1L1LVar   zB`rope_scaling`'s long_factor field must be a list of numbers, got z4`rope_scaling`'s long_factor field must have length r   aY  This model has set a `original_max_position_embeddings` field, to be used together with `max_position_embeddings` to determine a scaling factor. Please set the `factor` field of `rope_scaling`with this ratio instead -- we recommend the use of this field over `original_max_position_embeddings`, as it is compatible with most model architectures.rI   z1Missing required keys in `rope_scaling`: 'factor'r   rF   g        r   )rJ   rv   r   r   r   r   r5   r=   r>   r?   r@   rN   listallr   r   lenwarning_oncerC   )r   r   rJ   r-   r   r   r   r5   r7   rE   r~   r}   rI   rF   s                 r    _validate_longrope_parametersr     sZ   &&L  l.>.>vt.LMI@MVM))+,M=]hi<CFLc<d<dF88jmvz6+=+=A[A[+[\H
h.
/C##N3LlD))c1dWc1d.d.d\]i\jkl
<C1H$NsVWxjX^_bco_p^qrs""=1Kk4((S0bVa0b-b-b[\g[hij
;3!8#McUVhZW]^abm^n]opq
 v9::A	
 !!(+>NNNOFE**fslNNUV\U]^_'++,>?'.66:JS:Pbcsbtu ;Q (r#   c                    U R                   nUR                  SUR                  SS 5      5      n1 Skn[        UR                  5       5      n[	        X5XAS9  US   nUb  [        U[        5      (       a  US:  a  [        R                  SU 35        US   nUS	   nUb  [        U[        5      (       d  [        R                  S
U 35        Ub  [        U[        5      (       d  [        R                  SU 35        X::  a  [        R                  SU SU 35        US   n	U	b  [        U	[        5      (       d  [        R                  SU	 35        XR                  :  a&  [        R                  SU	 SU R                   35        g g )Nr-   r   >   rI   r-   r   r   r   r   rI   r6   r   r   r   z<`rope_scaling`'s low_freq_factor field must be a float, got z=`rope_scaling`'s high_freq_factor field must be a float, got zc`rope_scaling`'s high_freq_factor field must be greater than low_freq_factor, got high_freq_factor=z and low_freq_factor=r   zP`rope_scaling`'s original_max_position_embeddings field must be an integer, got zg`rope_scaling`'s original_max_position_embeddings field must be less than max_position_embeddings, got z and max_position_embeddings=)rJ   rv   r   r   r   rN   rC   r   r   r@   r   )
r   r   rJ   r-   r   r   rI   r   r   r   s
             r    _validate_llama3_parametersr   &  s   &&L  l.>.>vt.LMIvM))+,M=Z(#F~Z66&3,QRXQYZ["#45O#$67j%&H&HUVeUfghz2BE'J'JVWgVhij*q  5o5FH	

 (44V'W$'/zBbdg7h7h^/02	
 (+I+IIu/00MfNlNlMmo	
 Jr#   c                     [        U SS5      nUc  gUR                  SUR                  SS5      5      n[        R                  U5      nUb  U" XS9  g[        R	                  SU S35        g)	zG
Validate the RoPE config arguments, given a `PretrainedConfig` object
rJ   Nr-   r   r   r   zTMissing validation function mapping in `ROPE_VALIDATION_FUNCTIONS` for 'rope_type'='')r=   rv   ROPE_VALIDATION_FUNCTIONSr   r   )r   r   rJ   r-   validation_fns        r    rope_config_validationr   U  su     6>48L   l.>.>vy.QRI-11)<M f6bclbmmno	
r#   )NNNr   )NN)$rW   	functoolsr   typingr   configuration_utilsr   utilsr   r	   
get_logger__name__r   r   r2   r@   tuplerC   rG   rK   rR   r{   r   r   ROPE_INIT_FUNCTIONSstrr   r   r   r   r   r   r   r   r   r    r#   r    <module>r      s      1 . 
		H	% ;~ *.'+!&%&&^$& c]& >5 !	&> *.'+!&%&&^$& c]& >5 !	&> *.'+!*&%&*&^$*& c]*& >5 !	*&\ PTZ&Z&&4Z&?G}Z&
>5 !Z&| PT3&3&&43&?G}3&
>5 !3&n PT&,&,&4&,?G}&,
>5 !&,Z 05.$,(  $(!%lll l C=	l
 #l:[.> [XVY] [	\5E 	\T\]`Ta 	\\6F \U]^aUb \$
&6 $
Xc] $
N/*: /RU /d!
(8 !
xPS} !
L 168%-) 
#3 
(3- 
r#   