
    <h                        S SK JrJr  S SKrS SKrS SKJr  SSKJrJr  SSK	J
r
Jr  SSKJr  SSKJr  SS	KJr  SS
KJr  SSKJrJrJr  SSKJr  SSKJrJrJrJrJrJ r J!r!J"r"J#r#J$r$  SSK%J&r&  SSK'J(r(  \RR                  " \*5      r+ " S S\!5      r, " S S\5      r- " S S\5      r. " S S\"5      r/ " S S\&5      r0 " S S\5      r1 " S S\5      r2 " S  S!\ 5      r3 " S" S#\5      r4/ S$Qr5g)%    )CallableOptionalN)nn   )CacheDynamicCache)create_causal_mask!create_sliding_window_causal_mask)FlashAttentionKwargs)BaseModelOutputWithPast)ALL_ATTENTION_FUNCTIONS)Unpack)TransformersKwargsauto_docstringlogging)check_model_inputs   )
LlamaAttentionLlamaDecoderLayerLlamaForCausalLMLlamaForQuestionAnsweringLlamaForSequenceClassificationLlamaForTokenClassificationLlamaMLPLlamaPreTrainedModelapply_rotary_pos_embeager_attention_forward)MistralModel   )Qwen2Configc                   (   ^  \ rS rSrU 4S jrSrU =r$ )Qwen2MLP$   c                 >  > [         TU ]  U5        [        R                  " U R                  U R
                  SS9U l        [        R                  " U R                  U R
                  SS9U l        [        R                  " U R
                  U R                  SS9U l        g )NFbias)	super__init__r   Linearhidden_sizeintermediate_size	gate_projup_proj	down_projselfconfig	__class__s     _/var/www/html/shao/venv/lib/python3.13/site-packages/transformers/models/qwen2/modular_qwen2.pyr(   Qwen2MLP.__init__%   ss     4#3#3T5K5KRWXyy!1!143I3IPUV4#9#94;K;KRWX    )r.   r,   r-   )__name__
__module____qualname____firstlineno__r(   __static_attributes____classcell__r2   s   @r3   r"   r"   $   s    Y Yr5   r"   c                   B  ^  \ rS rSrS\S\4U 4S jjr  SS\R                  S\	\R                  \R                  4   S\
\R                     S\
\   S	\
\R                     S
\\   S\	\R                  \
\R                     \
\	\R                        4   4S jjrSrU =r$ )Qwen2Attention,   r1   	layer_idxc                 \  > [         TU ]  X5        [        R                  " UR                  UR
                  U R                  -  SS9U l        [        R                  " UR                  UR                  U R                  -  SS9U l	        [        R                  " UR                  UR                  U R                  -  SS9U l
        [        R                  " UR
                  U R                  -  UR                  SS9U l        UR                  U   S:X  a  UR                  U l        g S U l        g )NTr%   Fsliding_attention)r'   r(   r   r)   r*   num_attention_headshead_dimq_projnum_key_value_headsk_projv_projo_projlayer_typessliding_windowr0   r1   r@   r2   s      r3   r(   Qwen2Attention.__init__-   s    +ii 2 2F4N4NQUQ^Q^4^eijii 2 2F4N4NQUQ^Q^4^eijii 2 2F4N4NQUQ^Q^4^eijii : :T]] JFL^L^ejk7=7I7I)7TXk7kf33qur5   hidden_statesposition_embeddingsattention_maskpast_key_valuecache_positionkwargsreturnc                 J   UR                   S S n/ UQSPU R                  P7nU R                  U5      R                  U5      R	                  SS5      n	U R                  U5      R                  U5      R	                  SS5      n
U R                  U5      R                  U5      R	                  SS5      nUu  p[        XX5      u  pUb$  XUS.nUR                  XU R                  U5      u  p[        nU R                  R                  S:w  a  [        U R                  R                     nU" U U	U
UU4U R                  (       d  SOU R                  U R                   U R"                  S.UD6u  nnUR$                  " / UQSP76 R'                  5       nU R)                  U5      nUU4$ )Nr   r   )sincosrR   eagerg        )dropoutscalingrK   )shaperD   rE   view	transposerG   rH   r   updater@   r   r1   _attn_implementationr   trainingattention_dropoutr[   rK   reshape
contiguousrI   )r0   rN   rO   rP   rQ   rR   rS   input_shapehidden_shapequery_states
key_statesvalue_statesrX   rW   cache_kwargsattention_interfaceattn_outputattn_weightss                     r3   forwardQwen2Attention.forward5   s    $))#2.88b8$--8{{=166|DNNqRST[[/44\BLLQPQR
{{=166|DNNqRST&#7RU#[ %#&nUL'5'<'<ZW[WeWegs't$J(?;;++w6"9$++:Z:Z"[$7
%
  $}}C$2H2HLL..
%
 
%
!\ "));;;;FFHkk+.L((r5   )rG   rI   rE   rK   rH   )NN)r6   r7   r8   r9   r    intr(   torchTensortupler   r   
LongTensorr   r   rn   r:   r;   r<   s   @r3   r>   r>   ,   s    v{ vs v +/59*)||*) #5<<#=>*) !.	*)
 !*) !!1!12*) -.*) 
u||Xell3XeELL>Q5RR	S*) *)r5   r>   c                   4   ^  \ rS rSrS\S\4U 4S jjrSrU =r$ )Qwen2DecoderLayerb   r1   r@   c                 J   > [         TU ]  5         UR                  U   U l        g )N)r'   r(   rJ   attention_typerL   s      r3   r(   Qwen2DecoderLayer.__init__c   s!    $00;r5   )ry   )	r6   r7   r8   r9   r    rp   r(   r:   r;   r<   s   @r3   rv   rv   b   s    <{ <s < <r5   rv   c                       \ rS rSrSrg)Qwen2PreTrainedModelh    Nr6   r7   r8   r9   r:   r~   r5   r3   r|   r|   h       r5   r|   c                     ^  \ rS rSrS\4U 4S jjr\\       SS\\	R                     S\\	R                     S\\	R                     S\\   S\\	R                     S	\\   S
\\	R                     S\\   S\4S jj5       5       rSrU =r$ )
Qwen2Modell   r1   c                 `   > [         TU ]  U5        SU R                  R                  ;   U l        g )NrB   )r'   r(   r1   rJ   has_sliding_layersr/   s     r3   r(   Qwen2Model.__init__m   s'     "59P9P"Pr5   	input_idsrP   position_idspast_key_valuesinputs_embeds	use_cacherR   rS   rT   c                    US L US L-  (       a  [        S5      eUc  U R                  U5      nU(       a  Uc
  [        5       nUcD  Ub  UR                  5       OSn	[        R
                  " XUR                  S   -   UR                  S9nUc  UR                  S5      n[        U=n
[        5      (       d?  U R                  UUUUUS.nS[        S
0 UD60n
U R                  (       a  [        S
0 UD6U
S'   UnU R                  X5      nU R                   S U R                  R"                    H  nU" U4XR$                     UUUUUS.UD6nM!     U R'                  U5      n[)        UU(       a  US	9$ S S	9$ )Nz:You must specify exactly one of input_ids or inputs_embedsr   r   )device)r1   input_embedsrP   rR   r   r   full_attentionrB   )rP   r   rQ   r   rR   rO   )last_hidden_stater   r~   )
ValueErrorembed_tokensr   get_seq_lengthrq   aranger\   r   	unsqueeze
isinstancedictr1   r	   r   r
   
rotary_emblayersnum_hidden_layersry   normr   )r0   r   rP   r   r   r   r   rR   rS   past_seen_tokenscausal_mask_mappingmask_kwargsrN   rO   decoder_layers                  r3   rn   Qwen2Model.forwardq   s    -t";<YZZ  --i8M0*nO!CRC^==?de"\\ ]5H5H5K"KTaThThN )33A6L ?-FF ++ -"0"0#2 ,K !"4"C{"C# &&;\;k_j;k#$78% #oomJ![[)H4;;+H+HIM)	23O3OP).#-$7	 	M J 		-0&+/8O
 	
>B
 	
r5   )r   )NNNNNNN)r6   r7   r8   r9   r    r(   r   r   r   rq   rt   rr   r   FloatTensorboolr   r   r   rn   r:   r;   r<   s   @r3   r   r   l   s    Q{ Q  151537+/59$(59E
E,,-E
 !.E
 u//0	E

 "%E
   1 12E
 D>E
 !!1!12E
 +,E
 
!E
  E
r5   r   c                       \ rS rSrSrg)Qwen2ForCausalLM   r~   Nr   r~   r5   r3   r   r      r   r5   r   c                       \ rS rSrSrg)Qwen2ForSequenceClassification   r~   Nr   r~   r5   r3   r   r      r   r5   r   c                       \ rS rSrSrg)Qwen2ForTokenClassification   r~   Nr   r~   r5   r3   r   r      r   r5   r   c                       \ rS rSrSrg)Qwen2ForQuestionAnswering   r~   Nr   r~   r5   r3   r   r      r   r5   r   )r|   r   r   r   r   r   )6typingr   r   rq   torch.utils.checkpointr   cache_utilsr   r   masking_utilsr	   r
   modeling_flash_attention_utilsr   modeling_outputsr   modeling_utilsr   processing_utilsr   utilsr   r   r   utils.genericr   llama.modeling_llamar   r   r   r   r   r   r   r   r   r   mistral.modeling_mistralr   configuration_qwen2r    
get_loggerr6   loggerr"   r>   rv   r|   r   r   r   r   r   __all__r~   r5   r3   <module>r      s    %    . R B 6 & @ @ /   4 , 
		H	%Yx Y3)^ 3)l<) <	/ 	L
 L
^	' 		%C 		"= 		 9 	r5   