
    <h]                        S SK JrJr  S SKrS SKJr  S SKJr  SSKJrJ	r	  SSK
JrJr  SSKJr  SS	KJr  SS
KJr  SSKJr  SSKJr  SSKJrJrJr  SSKJrJrJrJrJ r J!r!J"r"J#r#J$r$J%r%  SSK&J'r'  \RP                  " \)5      r* " S S\!5      r+ " S S\5      r, " S S\5      r- " S S\#5      r. " S S\"5      r/ " S S\5      r0 " S S\ 5      r1 " S  S!\5      r2 " S" S#\\.5      r3/ S$Qr4g)%    )CallableOptionalN)nn)check_model_inputs   )CacheDynamicCache)create_causal_mask!create_sliding_window_causal_mask)FlashAttentionKwargs)GenericForQuestionAnswering)BaseModelOutputWithPast)ALL_ATTENTION_FUNCTIONS)Unpack)TransformersKwargsauto_docstringlogging   )
LlamaAttentionLlamaDecoderLayerLlamaForCausalLMLlamaForSequenceClassificationLlamaForTokenClassificationLlamaMLP
LlamaModelLlamaPreTrainedModelapply_rotary_pos_embeager_attention_forward   )MistralConfigc                   (   ^  \ rS rSrU 4S jrSrU =r$ )
MistralMLP$   c                 >  > [         TU ]  U5        [        R                  " U R                  U R
                  SS9U l        [        R                  " U R                  U R
                  SS9U l        [        R                  " U R
                  U R                  SS9U l        g )NFbias)	super__init__r   Linearhidden_sizeintermediate_size	gate_projup_proj	down_proj)selfconfig	__class__s     c/var/www/html/shao/venv/lib/python3.13/site-packages/transformers/models/mistral/modular_mistral.pyr(   MistralMLP.__init__%   ss     4#3#3T5K5KRWXyy!1!143I3IPUV4#9#94;K;KRWX    )r.   r,   r-   )__name__
__module____qualname____firstlineno__r(   __static_attributes____classcell__r1   s   @r2   r"   r"   $   s    Y Yr4   r"   c                   B  ^  \ rS rSrS\S\4U 4S jjr  SS\R                  S\	\R                  \R                  4   S\
\R                     S\
\   S	\
\R                     S
\\   S\	\R                  \
\R                     \
\	\R                        4   4S jjrSrU =r$ )MistralAttention,   r0   	layer_idxc                 f  > [         TU ]  5         [        USS 5      =(       d    UR                  UR                  -  U l        [        R                  " UR                  UR                  U R
                  -  SS9U l        [        R                  " UR                  UR                  U R
                  -  SS9U l
        [        R                  " UR                  UR                  U R
                  -  SS9U l        [        R                  " UR                  U R
                  -  UR                  SS9U l        g )Nhead_dimFr%   )r'   r(   getattrr*   num_attention_headsrA   r   r)   q_projnum_key_value_headsk_projv_projo_projr/   r0   r?   r1   s      r2   r(   MistralAttention.__init__-   s    
D9mV=O=OSYSmSm=mii 2 2F4N4NQUQ^Q^4^ejkii 2 2F4N4NQUQ^Q^4^ejkii 2 2F4N4NQUQ^Q^4^ejkii : :T]] JFL^L^ejkr4   hidden_statesposition_embeddingsattention_maskpast_key_valuecache_positionkwargsreturnc           
      `   UR                   S S n/ UQSPU R                  P7nU R                  U5      R                  U5      R	                  SS5      n	U R                  U5      R                  U5      R	                  SS5      n
U R                  U5      R                  U5      R	                  SS5      nUu  p[        XX5      u  pUb$  XUS.nUR                  XU R                  U5      u  p[        nU R                  R                  S:w  a  [        U R                  R                     nU" U U	U
UU4U R                  (       d  SOU R                  U R                   [#        U R                  SS 5      S.UD6u  nnUR$                  " / UQSP76 R'                  5       nU R)                  U5      nUU4$ )	Nr   r   )sincosrO   eagerg        sliding_window)dropoutscalingrW   )shaperA   rD   view	transposerF   rG   r   updater?   r   r0   _attn_implementationr   trainingattention_dropoutrY   rB   reshape
contiguousrH   )r/   rK   rL   rM   rN   rO   rP   input_shapehidden_shapequery_states
key_statesvalue_statesrU   rT   cache_kwargsattention_interfaceattn_outputattn_weightss                     r2   forwardMistralAttention.forward5   s    $))#2.88b8$--8{{=166|DNNqRST[[/44\BLLQPQR
{{=166|DNNqRST&#7RU#[ %#&nUL'5'<'<ZW[WeWegs't$J(?;;++w6"9$++:Z:Z"[$7
%
  $}}C$2H2HLL"4;;0@$G
%
 
%
!\ "));;;;FFHkk+.L((r4   )rA   rF   rH   rD   rG   )NN)r5   r6   r7   r8   r    intr(   torchTensortupler   r   
LongTensorr   r   rl   r9   r:   r;   s   @r2   r=   r=   ,   s    l} l l +/59*)||*) #5<<#=>*) !.	*)
 !*) !!1!12*) -.*) 
u||Xell3XeELL>Q5RR	S*) *)r4   r=   c                   4   ^  \ rS rSrS\S\4U 4S jjrSrU =r$ )MistralDecoderLayerb   r0   r?   c                 `   > [         TU ]  X5        [        XS9U l        [	        U5      U l        g )N)r0   r?   )r'   r(   r=   	self_attnr"   mlprI   s      r2   r(   MistralDecoderLayer.__init__c   s(    +)Mf%r4   )rx   rw   )	r5   r6   r7   r8   r    rn   r(   r9   r:   r;   s   @r2   rt   rt   b   s    &} & & &r4   rt   c                       \ rS rSr\\S.rSrg)MistralPreTrainedModeli   )rK   
attentions N)r5   r6   r7   r8   rt   r=   _can_record_outputsr9   r~   r4   r2   r{   r{   i   s    ,&r4   r{   c                       \ rS rSr\\       SS\\R                     S\\R                     S\\R                     S\\
   S\\R                     S\\   S	\\R                     S
\\   S\4S jj5       5       rSrg)MistralModelp   N	input_idsrM   position_idspast_key_valuesinputs_embeds	use_cacherO   rP   rQ   c                    US L US L-  (       a  [        S5      eUc  U R                  U5      nU(       a  Uc
  [        5       nUcD  Ub  UR                  5       OSn	[        R
                  " XUR                  S   -   UR                  S9nUc  UR                  S5      nU R                  R                  c  [        O[        n
U
" U R                  UUUUUS9nUnU R                  X5      nU R                  S U R                  R                    H  nU" U4UUUUUUS.UD6nM     U R!                  U5      n[#        UU(       a  US9$ S S9$ )Nz:You must specify exactly one of input_ids or inputs_embedsr   r   )device)r0   input_embedsrM   rO   r   r   )rM   r   rN   r   rO   rL   )last_hidden_stater   )
ValueErrorembed_tokensr	   get_seq_lengthro   arangerZ   r   	unsqueezer0   rW   r
   r   
rotary_emblayersnum_hidden_layersnormr   )r/   r   rM   r   r   r   r   rO   rP   past_seen_tokensmask_functioncausal_maskrK   rL   decoder_layers                  r2   rl   MistralModel.forwardq   su    -t";<YZZ  --i8M0*nO!CRC^==?de"\\ ]5H5H5K"KTaThThN )33A6L.2kk.H.H.P*Vw#;;&))+%
 &"oomJ![[)H4;;+H+HIM)	*).#-$7	 	M J 		-0&+/8O
 	
>B
 	
r4   r~   )NNNNNNN)r5   r6   r7   r8   r   r   r   ro   rr   rp   r   FloatTensorboolr   r   r   rl   r9   r~   r4   r2   r   r   p   s     151537+/59$(599
E,,-9
 !.9
 u//0	9

 "%9
   1 129
 D>9
 !!1!129
 +,9
 
!9
  9
r4   r   c                       \ rS rSrSrg)MistralForCausalLM   r~   Nr5   r6   r7   r8   r9   r~   r4   r2   r   r          r4   r   c                       \ rS rSrSrg)MistralForTokenClassification   r~   Nr   r~   r4   r2   r   r      r   r4   r   c                       \ rS rSrSrg) MistralForSequenceClassification   r~   Nr   r~   r4   r2   r   r      r   r4   r   c                       \ rS rSrSrg)MistralForQuestionAnswering   r~   Nr   r~   r4   r2   r   r      s    X[r4   r   )r   r   r   r{   r   r   )5typingr   r   ro   r   transformers.utils.genericr   cache_utilsr   r	   masking_utilsr
   r   modeling_flash_attention_utilsr   modeling_layersr   modeling_outputsr   modeling_utilsr   processing_utilsr   utilsr   r   r   llama.modeling_llamar   r   r   r   r   r   r   r   r   r   configuration_mistralr    
get_loggerr5   loggerr"   r=   rt   r{   r   r   r   r   r   __all__r~   r4   r2   <module>r      s    %   9 . R B 8 5 & @ @   1 
		H	%Y Y3)~ 3)l&+ &1 <
: <
~	) 		$? 		'E 	 \"=?U [r4   