
    <h,                        S SK JrJr  S SKrS SKJr  SSKJrJr  SSK	J
r
  SSKJr  SSKJr  SSKJr  SS	KJr  SS
KJrJr  SSKJr  SSKJrJrJrJrJrJrJrJ r   SSK!J"r"  \RF                  " \$5      r%Sr&Sr' " S S\5      r( " S S\5      r) " S S\5      r* " S S\5      r+ " S S\5      r, " S S\5      r- " S S\5      r. " S  S!\5      r// S"Qr0g)#    )CallableOptionalN   )CacheDynamicCache)create_causal_mask)GradientCheckpointingLayer)BaseModelOutputWithPast)ALL_ATTENTION_FUNCTIONS)Unpack)TransformersKwargslogging   )CLIPMLP)LlamaAttentionLlamaForCausalLMLlamaForSequenceClassificationLlamaForTokenClassification
LlamaModelLlamaRotaryEmbeddingapply_rotary_pos_embeager_attention_forward   )	PhiConfigzmicrosoft/phi-1r   c                   8  ^  \ rS rSrS\S\4U 4S jjr  SS\R                  S\	\R                  \R                  4   S\
\R                     S\
\   S	\
\R                     S
\	\R                  \
\R                     \
\	\R                        4   4S jjrSrU =r$ )PhiAttention#   config	layer_idxc                   > [         TU ]  X5        [        R                  " UR                  UR
                  U R                  -  SS9U l        [        R                  " UR                  UR                  U R                  -  SS9U l	        [        R                  " UR                  UR                  U R                  -  SS9U l
        [        R                  " UR
                  U R                  -  UR                  SS9U l        U ?[        U R                  UR                  -  5      U l        UR                   U l        U R                   (       ay  [        R"                  " UR                  UR
                  -  UR$                  SS9U l        [        R"                  " UR                  UR
                  -  UR$                  SS9U l        g g )NTbias)epselementwise_affine)super__init__nnLinearhidden_sizenum_attention_headshead_dimq_projnum_key_value_headsk_projv_projdenseo_projintpartial_rotary_factorrotary_ndimsqk_layernorm	LayerNormlayer_norm_epsq_layernormk_layernormselfr   r   	__class__s      [/var/www/html/shao/venv/lib/python3.13/site-packages/transformers/models/phi/modular_phi.pyr&   PhiAttention.__init__$   s`   +ii 2 2F4N4NQUQ^Q^4^eijii 2 2F4N4NQUQ^Q^4^eijii 2 2F4N4NQUQ^Q^4^eijYYv99DMMI6K]K]dhi
K0L0L LM"//!||""f&@&@@fF[F[pt D  "||""f&@&@@fF[F[pt D	     hidden_statesposition_embeddingsattention_maskpast_key_valuecache_positionreturnc                    UR                   S S n/ UQSPU R                  P7nU R                  U5      R                  U5      R	                  SS5      n	U R                  U5      R                  U5      R	                  SS5      n
U R                  U5      R                  U5      R	                  SS5      nU R                  (       a"  U R                  U	5      n	U R                  U
5      n
Uu  pU	SS U R                  24   U	SU R                  S 24   pU
SS U R                  24   U
SU R                  S 24   nn[        UUX5      u  nn[        R                  " X4SS9n	[        R                  " UU4SS9n
Ub$  XUS.nUR                  XU R                  U5      u  p[         nU R"                  R$                  S:w  a  [&        U R"                  R$                     nU" U U	U
UU4U R(                  (       d  SOU R*                  U R,                  S	.UD6u  nnUR.                  " / UQSP76 R1                  5       nU R3                  U5      nUU4$ )
Nr   r   .)dim)sincosrD   eagerg        )dropoutscaling)shaper+   r,   view	transposer.   r/   r5   r8   r9   r4   r   torchcatupdater   r   r   _attn_implementationr   trainingattention_dropoutrM   reshape
contiguousr0   )r;   r@   rA   rB   rC   rD   kwargsinput_shapehidden_shapequery_states
key_statesvalue_statesrJ   rI   	query_rot
query_passkey_rotkey_passcache_kwargsattention_interfaceattn_outputattn_weightss                         r=   forwardPhiAttention.forward5   s_    $))#2.88b8$--8{{=166|DNNqRST[[/44\BLLQPQR
{{=166|DNNqRST++L9L))*5J& 1 1 1112d//112 
 s/d////0sD--//0 
 2)WcO	7 yy)!8bAYY2;
%#&nUL'5'<'<ZW[WeWegs't$J(?;;++w6"9$++:Z:Z"[$7	%
  $}}C$2H2HLL	%
 	%
!\ "));;;;FFHjj-L((r?   )r0   r9   r.   r8   r,   r5   r4   r/   )NN)__name__
__module____qualname____firstlineno__r   r2   r&   rQ   Tensortupler   r   
LongTensorrg   __static_attributes____classcell__r<   s   @r=   r   r   #   s    y S , +/59;)||;) #5<<#=>;) !.	;)
 !;) !!1!12;) 
u||Xell3XeELL>Q5RR	S;) ;)r?   r   c                       \ rS rSrSrg)PhiMLPs    Nri   rj   rk   rl   rp   rv   r?   r=   rt   rt   s       r?   rt   c                     ^  \ rS rSrS\S\4U 4S jjr       SS\R                  S\	\R                     S\	\R                     S\	\\R                        S	\	\   S
\	\   S\	\R                     S\	\\R                  \R                  4      S\\R                  \	\\R                  \R                  4      4   4S jjrSrU =r$ )PhiDecoderLayerw   r   r   c                   > [         TU ]  5         [        XS9U l        [	        U5      U l        [        R                  " UR                  UR                  S9U l
        [        R                  " UR                  5      U l        g )N)r   r#   )r%   r&   r   	self_attnrt   mlpr'   r6   r)   r7   input_layernormDropoutresid_pdropresid_dropoutr:   s      r=   r&   PhiDecoderLayer.__init__x   s[    %fB&>!||F,>,>FDYDYZZZ(:(:;r?   r@   rB   position_idsrC   output_attentions	use_cacherD   rA   rE   c	                     Un
U R                  U5      nU R                  " SUUUUUUUUS.U	D6u  pU R                  U5      nU R                  U R                  U5      5      nX-   U
-   nU4nU(       a  X4-  nU$ )N)r@   rB   r   rC   r   r   rD   rA   rv   )r   r~   r   r   )r;   r@   rB   r   rC   r   r   rD   rA   rY   residualattn_outputsself_attn_weightsfeed_forward_hidden_statesoutputss                  r=   rg   PhiDecoderLayer.forward   s     !,,]; +/.. 
+
')%)/) 3
+
 
+
' )),7%)%7%78O%P"$AHL "++Gr?   )r   r   r   r~   )NNNFFNN)ri   rj   rk   rl   r   r2   r&   rQ   rm   r   ro   rn   boolFloatTensorrg   rp   rq   rr   s   @r=   rz   rz   w   s
   <y <S < 26378<,1$)59KO%||% !.% u//0	%
 !u||!45% $D>% D>% !!1!12% &eELL%,,,F&GH% 
u  (51B1BEDUDU1U+V"WW	X% %r?   rz   c                       \ rS rSrSrg)PhiRotaryEmbedding   rv   Nrw   rv   r?   r=   r   r      rx   r?   r   c                     ^  \ rS rSrS\4U 4S jjr         SS\\R                     S\\R                     S\\R                     S\\
   S\\R                     S	\\   S
\\   S\\   S\\R                     S\\   S\4S jjrSrU =r$ )PhiModel   r   c           	      f  > [         TU ]  U5        [        R                  " [	        UR
                  5       Vs/ sH  n[        X5      PM     sn5      U l        [        R                  " UR                  5      U l
        [        R                  " UR                  UR                  S9U l        U ?g s  snf )Nr}   )r%   r&   r'   
ModuleListrangenum_hidden_layersrz   layersr   
embd_pdropembed_dropoutr6   r)   r7   final_layernormnormr:   s      r=   r&   PhiModel.__init__   s     mmAFvG_G_A`aA`I_V/A`a
  ZZ(9(9:!||F,>,>FDYDYZI	 bs   B.	input_idsrB   r   past_key_valuesinputs_embedsr   r   output_hidden_statesrD   rY   rE   c
                     Ub  UOU R                   R                  nUb  UOU R                   R                  nUb  UOU R                   R                  nUS L US L-  (       a  [	        S5      eU R
                  (       a/  U R                  (       a  U(       a  [        R                  S5        SnUc  U R                  U5      nU(       a  Uc
  [        5       nU	cD  Ub  UR                  5       OSn[        R                  " XUR                  S   -   UR                  S9n	Uc  U	R!                  S5      n[#        U R                   UUU	UUS9nU R%                  U5      nUnU R'                  X5      nU(       a  SOS nU(       a  SOS nU R(                  S U R                   R*                    H7  nU(       a  X4-  nU" U4UUUUUU	US	.U
D6nUS   nU(       d  M.  UUS   4-  nM9     U R-                  U5      nU(       a  X4-  n[/        UU(       a  UOS UUS
9$ )Nz:You must specify exactly one of input_ids or inputs_embedszX`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.Fr   r   )device)r   input_embedsrB   rD   r   r   rv   )rB   r   rC   r   r   rD   rA   )last_hidden_stater   r@   
attentions)r   r   r   r   
ValueErrorgradient_checkpointingrU   loggerwarning_onceembed_tokensr   get_seq_lengthrQ   arangerN   r   	unsqueezer   r   
rotary_embr   r   r   r
   )r;   r   rB   r   r   r   r   r   r   rD   rY   past_seen_tokenscausal_maskr@   rA   all_hidden_statesall_self_attnsdecoder_layerlayer_outputss                      r=   rg   PhiModel.forward   s?    2C1N-TXT_T_TqTq$8$D $++JjJj 	 "+!6IDKK<Q<Q	-t";<YZZ&&4==Yj I  --i8M0*nO!CRC^==?de"\\ ]5H5H5K"KTaThThN )33A6L(;;&))+%
 **=9% #oomJ #7BD0d![[)H4;;+H+HIM#!%55!)
*)."3#-$7
 
M *!,M  =#3"55' J* ,,];  !11&+/8Od+%	
 	
r?   )r   r   r   )	NNNNNNNNN)ri   rj   rk   rl   r   r&   r   rQ   ro   rm   r   r   r   r   r   r
   rg   rp   rq   rr   s   @r=   r   r      s    y  151537+/59$(,0/359^
E,,-^
 !.^
 u//0	^

 "%^
   1 12^
 D>^
 $D>^
 'tn^
 !!1!12^
 +,^
 
!^
 ^
r?   r   c                   (   ^  \ rS rSrU 4S jrSrU =r$ )PhiForCausalLMi  c                    > [         TU ]  U5        [        R                  " UR                  UR
                  SS9U l        g )NTr!   )r%   r&   r'   r(   r)   
vocab_sizelm_head)r;   r   r<   s     r=   r&   PhiForCausalLM.__init__  s0     yy!3!3V5F5FTRr?   )r   )ri   rj   rk   rl   r&   rp   rq   rr   s   @r=   r   r     s    S Sr?   r   c                       \ rS rSrSrg)PhiForSequenceClassificationi  rv   Nrw   rv   r?   r=   r   r     rx   r?   r   c                       \ rS rSrSrg)PhiForTokenClassificationi   rv   Nrw   rv   r?   r=   r   r      rx   r?   r   )PhiPreTrainedModelr   r   r   r   )1typingr   r   rQ   torch.nnr'   cache_utilsr   r   masking_utilsr   modeling_layersr	   modeling_outputsr
   modeling_utilsr   processing_utilsr   utilsr   r   clip.modeling_clipr   llama.modeling_llamar   r   r   r   r   r   r   r   configuration_phir   
get_loggerri   r   _CHECKPOINT_FOR_DOC_CONFIG_FOR_DOCr   rt   rz   r   r   r   r   r   __all__rv   r?   r=   <module>r      s    %   . / 9 6 & 0 (	 	 	 ) 
		H	%' M)> M)`	W 	-0 -`	- 	h
z h
VS% S	#A 		 ; 	r?   