
    <h%                        S r SSKJrJrJr  SSKrSSKrSSKJr  SSKJ	r	  SSK
Jr  SSKJrJr  SS	KJrJr  SS
KJr  SSKJr  SSKJr  SSKJr  SSKJrJr  SSKJrJrJ r J!r!J"r"J#r#J$r$J%r%J&r&  SSK'J(r(  \RR                  " \*5      r+ " S S\RX                  5      r- " S S\5      r. " S S\5      r/ " S S\$5      r0 " S S\#5      r1 " S S\ 5      r2 " S S \!5      r3 " S! S"\"5      r4/ S#Qr5g)$zPyTorch Starcoder2 model.    )CallableOptionalUnionN)nn)check_model_inputs   )ACT2FN)CacheDynamicCache)create_causal_mask!create_sliding_window_causal_mask)FlashAttentionKwargs)BaseModelOutputWithPast)ALL_ATTENTION_FUNCTIONS)Unpack)TransformersKwargslogging   )	MistralAttentionMistralDecoderLayerMistralForCausalLM MistralForSequenceClassificationMistralForTokenClassificationMistralModelMistralRotaryEmbeddingapply_rotary_pos_embeager_attention_forward   )Starcoder2Configc                   v   ^  \ rS rSrS\4U 4S jjrS\\\R                        S\R                  4S jr
SrU =r$ )Starcoder2MLP7   configc                 D  > [         TU ]  5         UR                  n[        R                  " X!R
                  UR                  S9U l        [        R                  " UR
                  X!R                  S9U l        [        UR                     U l        UR                  U l        g N)bias)super__init__hidden_sizer   Linearintermediate_sizeuse_biasc_fcc_projr	   
hidden_actactresidual_dropout)selfr#   	embed_dim	__class__s      i/var/www/html/shao/venv/lib/python3.13/site-packages/transformers/models/starcoder2/modular_starcoder2.pyr(   Starcoder2MLP.__init__8   sq    &&	IIi)A)AX	ii 8 8)//Z&++, & 7 7    hidden_statesreturnc                     U R                  U5      nU R                  U5      nU R                  U5      n[        R                  R                  XR                  U R                  S9nU$ )Nptraining)r-   r0   r.   r   
functionaldropoutr1   r=   )r2   r8   s     r5   forwardStarcoder2MLP.forward@   sX    		-0/M2--m?T?T_c_l_l-mr7   )r0   r-   r.   r1   )__name__
__module____qualname____firstlineno__r   r(   r   tupletorchFloatTensorr@   __static_attributes____classcell__r4   s   @r5   r!   r!   7   s>    8/ 8XeE4E4E.F%G EL]L]  r7   r!   c                   L  ^  \ rS rSrSS\S\\   4U 4S jjjr  SS\R                  S\
\R                  \R                  4   S\\R                     S\\   S	\\R                     S
\\   S\
\R                  \\R                     \\
\R                        4   4S jjrSrU =r$ )Starcoder2AttentionH   r#   	layer_idxc                 t  > [         TU ]  5         UR                  U l        [        R                  " UR
                  UR                  U R                  -  UR                  S9U l	        [        R                  " UR
                  UR                  U R                  -  UR                  S9U l        [        R                  " UR
                  UR                  U R                  -  UR                  S9U l        [        R                  " UR                  U R                  -  UR
                  UR                  S9U l        g r%   )r'   r(   r1   r   r*   r)   num_attention_headshead_dimr,   q_projnum_key_value_headsk_projv_projo_projr2   r#   rO   r4   s      r5   r(   Starcoder2Attention.__init__I   s     & 7 7ii 2 2F4N4NQUQ^Q^4^eketetuii 2 2F4N4NQUQ^Q^4^eketetuii 2 2F4N4NQUQ^Q^4^eketetuii : :T]] JFL^L^eketetur7   r8   position_embeddingsattention_maskpast_key_valuecache_positionkwargsr9   c           
         UR                   S S n/ UQSPU R                  P7nU R                  U5      R                  U5      R	                  SS5      n	U R                  U5      R                  U5      R	                  SS5      n
U R                  U5      R                  U5      R	                  SS5      nUu  p[        XX5      u  pUb$  XUS.nUR                  XU R                  U5      u  p[        nU R                  R                  S:w  a  [        U R                  R                     nU" U U	U
UU4U R                  (       d  SOU R                  U R                   [#        U R                  SS 5      S.UD6u  nnUR$                  " / UQSP76 R'                  5       nU R)                  U5      n[*        R,                  R/                  UU R0                  U R                  S	9nUU4$ )
Nr   r   )sincosr]   eagerg        sliding_window)r?   scalingrd   r;   )shaperR   rS   view	transposerU   rV   r   updaterO   r   r#   _attn_implementationr   r=   attention_dropoutre   getattrreshape
contiguousrW   r   r>   r?   r1   )r2   r8   rZ   r[   r\   r]   r^   input_shapehidden_shapequery_states
key_statesvalue_statesrb   ra   cache_kwargsattention_interfaceattn_outputattn_weightss                     r5   r@   Starcoder2Attention.forwardQ   s    $))#2.88b8$--8{{=166|DNNqRST[[/44\BLLQPQR
{{=166|DNNqRST&#7RU#[ %#&nUL'5'<'<ZW[WeWegs't$J(?;;++w6"9$++:Z:Z"[$7
%
  $}}C$2H2HLL"4;;0@$G
%
 
%
!\ "));;;;FFHkk+.mm++4004== , 
 L((r7   )rU   rW   rS   r1   rV   )N)NN)rB   rC   rD   rE   r   r   intr(   rG   TensorrF   r
   
LongTensorr   r   r@   rI   rJ   rK   s   @r5   rM   rM   H   s    v/ vHSM v v +/59.)||.) #5<<#=>.) !.	.)
 !.) !!1!12.) -..) 
u||Xell3XeELL>Q5RR	S.) .)r7   rM   c                   4   ^  \ rS rSrS\S\4U 4S jjrSrU =r$ )Starcoder2DecoderLayer   r#   rO   c                   > [         TU ]  U 5        [        XS9U l        [	        U5      U l        [        R                  " UR                  UR                  S9U l
        [        R                  " UR                  UR                  S9U l        g )N)r#   rO   eps)r'   r(   rM   	self_attnr!   mlpr   	LayerNormr)   norm_epsiloninput_layernormpost_attention_layernormrX   s      r5   r(   Starcoder2DecoderLayer.__init__   sf    ,FP (!||F,>,>FDWDWX(*V5G5GVM`M`(a%r7   )r   r   r   r   )	rB   rC   rD   rE   r   ry   r(   rI   rJ   rK   s   @r5   r}   r}      s     b/ bC b br7   r}   c                       \ rS rSrSrg)Starcoder2RotaryEmbedding    NrB   rC   rD   rE   rI   r   r7   r5   r   r          r7   r   c                   2  ^  \ rS rSrS\4U 4S jjr\       SS\\R                     S\\R                     S\\R                     S\\\\\R                     4      S\\R                     S	\\   S
\\R                     S\\   S\4S jj5       rSrU =r$ )Starcoder2Model   r#   c           	      :  > [         TU ]  U5        [        R                  " [	        UR
                  5       Vs/ sH  n[        X5      PM     sn5      U l        [        R                  " UR                  UR                  S9U l        UR                  U l        g s  snf )Nr   )r'   r(   r   
ModuleListrangenum_hidden_layersr}   layersr   r)   r   normembedding_dropoutrX   s      r5   r(   Starcoder2Model.__init__   sy     mmHMfNfNfHghHg9#F6Hgh
 LL!3!39L9LM	!'!9!9 is   B	input_idsr[   position_idspast_key_valuesinputs_embeds	use_cacher]   r^   r9   c                    US L US L-  (       a  [        S5      eUc  U R                  U5      nU(       a  Uc
  [        5       nUcD  Ub  UR                  5       OSn	[        R
                  " XUR                  S   -   UR                  S9nUc  UR                  S5      nU R                  R                  c  [        O[        n
U
" U R                  UUUUUS9nUn[        R                  R                  XR                   U R"                  S9nU R%                  X5      nU R&                  S U R                  R(                    H  nU" U4UUUUUUS.UD6nM     U R+                  U5      n[-        UU(       a  US9$ S S9$ )	Nz:You must specify exactly one of input_ids or inputs_embedsr   r   )device)r#   input_embedsr[   r]   r   r   r;   )r[   r   r\   r   r]   rZ   )last_hidden_stater   )
ValueErrorembed_tokensr   get_seq_lengthrG   arangerf   r   	unsqueezer#   rd   r   r   r   r>   r?   r   r=   
rotary_embr   r   r   r   )r2   r   r[   r   r   r   r   r]   r^   past_seen_tokensmask_functioncausal_maskr8   rZ   decoder_layers                  r5   r@   Starcoder2Model.forward   s    -t";<YZZ  --i8M0*nO!CRC^==?de"\\ ]5H5H5K"KTaThThN )33A6L.2kk.H.H.P*Vw#;;&))+%
 &--33dmm . 

 #oomJ![[)H4;;+H+HIM)	*).#-$7	 	M J 		-0&+/8O
 	
>B
 	
r7   )r   r   r   )NNNNNNN)rB   rC   rD   rE   r   r(   r   r   rG   r{   rz   r   r
   listrH   boolr   r   r   r@   rI   rJ   rK   s   @r5   r   r      s    :/ :  151537KO59$(59?
E,,-?
 !.?
 u//0	?

 "%tE4E4E/F(F"GH?
   1 12?
 D>?
 !!1!12?
 +,?
 
!?
 ?
r7   r   c                       \ rS rSrSrg)Starcoder2ForCausalLM   r   Nr   r   r7   r5   r   r      r   r7   r   c                       \ rS rSrSrg)#Starcoder2ForSequenceClassification   r   Nr   r   r7   r5   r   r      r   r7   r   c                       \ rS rSrSrg) Starcoder2ForTokenClassification   r   Nr   r   r7   r5   r   r      r   r7   r   )r   r   Starcoder2PreTrainedModelr   r   )6__doc__typingr   r   r   rG   torch.utils.checkpointr   transformers.utils.genericr   activationsr	   cache_utilsr
   r   masking_utilsr   r   modeling_flash_attention_utilsr   modeling_outputsr   modeling_utilsr   processing_utilsr   utilsr   r   mistral.modeling_mistralr   r   r   r   r   r   r   r   r   configuration_starcoder2r   
get_loggerrB   loggerModuler!   rM   r}   r   r   r   r   r   __all__r   r7   r5   <module>r      s   (   , ,    9 ! . R B 7 5 & 0
 
 
 7 
		H	%BII "7)* 7)tb0 b	 6 	I
l I
X	. 		*J 		'D 	r7   