
    <hJ                     r   S SK JrJrJr  \" 5       (       a
  SSKrSSKJr  \" 5       (       a  SSKJr  SSKr\R                  " \
5      r/ SQrS rS r\R                  S	S
.S\R                   S\S\R$                  4S jjr " S S\R(                  5      rS rS rS rS rS r     SS jr    SS jrg)   )is_accelerate_availableis_torch_availablelogging    N)nn)init_empty_weights)g        g      ?g      ?g      ?g       @g      @g      @g      @g       g      g      g      g       g      g      g      c                     [         R                  R                  R                  nU" U R	                  [
        R                  5      [
        R                  SS9u  p[        X5      u  pX4$ )N   )axis)	triton_kernels_hubnumerics_detailsmxfpdowncast_to_mxfptotorchbfloat16uint8swizzle_mxfp4)wr   w_scales      W/var/www/html/shao/venv/lib/python3.13/site-packages/transformers/integrations/mxfp4.pyquantize_to_mxfp4r   3   sO    )::??PP!!$$u~~"6!LJAq*JA:    c                 z   [         R                  R                  [         R                  R                  [         R                  R                  pCn[         R
                  R                  n[         R
                  R                  R                  nUR                  SS9u  pxU" U" XS9U40 UD6n U" U" U5      U5      nX4$ )Nr
   )mx_axisdtype)	r   tensorFP4convert_layoutwrap_torch_tensortensor_detailslayoutStridedLayout"make_default_matmul_mxfp4_w_layout)	r   r   r   r    r!   r#   r$   value_layoutvalue_layout_optss	            r   r   r   ;   s    !!%%!!00!!33 +C
  ..55F&55<<JJM&,&O&OXY&O&Z#L(6ZHYZA .w7GG:r   i   )r   rows_per_chunkr   r(   returnc                H   SS K nU R                  (       dC  [        R                  R	                  5       (       a   U R                  5       n UR                  5       nUR                  [        R                  5      S-
  nU R                  S S UR                  :X  d#   SU R                  < SUR                  < 35       e[        R                  " [        X R                  S9nU R                  Gt pgnUR                  U5      U-  n	U R                  X5      n UR                  U	S5      n[        R                  " XS-  X R                  S9n
[        SX5       H  n[        X-   U	5      nXU nXU nUS	-  R                  [        R                   5      nUS
-	  R                  [        R                   5      nXU nX_   US S 2SS S24'   UU   US S 2SS S24'   [        R"                  " UUUS9  AAAAAM     U
R                  " / UQUPUS-  P76 R$                  " / UQXx-  S-  P76 n
A AAU
$ )Nr      zblocks.shape=z does not match scales.shape=)r   devicer
   r         )out)mathis_cudar   cudais_availabler   int32shaper   
FP4_VALUESr-   prodreshapeemptyrangeminlongldexpview)blocksscalesr   r(   r1   lutprefix_shapeGB
rows_totalr0   r0r1blkexpidx_loidx_hisubs                     r   convert_moe_packed_tensorsrN   X   s     >>ejj5577YYu{{#c)F<<,_?]PVP\P\.__,
,,z}}
EC ,,\a<(1,J^^J*F^^J*F
++ja%u]]
KCAz2$j1mm *,(uzz*Rj{Aqt!tG6{Aqt!tGC#&FCc 3" ++
.|
.Q
.A
.
3
3
M\
M1519
MC 	Jr   c                   b   ^  \ rS rSrU 4S jrS\R                  S\R                  4S jrSrU =r	$ )Mxfp4GptOssExperts   c           
        > [         TU ]  5         UR                  U l        UR                  U l        UR
                  U l        [        R                  " [        R                  " U R                  SU R                  -  U R
                  S-  S[        R                  S9SS9U l        [        R                  " [        R                  " U R                  SU R                  -  U R
                  S-  [        R                  S9SS9U l        [        R                  " [        R                  " U R                  SU R                  -  [        R                  S9SS9U l        [        R                  " [        R                  " U R                  U R
                  U R                  S-  S4[        R                  S9SS9U l        [        R                  " [        R                  " U R                  U R
                  U R                  S-  [        R                  S9SS9U l        [        R                  " [        R                  " U R                  U R
                  [        R                  S9SS9U l        SU l        S U l        S U l        g )Nr          r   Frequires_gradgZd;?)super__init__num_local_expertsnum_expertsintermediate_sizehidden_sizer   	Parameterr   zerosr   gate_up_proj_blocksgate_up_proj_scalesfloat32gate_up_proj_biasdown_proj_blocksdown_proj_scalesdown_proj_biasalphagate_up_proj_precision_configdown_proj_precision_config)selfconfig	__class__s     r   rX   Mxfp4GptOssExperts.__init__   s   !33!'!9!9!--#%<<KK((!d.D.D*DdFVFVZ\F\^`hmhshst$
  $&<<KK((!d.D.D*DdFVFVZ\F\didodop$
  "$KK((!d.D.D*DEMMZjo"
 !#KK))4+;+;T=S=SWY=Y[]^fkfqfqr!
 !#KK(($*:*:D<R<RVX<X`e`k`kl!
 !llKK(($*:*:%--P`e
 
-1**.'r   hidden_statesr)   c                    [         R                  R                  [         R                  R                  [         R                  R                  pvn[         R                  R
                  n[        R                  R                  UR                  5         U" U" SUS5      U R                  S 4S5      n	U" UU R                  U R                  R                  [        R                  5      UUU R                  S U	S9n
U" U
U R                  U R                   R                  [        R                  5      UUU R"                  UR$                  S9nS S S 5        U$ ! , (       d  f       W$ = f)Nswiglu)rf   limitr   )gather_indxprecision_configgammasfused_activation)scatter_indxrr   rs   )r   
matmul_ogsFnSpecsFusedActivationro   	swiglu_fnr   r3   r-   rf   gate_up_projrb   r   ra   rg   	down_projre   rh   	gate_scal)ri   rm   routing_data
gather_idxscatter_idxrw   rx   rv   ry   actintermediate_cache1intermediate_cache3s               r   forwardMxfp4GptOssExperts.forward   s0   ))11))99))44 #-
 '--77	ZZ}334!'(I?Q"RUYU_U_aeTfhijC",!!&&))%--8&!%!C!C!$	# #-###&&u}}5(!%!@!@#--# 50 #"1 540 #"s   B8E
E")rf   re   rc   rh   rd   rb   r_   rg   r`   r\   r[   rZ   )
__name__
__module____qualname____firstlineno__rX   r   Tensorr   __static_attributes____classcell__)rk   s   @r   rP   rP      s,    !/F #U\\  #]b]i]i  #  #r   rP   c                 T   SS K n[        R                  R                  [        R                  R                  [        R                  R
                  [        R                  R                  4u  p4pV[        R                  R                  U R                  5         [        R                  R                  5       n[        UR                  R                  SS5      5      nSn	U R                  S   n
U R                  S   nX-  nX-  nUS-   U-  nX-  nS nU" X5      u  nn[        R                   " USS9n[        R"                  " USS9u  nn[        R$                  " USU5      nUR'                  S5      n[        R(                  " UXS-
  S9X nUR+                  S5      R-                  [        R.                  5      nSn[        R0                  " UU:  UU5      n[        R2                  " US	S
9R-                  [        R.                  5      n[        R2                  " U5      R-                  [        R.                  5      n[        R0                  " UU:  UU	5      n[        R0                  " UU:*  UU	5      n[        R0                  " UU	:H  U	U5      nUU   n[        R0                  " UU   U	:H  U	U5      nU" UR                  5       UR                  5       S9nU" UR                  5       UR                  5       S9nU" UX5      nUnS S S 5        U" WWWWW5      WW4$ ! , (       d  f       N= f)Nr   
LOCAL_RANKr,   r
   c                     [         R                  " U * SSS9S S 2S U24   nUR                  5       n[         R                  " XSS9nX2R	                  5       4$ )Nr
   T)dimstabler   )r   argsortr=   take_along_dimint)valsktk_indxtk_vals       r   topk routing_torch_dist.<locals>.topk   sO    mmTEq>q"1"uEGllnG))$Q?F;;=((r   r   )binsmaxi  T)r   )src_indxdst_indx)osr   routing
GatherIndxRoutingDataScatterIndxcompute_expt_data_torchr   r3   r-   distributedget_world_sizer   environgetr6   softmaxsortgatherr9   histcr?   r   r5   wherer   )logitsn_expts_actr   r   r   r   r   
world_sizerankreplace_valuen_tokensn_expts_totn_local_expertslocal_expert_startlocal_expert_endn_gates_padr   	expt_scal	expt_indxsort_indiceshistvar	topk_indx	gate_indxr|   rq   ru   	expt_datahitted_expertss                                r   routing_torch_distr      s     	""--""..""..""::	EAJ[ 
		6==	)&&557
2::>>,23<<?ll1o%3!3 1H7,	)  $F8	9MM)4	"'**YA">	<LLA|<	 %%b)	{{9;!OLM_qNN2&))%++6	 KK	,> >YO	MM)D9<<U[[I	MM),//<	KK	,< <iW	KK 2i ?MZ	KK	] :M9U	i(	KK	) 4 E}V_`	 !)--/IMMOT"IMMOimmoV+D/O	$g 
*h y$SU`bnnni 
*	)s   I+L
L'c                    SS K Jn  UR                  5       (       a  UR                  5       (       a  [        nO[
        R                  R                  nUnUR                  S   nUR                  SU R                  R                  5      n[        R                  R                  XR                  R                  U R                  R                  5      n[         R"                  R%                  UR$                  5         U" XPR                  R&                  5      u  pgnS S S 5        U R)                  UWWW5      n	U	R                  USU R                  R                  5      n	X4$ ! , (       d  f       NL= f)Nr   r,   )torch.distributedr   r4   is_initializedr   r   r   r6   r9   router
hidden_dimr   
functionallinearweightbiasr   r3   r-   top_kexperts)
ri   rm   distr   
batch_sizerouter_logitsr}   r~   r   
routed_outs
             r   mlp_forwardr     s   $t2244$$,,44$$Q'J!))"dkk.D.DEMMM((8J8JDKKL\L\]M			=//	007{{GXGX0Y-+ 
1 m\:{SJ##JDKK4J4JKJ$$ 
1	0s   : E  
E.c                 ^   ^ SR                  U 5      m[        U4S jU 5       5      (       d  gg)N.c              3      >#    U H>  n[         R                  " U S 3T5      =(       d    [         R                  " U T5      v   M@     g7f)z\.N)rematch).0keycurrent_key_name_strs     r   	<genexpr>(should_convert_module.<locals>.<genexpr>3  s>      ksdgC523_rxx3%J^7__kss   AATF)joinany)current_key_namepatternsr   s     @r   should_convert_moduler   1  s5    88$45 ks   r   c                 n   SSK Jn  UR                  SS 5      nUR                  SS 5      nUR                  SS 5      n	UR                  SS 5      n
UR                  SS 5      nUR                  SS 5      nS	 GH<  nX;   d  M  Ub  U" UUUUU	U
UUS
S9	nU S3nU S3n[        XR	                  SS5      S   U5        [        X5      (       d  MW  [        X5      (       d  Mi  [        [        X5      [        X5      5      nUR                  SS5      R                  5       R                  U5      nUS:X  aA  [        R                  R                  5       (       a  [        R                  R                  5         [        X[        R                  R!                  U5      5        [#        X5        [#        X5        GM?     g )Nr   shard_and_distribute_modulemodelempty_paramcasting_dtypeto_contiguousr   device_meshrz   r{   F)	set_param_blocks_scalesr   r
   cpu)integrations.tensor_parallelr   r   setattrrsplithasattrrN   getattr	transpose
contiguousr   r   r3   r4   empty_cacher   r]   delattr)module
param_nameparam_valuetarget_devicedq_param_namekwargsr   r   r   r   r   r   r   projblocks_attrscales_attrdequantizeds                    r   
dequantizer  :  s|   JJJw%E**]D1KJJ5MJJ5M::fd#D**]D1K-&9!!!#
 "F'*K!F'*KF--c15a8+Fv++0L0L89UW^_eWst)33Aq9DDFII-X E)ejj.E.E.G.GJJ**,ehh&8&8&EF,,3 .r   c                 :   [         R                  R                  [         R                  R                  [         R                  R                  pvnSSKJn  UR                  SS 5      n	UR                  SS 5      n
UR                  SS 5      nUR                  SS 5      nUR                  SS 5      nUR                  SS 5      nS	 GHD  nX;   d  M  Ub  U" XXXX5        O;[        XR                  S
S5      S   [        R                  R                  USS95        U S3nU S3n[        U U5      n[        U U5      nUR                  R                  S:w  d  M  UR                  R                  S:w  d  M  UR!                  S5      nUS:X  a!  UR#                  UU R$                  S-  S5      nO UR#                  USU R$                  S-  5      n[        USU5      S:X  a  SnUR'                  U5      nUR'                  U5      n[        R(                  R                  U5         [+        UR-                  SS5      UR-                  SS5      5      u  nnS S S 5        US:X  a6  [        R.                  " UU R0                  U R$                  S-  /5      Wl        O2[        R.                  " UU R$                  U R0                  /5      Wl        [        XU5        [        U U S3U" WU" U" 5       S9S95        [5        U U5        [5        U U5        AGMG     g ! , (       d  f       N= f)Nr   r   r   r   r   r   r   r   r   r   r
   FrU   r   r   metar   rz   r,   typer   r3   _precision_config)rhs_data)weight_scaleflex_ctx)r   rv   PrecisionConfigFlexCtx
InFlexDatar   r   r   r   r   r   r   r]   r   r-   r  sizer?   r[   r   r3   r   r   Sizer\   r6   r   )r   r   r   r   r   r  r  r  r   r   r   r   r   r   r   r   r   r   r@   rA   local_expertstriton_weight_tensorr	  s                          r   load_and_swizzle_mxfp4r  `  s   %%55%%--%%00 )O
 KJJw%E**]D1KJJ5MJJ5M::fd#D**]D1K-&+_c  1 1#q 9! <ehh>P>PQ\lq>P>rs!F'*K!F'*KV[1FV[1F}}!!V+0B0Bf0L &A>)#[[8P8PST8TVXYF#[[F<T<TXY<YZF=&-@EI$*M=1=1ZZ&&}59F((R0&2B2B2r2J:6(, 6 >)16&(:(:F<T<TWX<XY2(. 27&(@(@&BTBTU2(.
 &:;f-.#YcYeHfg ,,k .4 65s   0L
L	c           
         Uc  / nU R                  5        GH  u  pgUR                  U5        [        X!5      (       d  UR                  S5        M:  UR                  R
                  S:X  a>  UR                  (       d-  [        5          [        U5      U R                  U'   SnS S S 5        UR                  R
                  S:X  a)  UR                  (       d  SSK
Jn  U" [        U5      Ul        [        [        UR!                  5       5      5      S:  a  [#        UUUUUUS9u  pUR                  S5        GM     X4$ ! , (       d  f       N= f)Nr,   GptOssExpertsT	GptOssMLPr   )
MethodType)has_been_replacedrj   )named_childrenappendr   poprk   r   r  r   rP   _modulestypesr  r   r   lenlistchildren_replace_with_mxfp4_linear)
r   modules_to_not_convertr   quantization_configr  rj   namer   r  _s
             r   r   r     s    ,,.%$%5NN  $$$7@S@^@^#%'9&'At$$(! & $$3<O<Z<Z('V<FNtFOO%&'!+#=& #"3$ A 	R - /. ### &%s   D::
E	c                 &   UR                   (       a  U $ SSKJn  U" S5      qUc  S/OUnUR                  b  UR                  UR                  5        [        [        U5      5      n[        U UUUUS9u  pU(       d  [        R                  S5        U $ )Nr   )
get_kernelz kernels-community/triton_kernelslm_head)rj   zYou are loading your model using mixed-precision FP4 quantization but no linear modules were found in your model. Please double check your model architecture, or submit an issue on github if you think this is a bug.)r  kernelsr&  r   r!  extendr  setr   loggerwarning)r   r!  r   r"  rj   r&  r  s          r   replace_with_mxfp4_linearr-    s     %%& ((JK,B,Ji[Pf11=%%&9&P&PQ!#&<"=>9 E 	
 Lr   )NNNFN)NNNN)utilsr   r   r   r   r   
accelerater   r   
get_loggerr   r+  r7   r   r   r   r   r   r   rN   ModulerP   r   r   r   r  r  r   r-   r   r   <module>r3     s    I H - 	 
		H	%
*B &4 ;;	4
 4 \\4nD# D#RAoH%*#-LDR  "$N  "r   