
    hH                     r   d dl mZmZmZ  e       r
ddlZddlmZ  e       rddlmZ ddlZddl	m
Z
  ej                  e      Zg dZe
d        Zd	 Zd
 Zej$                  dddej&                  dedej*                  fdZ G d dej.                        Zd Zd Zd Zd Zd Z	 	 	 	 	 ddZ	 	 	 	 ddZy)   )is_accelerate_availableis_torch_availablelogging    N)nn)init_empty_weights)contextmanager)g        g      ?g      ?g      ?g       @g      @g      @g      @g       g      g      g      g       g      g      g      c              #     K   t               rdd l}t        | |j                        r| j                  } n"t        | t
              r |j                  |       } t        | dd       }|dk(  r*|j                  j	                  |       5  d  	 d d d        y |dk(  r6t        |d      r*|j                  j	                  |       5  d  	 d d d        y d  y # 1 sw Y   IxY w# 1 sw Y   xY ww)Nr   typecudaxpu)
r   torch
isinstanceTensordevicestrgetattrr   hasattrr   )devr   dev_types      ^/var/www/html/aiagenthome/venv/lib/python3.12/site-packages/transformers/integrations/mxfp4.py	on_devicer   3   s     c5<<(**CS!%,,s#C3-v""3' (' u!6!!#& '& 
 (' '&s6   BC*C	5C*>CC*CC*C'#C*c                     |j                   j                  j                  } || j                  t        j
                        t        j                  d      \  } }| |fS )N   )axis)numerics_detailsmxfpdowncast_to_mxfp_torchtor   bfloat16uint8)wtriton_kernels_hubr   w_scales       r   quantize_to_mxfp4r%   J   sH    /@@EE\\'U^^(<ekkPQRJAwg:    c                 f   |j                   j                  |j                   j                  |j                   j                  }}}|j                  j
                  }|j                  j
                  j                  }|j                  d      \  }}	 | || |      |fi |	}  | ||      |      }| |fS )zE
    Changes the layout of the tensors depending on the hardware
    r   )mx_axisdtype)tensorFP4convert_layoutwrap_torch_tensortensor_detailslayoutStridedLayout"make_default_matmul_mxfp4_w_layout)
r"   r$   r#   r,   r-   r.   r0   r1   value_layoutvalue_layout_optss
             r   swizzle_mxfp4r5   P   s    
 	!!%%!!00!!33 +C
  ..55F&55<<JJM&,&O&OXY&O&Z#L#(#6ZHYZA.w7GGg:r&   i   )r*   rows_per_chunkr*   r6   returnc                   ddl }| j                  s>t        j                  j	                         r | j                         } |j                         }|j                  t        j                        dz
  }| j                  dd |j                  k(  s$J d| j                  dd d|j                         t        j                  t        || j                        }| j                  ^ }}}|j                  |      |z  }	| j                  |	|      } |j                  |	d      }t        j                  |	|d	z  || j                        }
t        d|	|      D ]  }t        ||z   |	      }| || }||| }|d
z  j                  t        j                         }|dz	  j                  t        j                         }|
|| }||   |ddddd	f<   ||   |ddddd	f<   t        j"                  |||       ~~~~~   |
j                  g |||d	z   j$                  g |||z  d	z   }
~ ~~|
j'                  dd	      j)                         S )zw
    Convert the mxfp4 weights again, dequantizing and makes them compatible with the forward
    pass of GPT_OSS.
    r   N   zblocks.shape[:-1]=z does not match scales.shape=)r*   r   r   r         )out)mathis_cudar   r   is_availabler   int32shaper+   
FP4_VALUESr   prodreshapeemptyrangeminlongldexpview	transpose
contiguous)blocksscalesr*   r6   r>   lutprefix_shapeGB
rows_totalr=   r0r1blkexpidx_loidx_hisubs                     r   convert_moe_packed_tensorsr\   d   s#     >>ejj557YYu{{#c)F<<,d1Ccr1B0DDbU[UaUaTc.dd,
,,zv}}
EC ,,\1a<(1,J^^J*F^^J*F
++j!a%uV]]
KCAz>2n$j1RmRm *,(uzz*"Rj6{Aqt!tG6{Aqt!tGC#&FCc 3" 4+#++
.|
.Q
.A
.
3
3
M\
M1q519
MC==A))++r&   c                   V     e Zd Z fdZdej
                  dej
                  fdZ xZS )Mxfp4GptOssExpertsc           	      r   t         |           |j                  | _        |j                  | _        |j
                  | _        t        j                  t        j                  | j                  d| j                  z  | j
                  dz  dt        j                        d      | _        t        j                  t        j                  | j                  d| j                  z  | j
                  dz  t        j                        d      | _        t        j                  t        j                  | j                  d| j                  z  t        j                        d      | _        t        j                  t        j                  | j                  | j
                  | j                  dz  dft        j                        d      | _        t        j                  t        j                  | j                  | j
                  | j                  dz  t        j                        d      | _        t        j                  t        j                  | j                  | j
                  t        j                        d      | _        d| _        t'        |dd	      | _        d | _        d | _        t'        |dd	      | _        y )
Nr          r)   Frequires_gradgZd;?swiglu_limitg      @)super__init__num_local_expertsnum_expertsintermediate_sizehidden_sizer   	Parameterr   zerosr!   gate_up_proj_blocksgate_up_proj_scalesfloat32gate_up_proj_biasdown_proj_blocksdown_proj_scalesdown_proj_biasalphar   limitgate_up_proj_precision_configdown_proj_precision_config)selfconfig	__class__s     r   rf   zMxfp4GptOssExperts.__init__   s   !33!'!9!9!--#%<<KK((!d.D.D*DdFVFVZ\F\^`hmhshst$
  $&<<KK((!d.D.D*DdFVFVZ\F\didodop$
  "$KK((!d.D.D*DEMMZjo"
 !#KK))4+;+;T=S=SWY=Y[]^fkfqfqr!
 !#KK(($*:*:D<R<RVX<X`e`k`kl!
 !llKK(($*:*:%--P`e
 
V^S9
-1**.'V^S9
r&   hidden_statesr7   c                    t         j                  j                  t         j                  j                  t         j                  j                  }}}t         j                  j
                  }t        |j                        5   | |d|d      | j                  | j                  fd      }	 ||| j                  | j                  j                  t        j                        ||| j                  d |	      }
 ||
| j                   | j"                  j                  t        j                        ||| j$                  |j&                        }d d d        |S # 1 sw Y   S xY w)Nswiglu)rt   ru   r   )gather_indxprecision_configgammasfused_activation)scatter_indxr   r   )r#   
matmul_ogsFnSpecsFusedActivationr}   	swiglu_fnr   r   rt   ru   gate_up_projrp   r   r   ro   rv   	down_projrs   rw   	gate_scal)rx   r{   routing_data
gather_idxscatter_idxr   r   r   r   actintermediate_cache1intermediate_cache3s               r   forwardzMxfp4GptOssExperts.forward   s+   ))11))99))44 #-
 '--77	}++,!'(I?Q"RUYU_U_aeakakTlnopC",!!&&))%--8&!%!C!C!$	# #-###&&u}}5(!%!@!@#--# -. #"/ -. #"s   >CEE)__name__
__module____qualname__rf   r   r   r   __classcell__)rz   s   @r   r^   r^      s'    ":H#U\\ #]b]i]i #r&   r^   c                 B   dd l }t        j                  j                  t        j                  j                  t        j                  j
                  t        j                  j                  f\  }}}}t        | j                        5  t        j                  j                         }t        |j                  j                  dd            }d}	| j                  d   }
| j                  d   }||z  }||z  }|dz   |z  }|
|z  }d } || |      \  }}t        j                   |d      }t        j"                  |d      \  }}t        j$                  |d|      }|j'                  d      }t        j(                  |||dz
        || }|j+                  d      j-                  t        j.                        }d	}t        j0                  ||k  ||      }t        j2                  |d
      j-                  t        j.                        }t        j2                  |      j-                  t        j.                        }t        j0                  ||k  ||	      }t        j0                  ||k  ||	      }t        j0                  ||	k(  |	|      }||   }t        j0                  ||   |	k(  |	|      } ||j                         |j                               } ||j                         |j                               } ||||      }|}d d d         |      fS # 1 sw Y   xY w)Nr   
LOCAL_RANK0r:   r   c                     t        j                  |  dd      d d d |f   }|j                         }t        j                  | |d      }||j	                         fS )Nr   T)dimstabler   )r   argsortrI   take_along_dimint)valsktk_indxtk_vals       r   topkz routing_torch_dist.<locals>.topk   sS    mmTEq>q"1"uEGllnG))$Q?F7;;=((r&   r   )binsmaxi  T)r   )src_indxdst_indx)osr#   routing
GatherIndxRoutingDataScatterIndxcompute_expt_data_torchr   r   r   distributedget_world_sizer   environgetrB   softmaxsortgatherrE   histcrK   r   rA   wherer   )logitsn_expts_actr   r   r   r   r   
world_sizerankreplace_valuen_tokensn_expts_totn_local_expertslocal_expert_startlocal_expert_endn_gates_padr   	expt_scal	expt_indxsort_indiceshistvar	topk_indx	gate_indxr   r~   r   	expt_datahit_expertss                                r   routing_torch_distr      s     	""--""..""..""::	EAJ[*A 
6==	!&&557
2::>>,45<<?ll1o%3!O3 1H7,	)  $FK8	9MM)4	"'**YA">	<LLA|<	 %%b)	{{9;K!OLM_`pqNN2&))%++6	 KK	,> >YO	MM)D9<<U[[I	MM),//<	KK	,< <iW	KK 2i ?MZ	KK	] :M9U	i(	KK	) 4 E}V_`	 !)--/IMMOT"IMMOimmoV+D/;O	!g 
"h y$iPR]_kkki 
"	!s   I9LLc                    dd l m} |j                         r#|j                         rt	        | d      rt
        }nt        j                  j                  }|j                  d   }|j                  d| j                  j                        }t        j                  j                  || j                  j                  | j                  j                         }t#        |j$                        5   ||| j                  j&                        \  }}}d d d        | j)                  |      }	|	j                  |d| j                  j                        }	|	|fS # 1 sw Y   HxY w)Nr   
_is_hookedr:   )torch.distributedr   r@   is_initializedr   r   r#   r   rB   rE   router
hidden_dimr   
functionallinearweightbiasr   r   top_kexperts)
rx   r{   distr   
batch_sizerouter_logitsr   r   r   
routed_outs
             r   mlp_forwardr   '  s   $t224|9T$$,,44$$Q'J!))"dkk.D.DEMMM((8J8JDKKL\L\]M	=''	(07t{{GXGX0Y-j+ 
) m\:{SJ##JDKK4J4JKJ}$$ 
)	(s   '"EEc                 R    dj                  |       t        fd|D              syy)N.c              3      K   | ]6  }t        j                  | d       xs t        j                  |        8 yw)z\.N)rematch).0keycurrent_key_name_strs     r   	<genexpr>z(should_convert_module.<locals>.<genexpr>=  s?      ksdgC523_rxx3%J^7__kss   <?TF)joinany)current_key_namepatternsr   s     @r   should_convert_moduler   ;  s0    88$45 ks  r&   c                    ddl m} |j                  d      }|j                  d      }|j                  d      }	|j                  d      }
|j                  d      }|j                  d      }d	D ]  }||v s	| ||||||	|
||      }| d
}| d}t        | |j	                  dd      d   |       t        | |      sQt        | |      s^t        t        | |      t        | |            }|dk(  r<t        j                  j                         rt        j                  j                          t        | |t        j                  j                  |j                  |                   t        | |       t        | |        y )Nr   shard_and_distribute_modulemodelempty_paramcasting_dtypeto_contiguousr   device_mesh)r   r   _blocks_scalesr   r   cpu)integrations.tensor_parallelr   r   setattrrsplitr   r\   r   r   r   r@   empty_cacher   rk   r   delattr)module
param_nameparam_valuetarget_devicedq_param_namekwargsr   r   r   r   r   r   r   projblocks_attrscales_attrdequantizeds                    r   
dequantizer  D  sZ   JJJwE**]+KJJ/MJJ/M::fD**]+K-:&9!!!	 "F'*K!F'*KFJ--c15a8+Fv{+0L89UW^_egrWst E)ejj.E.E.GJJ**,ehh&8&89V&WX,,- .r&   c                    |j                   j                  |j                   j                  |j                   j                  }}}ddlm}	 |j                  d      }
|j                  d      }|j                  d      }|j                  d      }|j                  d      }|j                  d      }d	|v r&|j                  d
      d   j                  d      d   }d|v r&|j                  d
      d   j                  d      d   }| |	|
|||||||       n?t        | |j                  d
d      d   t        j                  j                  |d              d}| d}t        | |      }t        | |      }|j                  j                  dk7  r|j                  j                  dk7  r|j!                  d      }|dk(  r!|j#                  || j$                  dz  d      }n |j#                  |d| j$                  dz        }t        |d|      dk(  rd}|j'                  |      j)                         }|j'                  |      j)                         }t+        |      5  t-        |j/                  dd      |j/                  dd      |      \  }}ddd       |dk(  r5t        j0                  || j2                  | j$                  dz  g      _        n1t        j0                  || j$                  | j2                  g      _        t        | ||       t        | | d | | |                          t7        | |       t7        | |       ~yyy# 1 sw Y   xY w)zq
    This transforms the weights obtained using `convert_gpt_oss.py` to load them into `Mxfp4GptOssExperts`.
    r   r   r   r   r   r   r   r   rN   r   r:   r   r   rO   r   Nr   Frb   metar   r   r   r   _precision_config)rhs_data)weight_scaleflex_ctx)r   PrecisionConfigFlexCtx
InFlexDatar   r   r   splitr   r   r   r   rk   r   r   r   sizerE   ri   r   rM   r   r5   rL   Sizerj   rB   r   )r   r   r   r   r#   r   r  r  r  r   r   r   r   r   r   r   r  r  r  rN   rO   local_expertstriton_weight_tensorr  s                           r   load_and_swizzle_mxfp4r  g  s+   
 	%%55%%--%%00 )WO
 KJJwE**]+KJJ/MJJ/M::fD**]+K:$R(..y9!<:$R(..y9!<#;ZW[]h	
 	
))#q1!4ehh6H6Hdi6H6jkF'"KF'"KV[)FV[)F}}V#(:(:f(DA>!^^M63K3Ka3OQSTF^^M2v7O7OST7TUF=&-8EA"M=)446=)446}%1>  R(&*:*:2r*BDV2. , & >!).]FDVDVX^XpXpstXt4u)v &).]FD\D\^d^p^p4q)r & 	23f%&Q[Q]@^_	
 	$$A )E# &%s   1MMc           	      n   |g }| j                         D ]  \  }}|j                  |       t        ||      s|j                  d       6|j                  j
                  dk(  r9|j                  s-t               5  t        |      | j                  |<   d}d d d        |j                  j
                  dk(  r$|j                  sddl
m}  |t        |      |_        t        t        |j!                                     dkD  rt#        ||||||      \  }	}|j                  d        | |fS # 1 sw Y   xY w)Nr:   GptOssExpertsT	GptOssMLPr   )
MethodType)has_been_replacedry   )named_childrenappendr   poprz   r   r  r   r^   _modulestypesr  r   r   lenlistchildren_replace_with_mxfp4_linear)
r   modules_to_not_convertr   quantization_configr  ry   namer   r  _s
             r   r#  r#    s%    ,,.f%$%57MN  $$$7@S@^@^#%'9&'At$$(! & $$3<O<Z<Z('V<FNtFOO%&'!+#=& #"3$ A  	R - /. #### &%s   :D++D4	c                    |j                   r| S ddlm}  |d      a|dgn|}|j                  |j                  |j                         t        t        |            }t        | ||||      \  } }|st        j                  d       | S )Nr   )
get_kernelz kernels-community/triton_kernelslm_head)ry   zYou are loading your model using mixed-precision FP4 quantization but no linear modules were found in your model. Please double check your model architecture, or submit an issue on github if you think this is a bug.)r  kernelsr)  r#   r$  extendr!  setr#  loggerwarning)r   r$  r   r%  ry   r)  r  s          r   replace_with_mxfp4_linearr0    s     %%& ((JK,B,Ji[Pf11=%%&9&P&PQ!#&<"=>9 E 	
 Lr&   )NNNFN)NNNN) utilsr   r   r   r   r   
accelerater   r   
contextlibr	   
get_loggerr   r.  rC   r   r%   r5   r    r*   r   r   r\   Moduler^   r   r   r   r  r  r#  r0   r&   r   <module>r7     s    I H - 	 % 
		H	%
( 
 
,0 &3, ;;	3,
 3, \\3,lD# D#RAlH%( -F@J  "$N  "r&   