
    <h%!                         S SK JrJrJr  SSKJr  SSKJr  SSKJ	r	  \(       a  SSK
Jr  SSKJrJrJrJrJr  SS	KJr  \" 5       (       a  S S
Kr\R*                  " \5      r " S S\5      rg
)    )TYPE_CHECKINGAnyOptional   )tqdm   )HfQuantizer)get_module_from_name)PreTrainedModel)is_accelerate_availableis_flute_availableis_hadamard_availableis_torch_availablelogging)QuantizationConfigMixinNc                     ^  \ rS rSrSrSrSrSS/rS\4U 4S jjr	S	 r
S S jr S!SSSSS\SSS\\\4   S\\\      4S jjr  S"S jrS"S jrS\\   S\S
\\   4S jr\S
\4S j5       rS!S jrSSSSS\S\\\4   S
\4
S jrS rSrU =r$ )#HiggsHfQuantizer"   z
Quantizer of the HIGGS method. Enables the loading of prequantized models and in-flight quantization of full-precision models.
FTzflute-kernelfast_hadamard_transformquantization_configc                 4   > [         TU ]  " U40 UD6  Xl        g N)super__init__r   )selfr   kwargs	__class__s      _/var/www/html/shao/venv/lib/python3.13/site-packages/transformers/quantizers/quantizer_higgs.pyr   HiggsHfQuantizer.__init__+   s    ,77#6     c                    [         R                  R                  5       (       d  [        S5      e[	        5       (       d  [        S5      e[        5       (       d  [        S5      e[        5       (       d  [        S5      eUc  [        S5      e[        U[        5      (       a4  SUR                  5       ;   d  SUR                  5       ;   a  [        S5      eg g )	NzNHIGGS quantization is only supported on GPU. Please use a different quantizer.zHUsing `higgs` quantization requires Accelerate: `pip install accelerate`zLUsing `higgs` quantization requires FLUTE: `pip install flute-kernel>=0.3.0`zbUsing `higgs` quantization requires fast_hadamard_transform: `pip install fast_hadamard_transform`zwYou are attempting to load a HIGGS model without setting device_map. Please set device_map comprised of 'cuda' devices.cpudiskzYou are attempting to load a HIGGS model with a device_map that contains a CPU or disk device. This is not supported. Please remove the CPU or disk device from the device_map.)torchcudais_availableNotImplementedErrorr   ImportErrorr   r   
ValueError
isinstancedictvalues)r   
device_mapr   s      r   validate_environment%HiggsHfQuantizer.validate_environment/   s    zz&&((%&vww&((hii!##lmm$&&t  F  
D))u
8I8I8K/KvYcYjYjYlOld  Pm)r    returnc                     Uc'  [         R                  S5        [        R                  nU$ U[        R                  :w  a#  U[        R                  :w  a  [        SU S35      eU$ )NzS`torch_dtype` is None. Setting `torch_dtype=torch.float16` for FLUTE compatibility.zInvalid `torch_dtype` z_. HIGGS quantization only supports `torch_dtype=torch.float16` or `torch_dtype=torch.bfloat16`.)loggerinfor$   float16bfloat16r)   )r   torch_dtypes     r   update_torch_dtype#HiggsHfQuantizer.update_torch_dtypeI   sg    KKmn--K  EMM)kU^^.K(  6U  V  r    modelr   param_valueztorch.Tensor
param_nametarget_deviceztorch.device
state_dictunexpected_keysc                 ,   SSK Jn   U" UR                  U5      U R                  R                  U R                  R
                  U R                  R                  U R                  R                  5      nA[        X5      u  pSR                  UR                  S5      S S 5      nUR                  5        H  u  pXR                  ;   a-  [        R                  R                  USS9U	R                  U'   MA  XR                   ;   a.  [        R                  R#                  U5      U	R                   U'   M~  US:X  a/  Xl        UR'                  5       U R                  R$                  U'   M  [)        SU S	U	 35      e   Ub  X6;   a  UR+                  U5        g g g )
Nr   )quantize_with_higgs.F)requires_gradtune_metadatazUnexpected key z in module )integrationsr@   tor   bitsp
group_sizehadamard_sizer
   joinsplititems_parametersr$   nn	Parameter_buffersBufferrD   to_dictr)   remove)r   r9   r:   r;   r<   r=   r>   r@   
flute_dictmodule_module_namekeyvalues                 r   create_quantized_param'HiggsHfQuantizer.create_quantized_paramT   sb    	7	 )NN=)$$))$$&&$$//$$22

 (;	hhz//4Sb9:$**,JC(((*/((*<*<URW*<*X""3''',xxu'=$'',$FKmmo((66{C ?3%{6(!KLL - &:+H"":. ,I&r    c                 h    SSK Jn  U" UU R                  S9  U R                  UR                  l        g )Nr   )replace_with_higgs_linearr   )rE   r^   r   config)r   r9   r   r^   s       r   $_process_model_before_weight_loading5HiggsHfQuantizer._process_model_before_weight_loading{   s/    
 	=! $ 8 8	
 ,0+C+C(r    c                    SSK JnJn  SSKJn  SSKJn  0 nUR                  5        VV	s0 sH  u  p[        X5      (       d  M  X_M     n
nn	[        U
R                  5       SSS9 GH  u  pU	R                  R                  U;  a0  U" U	R                  R                  S	9XyR                  R                  '   XyR                  R                     U	l        UR                  U R                  R                   U   5      U	l        U" U	R                  R"                  U	R$                  R"                  U	R                   S
9u  U	R                  l        U	l        U	R                   R'                  5       U R                  R                   U'   GM     g s  sn	nf )Nr   )TuneMetaDatamaybe_tune_and_repack)make_workspace_streamkr   HiggsLinearzRepacking HIGGS modulesF)descleave)device)weightscalesmetadata)
flute.tunerd   re   flute.utilsrf   rE   rh   named_modulesr*   r   rM   rl   rk   	workspace	from_dictr   rD   datarm   rS   )r   r9   r   rd   re   rf   rh   flute_workspacesnamerV   flute_moduless              r   #_process_model_after_weight_loading4HiggsHfQuantizer._process_model_after_weight_loading   s@   B6.:?:M:M:Os:O,$S]^dSr:Os !4!4!6=V^cdLD }}##+;;9OW]WdWdWkWk9l !5!56/0D0DEF $0#9#9$:R:R:`:`ae:f#gF 7L}}))}}))--84FMM 4
 <B;O;O;W;W;YD$$2248 e ts   FFmissing_keysprefixc                    ^^	 SSK Jn  UR                  5        VVs1 sH  u  pV[        Xd5      (       d  M  UiM     snnm	S[        S[
        4U	U4S jjnU Vs/ sH  o" U5      (       a  M  UPM     sn$ s  snnf s  snf )Nr   rg   rY   r0   c                    >^ ^ T R                  S5      (       d  T R                  S5      (       a  gT ST  3m[        UU 4S jT 5       5      $ )Nz.weightz.biasFrA   c              3   B   >#    U H  oT;   =(       d    UT;   v   M     g 7fr    ).0rv   full_keyrY   s     r   	<genexpr>NHiggsHfQuantizer.update_missing_keys.<locals>.should_update.<locals>.<genexpr>   s      O;4s{6dh&66;s   )endswithany)rY   r   higgs_namesr{   s   `@r   should_update;HiggsHfQuantizer.update_missing_keys.<locals>.should_update   sD    ||I&&#,,w*?*? 3%(HO;OOOr    )rE   rh   rq   r*   strbool)
r   r9   rz   r{   rh   rv   rV   r   rY   r   s
      `     @r   update_missing_keys$HiggsHfQuantizer.update_missing_keys   su    .050C0C0Ei0ETZIht0Ei	Ps 	Pt 	P 	P  ,F|=3E|FF j Gs   A4A4A:+A:c                     g)NFr   )r   s    r   is_trainableHiggsHfQuantizer.is_trainable   s    r    c                     g)NTr   )r   safe_serializations     r   is_serializable HiggsHfQuantizer.is_serializable   s    r    c                     SSK Jn  [        X5      u  px[        Xv5      (       a%  US:X  a  UR                  [
        R                  :w  a  gg)Nr   rg   rl   TF)rE   rh   r
   r*   dtyper$   int16)	r   r9   r:   r;   r=   r   rh   rV   tensor_names	            r   check_quantized_param&HiggsHfQuantizer.check_quantized_param   s@     	/25Ef**{h/F;K\K\`e`k`kKkr    c                 "    SSK Jn  U" U5      nU$ )Nr   )dequantize_higgs)rE   r   )r   r9   r   s      r   _dequantizeHiggsHfQuantizer._dequantize   s    3 'r    r_   )r6   torch.dtyper0   r   r   )r9   r   )__name__
__module____qualname____firstlineno____doc__requires_calibration requires_parameters_quantizationrequired_packagesr   r   r.   r7   r   r+   r   r   listr[   ra   rx   r   propertyr   r   r   r   r   __static_attributes____classcell__)r   s   @r   r   r   "   s4    !'+$')BC7,C 74	$ 04%/ %/ $%/ 	%/
 &%/ cN%/ "$s),%/ND DZ2GtCy G# GRVWZR[ G d    $ 	
 cN 
" r    r   )typingr   r   r   utils.loggingr   baser	   quantizers_utilsr
   modeling_utilsr   utilsr   r   r   r   r   utils.quantization_configr   r$   
get_loggerr   r2   r   r   r    r   <module>r      sS    0 /    2 0 s s ? 			H	%h{ hr    