
    <h)                         S SK JrJrJr  SSKJr  SSKJr  \(       a  SSKJ	r	  SSK
JrJrJrJr  SSKJr  \" 5       (       a  S S	Kr\R$                  " \5      r " S
 S\5      rg	)    )TYPE_CHECKINGAnyOptional   )HfQuantizer)get_module_from_name   )PreTrainedModel)is_fp_quant_availableis_qutlass_availableis_torch_availablelogging)QuantizationConfigMixinNc                   "  ^  \ rS rSrSrSrSrSrS/rS\	4U 4S jjr
S rSS
 jr SSSSSS\SSS\\\4   S\\\      4S jjr  S S jrS S jrS\\   S\S	\\   4S jr\SS\S   4S jj5       rSS jrSSSSS\S\\\4   S	\4
S jrSrU =r$ )!FPQuantHfQuantizer!   z
Quantizer for the FP-Quant method. Enables the loading of prequantized models and in-flight quantization of full-precision models.
FTfp_quantquantization_configc                 4   > [         TU ]  " U40 UD6  Xl        g N)super__init__r   )selfr   kwargs	__class__s      b/var/www/html/shao/venv/lib/python3.13/site-packages/transformers/quantizers/quantizer_fp_quant.pyr   FPQuantHfQuantizer.__init__+   s    ,77#6     c                    [         R                  R                  5       (       d  [        S5      e[	        5       (       d&  U R
                  R                  (       d  [        S5      eU R
                  R                  (       a  [        R                  S5        [        5       (       d  [        S5      eUc  [        S5      e[        U[        5      (       a4  SUR                  5       ;   d  SUR                  5       ;   a  [        S5      eg g )	NzPFPQuant quantization is only supported on GPU. Please use a different quantizer.a  Using `fp_quant` with real quantization requires a **Blackwell GPU** and qutlass: `git clone https://github.com/IST-DASLab/qutlass.git && cd qutlass && pip install --no-build-isolation .`. You can use `FPQuantConfig(pseudoquantization=True, ...)` to use Triton-based pseudo-quantization. It doesn't provide any speedups but emulates the quantization behavior of the real quantization.zUsing pseudo-quantization for FP-Quant. This doesn't provide any speedups but emulates the quantization behavior of the real quantization.zGUsing `fp_quant` quantization requires fp_quant: `pip install fp_quant`zyYou are attempting to load a FPQuant model without setting device_map. Please set device_map comprised of 'cuda' devices.cpudiskzYou are attempting to load a FPQuant model with a device_map that contains a CPU or disk device. This is not supported. Please remove the CPU or disk device from the device_map.)torchcudais_availableNotImplementedErrorr   r   pseudoquantizationImportErrorloggerwarningr   
ValueError
isinstancedictvalues)r   
device_mapr   s      r   validate_environment'FPQuantHfQuantizer.validate_environment/   s    zz&&((%b  $%%d.F.F.Y.Y S  ##66NN ] %&&ghhF  
D))u
8I8I8K/KvYcYjYjYlOld  Pm)r   returnc                     Uc'  [         R                  S5        [        R                  nU$ U[        R                  :w  a  [	        SU S35      eU$ )NzV`torch_dtype` is None. Setting `torch_dtype=torch.bfloat16` for qutlass compatibility.zInvalid `torch_dtype` zC. fp_quant quantization only supports `torch_dtype=torch.bfloat16`.)r(   infor"   bfloat16r*   )r   torch_dtypes     r   update_torch_dtype%FPQuantHfQuantizer.update_torch_dtypeM   sV    KKpq..K  ENN*(5xy  r   modelr
   param_valueztorch.Tensor
param_nametarget_deviceztorch.device
state_dictunexpected_keysc                 F   [        X5      u  pxUR                  S5      (       aA  [        R                  R	                  UR                  U5      SS9Ul        S Ul        S Ul        g UR                  S5      (       aI  [        R                  R	                  UR                  U5      5      Ul        S Ul        S Ul        S Ul	        g [        R                  R	                  UR                  U5      5      Ul        UR                  5         Ub  X6;   a  UR                  U5        g g g )Nz.qweightF)requires_gradz	.dqweight)r   endswithr"   nn	Parametertoqweightweightdqweightscalespre_forwardremove)	r   r8   r9   r:   r;   r<   r=   module_s	            r   create_quantized_param)FPQuantHfQuantizer.create_quantized_paramX   s     );	 z**"XX//}-# 0 FN !FM"FO{++#hh001NOFO FM!FN FM **;>>-+HI&:+H"":. ,I&r   c                     SSK Jn  SSKJn  U" UU" U R                  5      S9  U R                  UR
                  l        g )Nr   )replace_with_fp_quant_linearr	   )adapt_fp_quant_config)fp_quant_linear_config)r   rO   integrations.fp_quantrP   r   config)r   r8   r   rO   rP   s        r   $_process_model_before_weight_loading7FPQuantHfQuantizer._process_model_before_weight_loading   s7    
 	:A$#89Q9Q#R	
 ,0+C+C(r   c                     U$ r    )r   r8   r   s      r   #_process_model_after_weight_loading6FPQuantHfQuantizer._process_model_after_weight_loading   s    r   missing_keysprefixc                    ^^	 SSK Jn  UR                  5        VVs1 sH  u  pV[        Xd5      (       d  M  UiM     snnm	S[        S[
        4U	U4S jjnU Vs/ sH  o" U5      (       a  M  UPM     sn$ s  snnf s  snf )Nr   FPQuantLinearkeyr1   c                    >^ ^ T R                  S5      (       d  T R                  S5      (       a  gT ST  3m[        UU 4S jT 5       5      $ )Nz.weightz.biasF.c              3   B   >#    U H  oT;   =(       d    UT;   v   M     g 7fr   rW   ).0namefull_keyr_   s     r   	<genexpr>QFPQuantHfQuantizer.update_missing_keys.<locals>.should_exclude.<locals>.<genexpr>   s      R>4s{6dh&66>s   )r@   any)r_   re   fp_quant_namesr[   s   `@r   should_exclude>FPQuantHfQuantizer.update_missing_keys.<locals>.should_exclude   sD    ||I&&#,,w*?*? 3%(HR>RRRr   )r   r^   named_modulesr+   strbool)
r   r8   rZ   r[   r^   rd   rJ   rj   r_   ri   s
      `     @r   update_missing_keys&FPQuantHfQuantizer.update_missing_keys   su    *383F3F3Hn3H<4JW]Lm$3Hn	S 	S 	S 	S  ,G|>#3F|GG o Hs   A4A4A:+A:c                     g)NFrW   )r   r8   s     r   is_trainableFPQuantHfQuantizer.is_trainable   s    r   c                     g)NTrW   )r   safe_serializations     r   is_serializable"FPQuantHfQuantizer.is_serializable   s    r   c                 X    SSK Jn  [        X5      u  px[        Xv5      (       a  US;   a  gg)Nr   r]   )rE   rD   rF   TF)r   r^   r   r+   )	r   r8   r9   r:   r<   r   r^   rJ   tensor_names	            r   check_quantized_param(FPQuantHfQuantizer.check_quantized_param   s-     	+25Ef,,@a1ar   )r   )r5   torch.dtyper1   r|   r   )r8   r
   )__name__
__module____qualname____firstlineno____doc__requires_calibration requires_parameters_quantizationis_qat_trainablerequired_packagesr   r   r/   r6   rm   r,   r   r   listrL   rT   rX   ro   propertyrr   rv   rn   rz   __static_attributes____classcell__)r   s   @r   r   r   !   s6    !'+$#7,C 7<	$ 04(/ (/ $(/ 	(/
 &(/ cN(/ "$s),(/TD DHtCy H# HRVWZR[ H (+<"=    $ 	
 cN 
 r   r   )typingr   r   r   baser   quantizers_utilsr   modeling_utilsr
   utilsr   r   r   r   utils.quantization_configr   r"   
get_loggerr}   r(   r   rW   r   r   <module>r      sP    0 /  2 0 \ \ ? 			H	%V Vr   