
    Ph|                         d dl mZmZ ddlmZ ddlmZ erddlmZ ddl	m
Z
mZmZmZ ddlmZ  e       rd d	lZ ej"                  e      Z G d
 de      Zy	)    )TYPE_CHECKINGOptional   )HfQuantizer)get_module_from_name   )PreTrainedModel)is_fp_quant_availableis_qutlass_availableis_torch_availablelogging)QuantizationConfigMixinNc                        e Zd ZdZdZdZdZdgZdef fdZ	d Z
dd	Zd
ddddeddfdZ	 	 ddZddZdee   dedee   fdZedd
ed   fd       ZddZd
ddedefdZ xZS )FPQuantHfQuantizerz
    Quantizer for the FP-Quant method. Enables the loading of prequantized models and in-flight quantization of full-precision models.
    FTfp_quantquantization_configc                 4    t        |   |fi | || _        y N)super__init__r   )selfr   kwargs	__class__s      d/var/www/html/saasai/venv/lib/python3.12/site-packages/transformers/quantizers/quantizer_fp_quant.pyr   zFPQuantHfQuantizer.__init__+   s    ,77#6     c                     t         j                  j                         st        d      t	               s!| j
                  j                  st        d      | j
                  j                  rt        j                  d       t               st        d      |!| j
                  j                  st        d      t        |t              rGd|j                         v sd|j                         v r"| j
                  j                  st        d      y y y )	NzPFPQuant quantization is only supported on GPU. Please use a different quantizer.a  Using `fp_quant` with real quantization requires a **Blackwell GPU** and qutlass: `git clone https://github.com/IST-DASLab/qutlass.git && cd qutlass && pip install --no-build-isolation .`. You can use `FPQuantConfig(pseudoquantization=True, ...)` to use Triton-based pseudo-quantization. It doesn't provide any speedups but emulates the quantization behavior of the real quantization.zUsing pseudo-quantization for FP-Quant. This doesn't provide any speedups but emulates the quantization behavior of the real quantization.zGUsing `fp_quant` quantization requires fp_quant: `pip install fp_quant`zyYou are attempting to load a FPQuant model without setting device_map. Please set device_map comprised of 'cuda' devices.cpudiskzYou are attempting to load a FPQuant model with a device_map that contains a CPU or disk device. This is not supported. Please remove the CPU or disk device from the device_map.)torchcudais_availableNotImplementedErrorr   r   pseudoquantizationImportErrorloggerwarningr
   
ValueError
isinstancedictvalues)r   
device_mapr   s      r   validate_environmentz'FPQuantHfQuantizer.validate_environment/   s	   zz&&(%b  $%d.F.F.Y.Y S  ##66NN ] %&ghhd&>&>&Q&QF 
 z4(*++--:;L;L;N1N,,??d  @ 2O )r   returnc                     |'t         j                  d       t        j                  }|S |t        j                  k7  rt	        d| d      |S )NzJ`dtype` is None. Setting `dtype=torch.bfloat16` for qutlass compatibility.zInvalid `dtype` z=. fp_quant quantization only supports `dtype=torch.bfloat16`.)r%   infor   bfloat16r'   )r   dtypes     r   update_dtypezFPQuantHfQuantizer.update_dtypeQ   sM    =KKdeNNE  enn$/w6stuur   modelr	   param_valueztorch.Tensor
param_nametarget_deviceztorch.devicec                    t        ||      \  }}|j                  d      rDt        j                  j	                  |j                  |      d      |_        d |_        d |_        y |j                  d      rIt        j                  j	                  |j                  |            |_        d |_        d |_        d |_	        y t        j                  j	                  |j                  |            |_        |j                          y )Nz.qweightF)requires_gradz	.dqweight)r   endswithr   nn	Parametertoqweightweightdqweightscalespre_forward)r   r3   r4   r5   r6   r   module_s           r   create_quantized_paramz)FPQuantHfQuantizer.create_quantized_paramZ   s     )
;	 z*"XX//}-# 0 FN !FM"FO{+#hh001NOFO FM!FN FM **;>>-+HIr   c                     ddl m} ddlm}  || || j                               | j                  |j
                  _        y )Nr   )replace_with_fp_quant_linearr   )adapt_fp_quant_config)fp_quant_linear_config)r   rF   integrations.fp_quantrG   r   config)r   r3   r   rF   rG   s        r   $_process_model_before_weight_loadingz7FPQuantHfQuantizer._process_model_before_weight_loading   s7    
 	:A$#89Q9Q#R	
 ,0+C+C(r   c                     |S r    )r   r3   r   s      r   #_process_model_after_weight_loadingz6FPQuantHfQuantizer._process_model_after_weight_loading   s    r   missing_keysprefixc                    	 ddl m} |j                         D ch c]  \  }}t        ||      s| c}}	dt        dt
        f	fd}|D cg c]  } ||      r| c}S c c}}w c c}w )Nr   FPQuantLinearkeyr-   c                       j                  d      s j                  d      ry d  t         fdD              S )Nz.weightz.biasF.c              3   2   K   | ]  }|v xs |v   y wr   rM   ).0namefull_keyrT   s     r   	<genexpr>zQFPQuantHfQuantizer.update_missing_keys.<locals>.should_exclude.<locals>.<genexpr>   s#     R>4ts{6dh&66>s   )r9   any)rT   rZ   fp_quant_namesrP   s   `@r   should_excludez>FPQuantHfQuantizer.update_missing_keys.<locals>.should_exclude   s>    ||I&#,,w*? 3%(HR>RRRr   )r   rS   named_modulesr(   strbool)
r   r3   rO   rP   rS   rY   rB   r^   rT   r]   s
      `     @r   update_missing_keysz&FPQuantHfQuantizer.update_missing_keys   st    *383F3F3Hn3H<4JW]_lLm$3Hn	S 	S 	S  ,G|>#3F|GG o Hs   A%A%A+A+c                 `    | j                   j                  }|st        j                  d       |S )NzYou are attempting to train a model with FPQuant quantization. This is only supported when `store_master_weights=True`. Please set `store_master_weights=True` to train the model.)r   store_master_weightsr%   r&   )r   r3   	trainables      r   is_trainablezFPQuantHfQuantizer.is_trainable   s0    ,,AA	NN E r   c                      y)NTrM   )r   safe_serializations     r   is_serializablez"FPQuantHfQuantizer.is_serializable   s    r   c                 P    ddl m} t        ||      \  }}t        ||      r|dv ryy)Nr   rR   )r>   r=   r?   TF)r   rS   r   r(   )r   r3   r5   r   rS   rB   tensor_names          r   param_needs_quantizationz+FPQuantHfQuantizer.param_needs_quantization   s.    *25*Efm,@a1ar   )r1   torch.dtyper-   rm   )r3   r	   r   )__name__
__module____qualname____doc__requires_calibration requires_parameters_quantizationis_qat_trainablerequired_packagesr   r   r,   r2   r`   rD   rK   rN   listrb   propertyr   rf   ri   ra   rl   __classcell__)r   s   @r   r   r   !   s     !'+$#7,C 7 D$ $ $$ 	$
 &$LD DHtCy H# HRVWZR[ H (+<"=  .? S _c r   r   )typingr   r   baser   quantizers_utilsr   modeling_utilsr	   utilsr
   r   r   r   utils.quantization_configr   r   
get_loggerrn   r%   r   rM   r   r   <module>r      sL    +  2 0 \ \ ? 			H	%R Rr   