
import importlib
from typing import TYPE_CHECKING

from packaging import version

from .base import HfQuantizer


if TYPE_CHECKING:
    from ..modeling_utils import PreTrainedModel

from ..utils import is_auto_gptq_available, is_gptqmodel_available, is_optimum_available, is_torch_available, logging
from ..utils.quantization_config import GPTQConfig, QuantizationConfigMixin


if is_torch_available():
    import torch

logger = logging.get_logger(__name__)

class GptqHfQuantizer(HfQuantizer):
    """
    Quantizer of the GPTQ method - for GPTQ, the quantizer supports calibration of the model through the
    `auto_gptq` or `gptqmodel` package. Quantization is done under the hood for users if they load a
    non-prequantized model.
    """

    requires_calibration = False
    required_packages = ["optimum", "auto_gptq", "gptqmodel"]
    optimum_quantizer = None

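    # Usage sketch (illustrative; the checkpoint name and calibration dataset are
    # placeholders): this quantizer is not instantiated by hand. Passing a
    # `GPTQConfig` to `from_pretrained` routes loading through this class and
    # triggers calibration:
    #
    #     from transformers import AutoModelForCausalLM, AutoTokenizer, GPTQConfig
    #
    #     tokenizer = AutoTokenizer.from_pretrained("facebook/opt-125m")
    #     gptq_config = GPTQConfig(bits=4, dataset="c4", tokenizer=tokenizer)
    #     model = AutoModelForCausalLM.from_pretrained(
    #         "facebook/opt-125m", device_map="auto", quantization_config=gptq_config
    #     )
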
F)optimum	auto_gptq	gptqmodelNquantization_configc                    > [         TU ]  " U40 UD6  [        5       (       d  [        S5      eSSKJn  UR                  U R                  R                  5       5      U l	        g )NGLoading a GPTQ quantized model requires optimum (`pip install optimum`)r   )GPTQQuantizer)
super__init__r   ImportErroroptimum.gptqr   	from_dictr   to_dict_optimumoptimum_quantizer)selfr   kwargsr   	__class__s       ^/var/www/html/shao/venv/lib/python3.13/site-packages/transformers/quantizers/quantizer_gptq.pyr   GptqHfQuantizer.__init__-   sP    ,77#%%ghh.!.!8!89Q9Q9a9a9c!d    c                 ^   [        5       (       d  [        S5      e[        5       (       a$  [        5       (       a  [        R                  S5        [        5       =(       aJ    [        R                  " [        R                  R                  S5      5      [        R                  " S5      :  =(       d
    [        5       nU(       d.  [        R                  R                  5       (       d  [        S5      e[        5       (       d  [        5       (       d  [        S5      e[        5       (       aV  [        R                  " [        R                  R                  S5      5      [        R                  " S5      :  a  [        S5      e[        5       (       a  [        R                  " [        R                  R                  S	5      5      [        R                  " S
5      :  dK  [        R                  " [        R                  R                  S5      5      [        R                  " S5      :  a  [        S5      eg g )Nr   z4Detected gptqmodel and auto-gptq, will use gptqmodelz	auto-gptqz0.4.2z2GPU is required to quantize or run quantize model.z|Loading a GPTQ quantized model requires gptqmodel (`pip install gptqmodel`) or auto-gptq (`pip install auto-gptq`) library. r   zYou need a version of auto_gptq >= 0.4.2 to use GPTQ: `pip install --upgrade auto-gptq` or use gptqmodel by `pip install gptqmodel>=1.4.3`.r   z1.4.3r   1.23.99zJThe gptqmodel version should be >= 1.4.3, optimum version should >= 1.24.0)r   r   r	   r
   loggerwarningr   parse	importlibmetadatatorchcudais_availableRuntimeError)r!   argsr"   gptq_supports_cpus       r$   validate_environment$GptqHfQuantizer.validate_environment6   s   #%%ghh!##(>(@(@NNQR #$ `i0088EFW^I__& $% 	 !)@)@)B)BSTT(**.D.F.F O  $%%'--	8J8J8R8RS^8_*`cjcpcpd
 +
  ^  $%%MM),,44[ABW]]SZE[[}}Y//77	BCgmmT]F^^jkk _ &r&   returnc                     Uc'  [         R                  n[        R                  S5        U$ U[         R                  :w  a  [        R                  S5        U$ )NzRLoading the model in `torch.float16`. To overwrite it, set `torch_dtype` manually.zRWe suggest you to set `torch_dtype=torch.float16` for better efficiency with GPTQ.)r.   float16r)   info)r!   torch_dtypes     r$   update_torch_dtype"GptqHfQuantizer.update_torch_dtypeR   sG    --KKKlm  EMM)KKlmr&   c                     Uc  S[         R                  " S5      0n[        5       (       d%  USS[         R                  " S5      04;   a  USS0:H    U$ )N cpur   )r.   devicer
   )r!   
    def _process_model_before_weight_loading(self, model: "PreTrainedModel", **kwargs):
        if model.__class__.main_input_name != "input_ids":
            raise RuntimeError("We can only quantize pure text models.")

        if self.pre_quantized:
            # Compatibility: optimum <= 1.23.99 exposes `convert_model(model)` without extra kwargs.
            if version.parse(importlib.metadata.version("optimum")) <= version.parse("1.23.99"):
                model = self.optimum_quantizer.convert_model(model)
            else:
                model = self.optimum_quantizer.convert_model(model, **kwargs)

    def _process_model_after_weight_loading(self, model: "PreTrainedModel", **kwargs):
        if self.pre_quantized:
            model = self.optimum_quantizer.post_init_model(model)
        else:
            # Calibration needs a tokenizer; fall back to the model's own checkpoint name.
            if self.quantization_config.tokenizer is None:
                self.quantization_config.tokenizer = model.name_or_path

            self.optimum_quantizer.quantize_model(model, self.quantization_config.tokenizer)
            model.config.quantization_config = GPTQConfig.from_dict(self.optimum_quantizer.to_dict())

    @property
    def is_trainable(self) -> bool:
        return True

    def is_serializable(self, safe_serialization=None):
        return True
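
# Usage sketch for the pre-quantized path (illustrative; the checkpoint name is a
# placeholder): loading a model whose config already carries a GPTQ
# `quantization_config` runs `convert_model` before the weights are loaded and
# `post_init_model` afterwards, with no calibration step:
#
#     from transformers import AutoModelForCausalLM
#
#     model = AutoModelForCausalLM.from_pretrained(
#         "TheBloke/Llama-2-7B-GPTQ", device_map="auto"
#     )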