
    <h                         S SK JrJr  SSKJr  \(       a  SSKJr  SSKJr  SSK	J
r
JrJrJr  SSKJr  \" 5       (       a  S S	Kr\R"                  " \5      r " S
 S\5      rg	)    )TYPE_CHECKINGOptional   )HfQuantizer   )PreTrainedModel)replace_with_spqr_linear)is_accelerate_availableis_spqr_availableis_torch_availablelogging)QuantizationConfigMixinNc                      ^  \ rS rSrSrSrS\4U 4S jjrS rSS jr	 SSS	S
\
\\      4S jjrSS jr\S 5       rSS jrSrU =r$ )SpQRHfQuantizer!   zK
Quantizer of the SpQR method. Enables the loading of prequantized models.
Tquantization_configc                 4   > [         TU ]  " U40 UD6  Xl        g N)super__init__r   )selfr   kwargs	__class__s      ^/var/www/html/shao/venv/lib/python3.13/site-packages/transformers/quantizers/quantizer_spqr.pyr   SpQRHfQuantizer.__init__(   s    ,77#6     c                     [         R                  R                  5       (       d  [        S5      e[	        5       (       d  [        S5      e[        5       (       d  [        S5      eg )Nz,GPU is required to run SpQR quantized model.zGUsing `spqr` quantization requires Accelerate: `pip install accelerate`zFUsing `spqr` quantization requires SpQR: `pip install spqr_quant[gpu]`)torchcudais_availableRuntimeErrorr
   ImportErrorr   )r   argsr   s      r   validate_environment$SpQRHfQuantizer.validate_environment,   sP    zz&&((MNN&((ghh ""fgg #r   c                     Uc'  [         R                  n[        R                  S5        U$ U[         R                  :w  a  [	        S5      eU$ )NzHAssuming SpQR inference on GPU and loading the model in `torch.float16`.z|You cannot use any type other than torch.float16 for SpQR. Please either leave it None or set it totorch.float16 explicitly.)r   float16loggerinfo
ValueError)r   torch_dtypes     r   update_torch_dtype"SpQRHfQuantizer.update_torch_dtype6   sN    --KKKbc  EMM),  r   modelr   keep_in_fp32_modulesc                     U R                  XR                  R                  U5      U l        [        UU R                  U R                  S9  U R                  UR                  l        g )N)r   modules_to_not_convert)get_modules_to_not_convertr   r1   r	   config)r   r.   r/   r   s       r   $_process_model_before_weight_loading4SpQRHfQuantizer._process_model_before_weight_loadingA   s\     '+&E&E++BBDX'
# 	! $ 8 8#'#>#>	

 ,0+C+C(r   c                     U$ r    )r   r.   r   s      r   #_process_model_after_weight_loading3SpQRHfQuantizer._process_model_after_weight_loadingR   s    r   c                     g)NFr7   )r   s    r   is_trainableSpQRHfQuantizer.is_trainableU   s    r   c                     g)NTr7   )r   safe_serializations     r   is_serializableSpQRHfQuantizer.is_serializableY   s    r   )r1   r   )r+   torch.dtypereturnrA   r   )r.   r   )__name__
__module____qualname____firstlineno____doc__requires_calibrationr   r   r$   r,   r   liststrr4   r8   propertyr;   r?   __static_attributes____classcell__)r   s   @r   r   r   !   ss      7,C 7h	 59D D 'tCy1D"   r   r   )typingr   r   baser   modeling_utilsr   integrationsr	   utilsr
   r   r   r   utils.quantization_configr   r   
get_loggerrC   r(   r   r7   r   r   <module>rU      sK    +  0 3 [ [ ? 			H	%9k 9r   