
from typing import TYPE_CHECKING, Optional

from .base import HfQuantizer


if TYPE_CHECKING:
    from ..modeling_utils import PreTrainedModel

from ..utils import is_accelerate_available, is_eetq_available, is_torch_available, logging
from .quantizers_utils import get_module_from_name


if is_torch_available():
    import torch

logger = logging.get_logger(__name__)


class EetqHfQuantizer(HfQuantizer):
    """
    8-bit weight-only quantization via the EETQ method:
        before loading: converts transformer Linear layers into EETQ's W8A16 `EetqLinear` layers
        during loading: quantizes each 16-bit weight to int8 (plus weight scales) and assigns it to its layer
    """
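
    # Weights are quantized tensor-by-tensor as the state dict is loaded
    # (`requires_parameters_quantization`); no calibration pass is needed since
    # EETQ quantizes weights only, not activations.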
    TFeetq

    def __init__(self, quantization_config, **kwargs):
        super().__init__(quantization_config, **kwargs)
        self.quantization_config = quantization_config

    def validate_environment(self, *args, **kwargs):
        if not is_eetq_available():
            raise ImportError(
                "Using `eetq` 8-bit quantization requires eetq. "
                "Please install the latest version of eetq from: https://github.com/NetEase-FuXi/EETQ"
            )

        try:
            import eetq  # noqa: F401
        except ImportError as exc:
            if "shard_checkpoint" in str(exc):
                # EETQ v1.0.0 imports `shard_checkpoint`, which recent transformers versions no longer provide.
                raise ImportError(
                    "You are using a version of EETQ that is incompatible with the current transformers version. "
                    "Either downgrade transformers to <= v4.46.3 or, if available, upgrade EETQ to > v1.0.0."
                ) from exc
            raise

        if not is_accelerate_available():
            raise ImportError("Loading an EETQ quantized model requires accelerate (`pip install accelerate`)")

        if kwargs.get("from_tf", False) or kwargs.get("from_flax", False):
            raise ValueError(
                "Converting into 8-bit weights from tf/flax weights is currently not supported, please make "
                "sure the weights are in PyTorch format."
            )

        if not torch.cuda.is_available():
            raise RuntimeError("No GPU found. A GPU is needed for quantization.")

        device_map = kwargs.get("device_map", None)
        if device_map is None:
            logger.warning_once(
                "You have loaded an EETQ model on CPU and have a CUDA device available, make sure to set "
                "your model on a GPU device in order to run your model."
            )
        elif isinstance(device_map, dict) and ("cpu" in device_map.values() or "disk" in device_map.values()):
            raise ValueError(
                "You are attempting to load an EETQ model with a device_map that contains a CPU or disk device. "
                "This is not supported. Please remove the CPU or disk device from the device_map."
            )

    def update_dtype(self, dtype: "torch.dtype") -> "torch.dtype":
        if dtype is None:
            dtype = torch.float16
            logger.info(
                "Overriding dtype=%s with `dtype=torch.float16` due to requirements of `eetq` to enable model "
                "loading in 8-bit. Pass your own dtype to specify the dtype of the remaining non-linear layers, or "
                "pass dtype=torch.float16 explicitly to remove this warning.",
                dtype,
            )
        elif dtype != torch.float16:
            logger.info("We suggest you set `dtype=torch.float16` for better efficiency with EETQ.")
        return dtype

    def param_needs_quantization(self, model: "PreTrainedModel", param_name: str, **kwargs) -> bool:
        from eetq import EetqLinear

        module, tensor_name = get_module_from_name(model, param_name)
        if isinstance(module, EetqLinear):
            # Pre-quantized checkpoints and bias tensors are loaded as-is.
            if self.pre_quantized or tensor_name == "bias":
                return False
            return True
        return False

    def create_quantized_param(
        self,
        model: "PreTrainedModel",
        param_value: "torch.Tensor",
        param_name: str,
        target_device: "torch.device",
        **kwargs,
    ):
        """
        Quantizes a weight into `qweight` and `weight_scales`.
        """
        from eetq import EetqLinear, quantize_and_preprocess_weights

        module, tensor_name = get_module_from_name(model, param_name)
        new_value, weight_scale = quantize_and_preprocess_weights(param_value)

        if isinstance(module, EetqLinear):
            if self.pre_quantized or tensor_name == "bias":
                if tensor_name == "weight" and param_value.dtype != torch.int8:
                    raise ValueError("Expect quantized weights but got an unquantized weight")
            elif tensor_name == "weight_scale":
                raise ValueError("Expect unquantized weights but got a quantized weight_scale")

        module._buffers[tensor_name] = new_value.to(target_device)
        module.register("weight_scales", weight_scale.to(target_device))

    def _process_model_after_weight_loading(self, model: "PreTrainedModel", **kwargs):
        return model

    def _process_model_before_weight_loading(
        self,
        model: "PreTrainedModel",
        keep_in_fp32_modules: Optional[list[str]] = None,
        **kwargs,
    ):
        from ..integrations import replace_with_eetq_linear

        self.modules_to_not_convert = self.get_modules_to_not_convert(
            model, self.quantization_config.modules_to_not_convert, keep_in_fp32_modules
        )

        model = replace_with_eetq_linear(
            model,
            modules_to_not_convert=self.modules_to_not_convert,
            quantization_config=self.quantization_config,
            pre_quantized=self.pre_quantized,
        )

        model.config.quantization_config = self.quantization_config

    def is_serializable(self, safe_serialization=None):
        return True

    @property
    def is_trainable(self) -> bool:
        return True
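
# A quick post-load sanity check (a sketch, reusing the example model from the
# top of this file):
#
#     from eetq import EetqLinear
#
#     n_eetq = sum(isinstance(m, EetqLinear) for m in model.modules())
#     print(f"{n_eetq} Linear layers were swapped to W8A16 EetqLinear")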