o
    
sh                     @   s   d dl Z d dlmZmZmZmZ d dlmZ ddlm	Z	 ddl
mZ er*ddlmZ dd	lmZmZmZmZ dd
lmZ e rCd dlZeeZG dd de	ZdS )    N)TYPE_CHECKINGAnyOptionalUnion)version   )HfQuantizer)get_module_from_name   )PreTrainedModel)is_accelerate_availableis_optimum_quanto_availableis_torch_availablelogging)QuantoConfigc                
       s4  e Zd ZdZddgZdZdZdef fddZd	d
 Z	dd Z
dd Zd4ddZdee dedee fddZdddddedeeef def
ddZd eeeeef f deeeeef f fd!d"Zddddded#d$fd%d&Zd5d(d)Z	*d6ddd+eee  fd,d-Zd.d/ Zedefd0d1Zd6d2d3Z  ZS )7QuantoHfQuantizerz*
    Quantizer for the quanto library
    quanto
accelerateTFquantization_configc                    s    t  j|fi | |   d S N)super__init__	post_init)selfr   kwargs	__class__ f/var/www/html/alpaca_bot/venv/lib/python3.10/site-packages/transformers/quantizers/quantizer_quanto.pyr   2   s   zQuantoHfQuantizer.__init__c                 C   s"   | j jdur| jstddS dS )z 
        Safety checker
        NzWe don't support quantizing the activations with transformers library.Use quanto library for more complex use cases such as activations quantization, calibration and quantization aware training.)r   activationspre_quantized
ValueErrorr   r   r   r   r   6   s
   zQuantoHfQuantizer.post_initc                 O   s    t  stdt stdd S )NzhLoading an optimum-quanto quantized model requires optimum-quanto library (`pip install optimum-quanto`)z`Loading an optimum-quanto quantized model requires accelerate library (`pip install accelerate`))r   ImportErrorr   )r   argsr   r   r   r   validate_environment@   s   z&QuantoHfQuantizer.validate_environmentc                 C   s   |d u rddi}t d |S )N cpuzThe device_map was not initialized. Setting device_map to {'':'cpu'}. If you want to use the model for inference, please set device_map ='auto')loggerinfo)r   
device_mapr   r   r   update_device_mapJ   s   z#QuantoHfQuantizer.update_device_mapdtypetorch.dtypereturnc                 C   s   |d u rt d tj}|S )NzPYou did not specify `dtype` in `from_pretrained`. Setting it to `torch.float32`.)r(   r)   torchfloat32)r   r,   r   r   r   update_dtypeT   s   
zQuantoHfQuantizer.update_dtypemissing_keysprefixc                    s   t  r	ddlm} g  | D ]*\}}t||r9|D ]}||v s)|| d| v r8|ds8|ds8 | qq fdd|D S )Nr   QModuleMixin.z.weightz.biasc                    s   g | ]}| vr|qS r   r   ).0knot_missing_keysr   r   
<listcomp>h   s    z9QuantoHfQuantizer.update_missing_keys.<locals>.<listcomp>)r   optimum.quantor5   named_modules
isinstanceendswithappend)r   modelr2   r3   r5   namemodulemissingr   r9   r   update_missing_keysZ   s   

z%QuantoHfQuantizer.update_missing_keysrA   r   param_valueztorch.Tensor
param_name
state_dictc                 K   s   t  r	ddlm} |d}|d}|dur8|dur8t| }	|dkr8t|	dkr8|	dhks8|	ddhks8d	S t||\}
}t|
|rLd
|v rL|
j	 S d	S )z=
        Check if a parameter needs to be quantized.
        r   r4   r*   param_deviceNr'   r   diskFweight)
r   r<   r5   getsetvalueslenr	   r>   frozen)r   rA   rF   rG   rH   r   r5   r*   rI   device_map_valuesrC   tensor_namer   r   r   check_quantized_paramj   s   

z'QuantoHfQuantizer.check_quantized_param
max_memoryc                 C   s   dd |  D }|S )Nc                 S   s   i | ]	\}}||d  qS )g?r   )r7   keyvalr   r   r   
<dictcomp>   s    z7QuantoHfQuantizer.adjust_max_memory.<locals>.<dictcomp>)items)r   rT   r   r   r   adjust_max_memory   s   z#QuantoHfQuantizer.adjust_max_memorytarget_deviceztorch.devicec           
      O   s<   ddl m} ||||| t||\}}	|  d|j_dS )ze
        Create the quantized parameter by calling .freeze() after setting it to the module.
        r   )set_module_tensor_to_deviceFN)accelerate.utilsr[   r	   freezerK   requires_grad)
r   rA   rF   rG   rZ   r$   r   r[   rC   _r   r   r   create_quantized_param   s
   z(QuantoHfQuantizer.create_quantized_paramtarget_dtypec                 C   sV   t tj dt dkr'ddlm} tj|j|j	|j
d}|| jj }|S td)Nr   z0.27.0r   )CustomDtype)int8float8int4int2zYou are using `device_map='auto'` on an optimum-quanto quantized model. To automatically compute the appropriate device map, you should upgrade your `accelerate` library,`pip install --upgrade accelerate` or install it from source.)r   parse	importlibmetadatar\   rb   r/   rc   FP8INT4INT2r   weightsr!   )r   ra   rb   mappingr   r   r   adjust_target_dtype   s   z%QuantoHfQuantizer.adjust_target_dtypeNkeep_in_fp32_modulesc                 K   sD   ddl m} | || jj|| _||| j| jd\}}| j|j_d S )Nr
   )replace_with_quanto_layers)modules_to_not_convertr   )integrationsrq   get_modules_to_not_convertr   rr   config)r   rA   rp   r   rq   r_   r   r   r   $_process_model_before_weight_loading   s   


z6QuantoHfQuantizer._process_model_before_weight_loadingc                 K   s   |S r   r   )r   rA   r   r   r   r   #_process_model_after_weight_loading      z5QuantoHfQuantizer._process_model_after_weight_loadingc                 C      dS )NTr   r"   r   r   r   is_trainable   s   zQuantoHfQuantizer.is_trainablec                 C   ry   )NFr   )r   safe_serializationr   r   r   is_serializable   rx   z!QuantoHfQuantizer.is_serializable)r,   r-   r.   r-   )ra   r-   r.   r-   r   ) __name__
__module____qualname____doc__required_packages requires_parameters_quantizationrequires_calibrationr   r   r   r%   r+   r1   liststrrE   dictr   boolrS   r   intrY   r`   ro   r   rv   rw   propertyrz   r|   __classcell__r   r   r   r   r   )   sV    





2



r   )rh   typingr   r   r   r   	packagingr   baser   quantizers_utilsr	   modeling_utilsr   utilsr   r   r   r   utils.quantization_configr   r/   
get_loggerr}   r(   r   r   r   r   r   <module>   s   
