
import importlib
from typing import TYPE_CHECKING, Optional, Union

from packaging import version

from .base import HfQuantizer
from .quantizers_utils import get_module_from_name


if TYPE_CHECKING:
    from ..modeling_utils import PreTrainedModel

from ..utils import is_accelerate_available, is_optimum_quanto_available, is_torch_available, logging
from ..utils.quantization_config import QuantoConfig


if is_torch_available():
    import torch

logger = logging.get_logger(__name__)


class QuantoHfQuantizer(HfQuantizer):
    """
    Quantizer for the quanto library
    """

    required_packages = ["quanto", "accelerate"]
    requires_parameters_quantization = True
    requires_calibration = False

    def __init__(self, quantization_config: QuantoConfig, **kwargs):
        super().__init__(quantization_config, **kwargs)
        self.post_init()

    def post_init(self):
        """
        Safety checker
        """
        if self.quantization_config.activations is not None and not self.pre_quantized:
            raise ValueError(
                "We don't support quantizing the activations with transformers library."
                "Use quanto library for more complex use cases such as activations quantization, calibration and quantization aware training."
            )

    def validate_environment(self, *args, **kwargs):
        if not is_optimum_quanto_available():
            raise ImportError(
                "Loading an optimum-quanto quantized model requires optimum-quanto library (`pip install optimum-quanto`)"
            )
        if not is_accelerate_available():
            raise ImportError(
                "Loading an optimum-quanto quantized model requires accelerate library (`pip install accelerate`)"
            )

    def update_device_map(self, device_map):
        if device_map is None:
            device_map = {"": "cpu"}
            logger.info(
                "The device_map was not initialized. Setting device_map to {'':'cpu'}. "
                "If you want to use the model for inference, please set device_map ='auto'"
            )
        return device_map

    def update_dtype(self, dtype: "torch.dtype") -> "torch.dtype":
        if dtype is None:
            logger.info("You did not specify `dtype` in `from_pretrained`. Setting it to `torch.float32`.")
            dtype = torch.float32
        return dtype

    def update_missing_keys(self, model, missing_keys: list[str], prefix: str) -> list[str]:
        if is_optimum_quanto_available():
            from optimum.quanto import QModuleMixin

        not_missing_keys = []
        for name, module in model.named_modules():
            if isinstance(module, QModuleMixin):
                for missing in missing_keys:
                    if (
                        (name in missing or name in f"{prefix}.{missing}")
                        and not missing.endswith(".weight")
                        and not missing.endswith(".bias")
                    ):
                        not_missing_keys.append(missing)
        return [k for k in missing_keys if k not in not_missing_keys]

    def param_needs_quantization(self, model: "PreTrainedModel", param_name: str, **kwargs) -> bool:
        if is_optimum_quanto_available():
            from optimum.quanto import QModuleMixin

        module, tensor_name = get_module_from_name(model, param_name)
        if isinstance(module, QModuleMixin) and "weight" in tensor_name:
            return not module.frozen
        return False

    def adjust_max_memory(self, max_memory: dict[str, Union[int, str]]) -> dict[str, Union[int, str]]:
        max_memory = {key: val * 0.90 for key, val in max_memory.items()}
        return max_memory

    def create_quantized_param(
        self,
        model: "PreTrainedModel",
        param_value: "torch.Tensor",
        param_name: str,
        target_device: "torch.device",
        **kwargs,
    ):
        from ..modeling_utils import _load_parameter_into_model

        _load_parameter_into_model(model, param_name, param_value.to(target_device))
        module, _ = get_module_from_name(model, param_name)
        module.freeze()
        module.weight.requires_grad = False

    def adjust_target_dtype(self, target_dtype: "torch.dtype") -> "torch.dtype":
        if version.parse(importlib.metadata.version("accelerate")) > version.parse("0.27.0"):
            from accelerate.utils import CustomDtype

            mapping = {
                "int8": torch.int8,
                "float8": CustomDtype.FP8,
                "int4": CustomDtype.INT4,
                "int2": CustomDtype.INT2,
            }
            target_dtype = mapping[self.quantization_config.weights]
            return target_dtype
        else:
            raise ValueError(
                "You are using `device_map='auto'` on an optimum-quanto quantized model. To automatically compute"
                " the appropriate device map, you should upgrade your `accelerate` library,"
                "`pip install --upgrade accelerate` or install it from source."
            )

    def _process_model_before_weight_loading(
        self, model: "PreTrainedModel", keep_in_fp32_modules: Optional[list[str]] = None, **kwargs
    ):
        from ..integrations import replace_with_quanto_layers

        self.modules_to_not_convert = self.get_modules_to_not_convert(
            model, self.quantization_config.modules_to_not_convert, keep_in_fp32_modules
        )
        model, _ = replace_with_quanto_layers(
            model, modules_to_not_convert=self.modules_to_not_convert, quantization_config=self.quantization_config
        )
        model.config.quantization_config = self.quantization_config

    def _process_model_after_weight_loading(self, model, **kwargs):
        return model

    @property
    def is_trainable(self) -> bool:
        return True

    def is_serializable(self, safe_serialization=None):
        return False
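

# Minimal usage sketch: this quantizer is not instantiated directly; it is driven by
# `from_pretrained` when a `QuantoConfig` is passed. The checkpoint id below is only an
# example and can be swapped for any causal LM; running this assumes `optimum-quanto`
# and `accelerate` are installed. The guard keeps the demo out of normal imports.
if __name__ == "__main__":
    from transformers import AutoModelForCausalLM, QuantoConfig

    # Quantize weights to int8; activations quantization is intentionally not supported here.
    quantization_config = QuantoConfig(weights="int8")
    model = AutoModelForCausalLM.from_pretrained(
        "facebook/opt-125m",  # example checkpoint
        quantization_config=quantization_config,
        device_map="cpu",
    )
    print(model)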