
from typing import TYPE_CHECKING, Any, Optional

from .base import HfQuantizer


if TYPE_CHECKING:
    from ..modeling_utils import PreTrainedModel

from ..utils import is_accelerate_available, is_eetq_available, is_torch_available, logging
from .quantizers_utils import get_module_from_name


if is_torch_available():
    import torch

logger = logging.get_logger(__name__)


class EetqHfQuantizer(HfQuantizer):
    """
    8-bit quantization from the EETQ quantization method:
        before loading: converts transformer layers into W8A16Linear
        during loading: loads the 16-bit weights and passes them to the layer object
        after loading: quantizes individual weights into 8-bit at the first .cuda() call
    """

    requires_parameters_quantization = True
    requires_calibration = False

    required_packages = ["eetq", "accelerate"]

    def __init__(self, quantization_config, **kwargs):
        super().__init__(quantization_config, **kwargs)
        self.quantization_config = quantization_config

    def validate_environment(self, *args, **kwargs):
        if not is_eetq_available():
            raise ImportError(
                "Using `eetq` 8-bit quantization requires eetq. "
                "Please install the latest version of eetq from: https://github.com/NetEase-FuXi/EETQ"
            )

        try:
            import eetq  # noqa: F401
        except ImportError as exc:
            # eetq v1.0.0 imports the `shard_checkpoint` helper that was removed from
            # transformers; detect that failure and surface the version mismatch.
            if "shard_checkpoint" in str(exc):
                raise ImportError(
                    "You are using a version of EETQ that is incompatible with the current transformers version. "
                    "Either downgrade transformers to <= v4.46.3 or, if available, upgrade EETQ to > v1.0.0."
                ) from exc
            raise

        if not is_accelerate_available():
            raise ImportError("Loading an EETQ quantized model requires accelerate (`pip install accelerate`)")

        if kwargs.get("from_tf", False) or kwargs.get("from_flax", False):
            raise ValueError(
                "Converting into 8-bit weights from tf/flax weights is currently not supported, please make"
                " sure the weights are in PyTorch format."
            )

        if not torch.cuda.is_available():
            raise RuntimeError("No GPU found. A GPU is needed for quantization.")

        device_map = kwargs.get("device_map", None)
        if device_map is None:
            logger.warning_once(
                "You have loaded an EETQ model on CPU and have a CUDA device available, make sure to set "
                "your model on a GPU device in order to run your model."
            )
        elif isinstance(device_map, dict) and ("cpu" in device_map.values() or "disk" in device_map.values()):
            raise ValueError(
                "You are attempting to load an EETQ model with a device_map that contains a CPU or disk device."
                " This is not supported. Please remove the CPU or disk device from the device_map."
            )

    def update_torch_dtype(self, torch_dtype: "torch.dtype") -> "torch.dtype":
        if torch_dtype is None:
            torch_dtype = torch.float16
            logger.info(
                "Overriding torch_dtype=%s with `torch_dtype=torch.float16` due to "
                "requirements of `eetq` to enable model loading in 8-bit. "
                "Pass your own torch_dtype to specify the dtype of the remaining non-linear layers or pass"
                " torch_dtype=torch.float16 to remove this warning.",
                torch_dtype,
            )
        elif torch_dtype != torch.float16:
            logger.info("We suggest setting `torch_dtype=torch.float16` for better efficiency with EETQ.")
        return torch_dtype

    def check_quantized_param(
        self,
        model: "PreTrainedModel",
        param_value: "torch.Tensor",
        param_name: str,
        state_dict: dict[str, Any],
        **kwargs,
    ):
        from eetq import EetqLinear

        module, tensor_name = get_module_from_name(model, param_name)

        # Only parameters that land on an EetqLinear module need on-the-fly quantization.
        if isinstance(module, EetqLinear):
            if self.pre_quantized or tensor_name == "bias":
                if tensor_name == "weight" and param_value.dtype != torch.int8:
                    raise ValueError("Expect quantized weights but got an unquantized weight")
                return False
            else:
                if tensor_name == "weight_scale":
                    raise ValueError("Expect unquantized weights but got a quantized weight_scale")
                return True
        return False

    def create_quantized_param(
        self,
        model: "PreTrainedModel",
        param_value: "torch.Tensor",
        param_name: str,
        target_device: "torch.device",
        state_dict: dict[str, Any],
        unexpected_keys: Optional[list[str]] = None,
    ):
        """
        Quantizes weights into qweight and weight_scales.
        """
        from eetq import quantize_and_preprocess_weights

        module, tensor_name = get_module_from_name(model, param_name)
        new_value, weight_scale = quantize_and_preprocess_weights(param_value)

        # Store the int8 weight as a buffer and attach the per-channel scales to the module.
        module._buffers[tensor_name] = new_value.to(target_device)
        module.register("weight_scales", weight_scale.to(target_device))

    def _process_model_after_weight_loading(self, model: "PreTrainedModel", **kwargs):
        return model

    def _process_model_before_weight_loading(
        self,
        model: "PreTrainedModel",
        keep_in_fp32_modules: Optional[list[str]] = None,
        **kwargs,
    ):
        from ..integrations import replace_with_eetq_linear

        self.modules_to_not_convert = self.get_modules_to_not_convert(
            model, self.quantization_config.modules_to_not_convert, keep_in_fp32_modules
        )

        model = replace_with_eetq_linear(
            model,
            modules_to_not_convert=self.modules_to_not_convert,
            quantization_config=self.quantization_config,
            pre_quantized=self.pre_quantized,
        )

        model.config.quantization_config = self.quantization_config

    def is_serializable(self, safe_serialization=None):
        return True

    @property
    def is_trainable(self) -> bool:
        return True
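
# Usage sketch (illustrative, not part of the upstream module): this quantizer is not
# instantiated by hand. It is selected by the quantizer dispatch machinery when a model
# is loaded with an `EetqConfig`; the model id below is only an example.
#
#     from transformers import AutoModelForCausalLM, EetqConfig
#
#     quantization_config = EetqConfig("int8")
#     model = AutoModelForCausalLM.from_pretrained(
#         "facebook/opt-125m",
#         device_map="auto",
#         quantization_config=quantization_config,
#     )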