
import importlib
from typing import TYPE_CHECKING

from packaging import version

from .base import HfQuantizer


if TYPE_CHECKING:
    from ..modeling_utils import PreTrainedModel

from ..utils import is_auto_gptq_available, is_gptqmodel_available, is_optimum_available, is_torch_available, logging
from ..utils.quantization_config import GPTQConfig, QuantizationConfigMixin


if is_torch_available():
    import torch

logger = logging.get_logger(__name__)

class GptqHfQuantizer(HfQuantizer):
    """
    Quantizer of the GPTQ method - for GPTQ, the quantizer supports calibration of the model through the
    `auto_gptq` or `gptqmodel` package. Quantization is done under the hood for users if they load a
    non-prequantized model.
    """

    requires_calibration = False
    required_packages = ["optimum", "auto_gptq", "gptqmodel"]
    optimum_quantizer = None

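    # Usage sketch (illustrative; the checkpoint name and calibration dataset are
    # placeholders): this quantizer is not instantiated by hand. Passing a
    # `GPTQConfig` to `from_pretrained` routes loading through this class and
    # triggers calibration:
    #
    #     from transformers import AutoModelForCausalLM, AutoTokenizer, GPTQConfig
    #
    #     tokenizer = AutoTokenizer.from_pretrained("facebook/opt-125m")
    #     gptq_config = GPTQConfig(bits=4, dataset="c4", tokenizer=tokenizer)
    #     model = AutoModelForCausalLM.from_pretrained(
    #         "facebook/opt-125m", device_map="auto", quantization_config=gptq_config
    #     )
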
F)optimum	auto_gptq	gptqmodelNquantization_configc                    > [         TU ]  " U40 UD6  [        5       (       d  [        S5      eSSKJn  UR                  U R                  R                  5       5      U l	        g )NGLoading a GPTQ quantized model requires optimum (`pip install optimum`)r   )GPTQQuantizer)
super__init__r   ImportErroroptimum.gptqr   	from_dictr   to_dict_optimumoptimum_quantizer)selfr   kwargsr   	__class__s       ^/var/www/html/shao/venv/lib/python3.13/site-packages/transformers/quantizers/quantizer_gptq.pyr   GptqHfQuantizer.__init__-   sP    ,77#%%ghh.!.!8!89Q9Q9a9a9c!d    c                 ^   [        5       (       d  [        S5      e[        5       (       a$  [        5       (       a  [        R                  S5        [        5       =(       aJ    [        R                  " [        R                  R                  S5      5      [        R                  " S5      :  =(       d
    [        5       nU(       d.  [        R                  R                  5       (       d  [        S5      e[        5       (       d  [        5       (       d  [        S5      e[        5       (       aV  [        R                  " [        R                  R                  S5      5      [        R                  " S5      :  a  [        S5      e[        5       (       a  [        R                  " [        R                  R                  S	5      5      [        R                  " S
5      :  dK  [        R                  " [        R                  R                  S5      5      [        R                  " S5      :  a  [        S5      eg g )Nr   z4Detected gptqmodel and auto-gptq, will use gptqmodelz	auto-gptqz0.4.2z2GPU is required to quantize or run quantize model.z|Loading a GPTQ quantized model requires gptqmodel (`pip install gptqmodel`) or auto-gptq (`pip install auto-gptq`) library. r   zYou need a version of auto_gptq >= 0.4.2 to use GPTQ: `pip install --upgrade auto-gptq` or use gptqmodel by `pip install gptqmodel>=1.4.3`.r   z1.4.3r   1.23.99zJThe gptqmodel version should be >= 1.4.3, optimum version should >= 1.24.0)r   r   r	   r
   loggerwarningr   parse	importlibmetadatatorchcudais_availableRuntimeError)r!   argsr"   gptq_supports_cpus       r$   validate_environment$GptqHfQuantizer.validate_environment6   s   #%%ghh!##(>(@(@NNQR #$ `i0088EFW^I__& $% 	 !)@)@)B)BSTT(**.D.F.F O  $%%'--	8J8J8R8RS^8_*`cjcpcpd
 +
  ^  $%%MM),,44[ABW]]SZE[[}}Y//77	BCgmmT]F^^jkk _ &r&   returnc                     Uc'  [         R                  n[        R                  S5        U$ U[         R                  :w  a  [        R                  S5        U$ )NzRLoading the model in `torch.float16`. To overwrite it, set `torch_dtype` manually.zRWe suggest you to set `torch_dtype=torch.float16` for better efficiency with GPTQ.)r.   float16r)   info)r!   torch_dtypes     r$   update_torch_dtype"GptqHfQuantizer.update_torch_dtypeR   sG    --KKKlm  EMM)KKlmr&   c                     Uc  S[         R                  " S5      0n[        5       (       d%  USS[         R                  " S5      04;   a  USS0:H    U$ )N cpur   )r.   devicer
   )r!   
    def _process_model_before_weight_loading(self, model: "PreTrainedModel", **kwargs):
        if model.__class__.main_input_name != "input_ids":
            raise RuntimeError("We can only quantize pure text models.")

        if self.pre_quantized:
            # Compatibility: optimum <= 1.23.99 exposes `convert_model(model)` without extra kwargs.
            if version.parse(importlib.metadata.version("optimum")) <= version.parse("1.23.99"):
                model = self.optimum_quantizer.convert_model(model)
            else:
                model = self.optimum_quantizer.convert_model(model, **kwargs)

    def _process_model_after_weight_loading(self, model: "PreTrainedModel", **kwargs):
        if self.pre_quantized:
            model = self.optimum_quantizer.post_init_model(model)
        else:
            # Calibration needs a tokenizer; fall back to the model's own checkpoint name.
            if self.quantization_config.tokenizer is None:
                self.quantization_config.tokenizer = model.name_or_path

            self.optimum_quantizer.quantize_model(model, self.quantization_config.tokenizer)
            model.config.quantization_config = GPTQConfig.from_dict(self.optimum_quantizer.to_dict())

    @property
    def is_trainable(self) -> bool:
        return True

    def is_serializable(self, safe_serialization=None):
        return True
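
# Usage sketch for the pre-quantized path (illustrative; the checkpoint name is a
# placeholder): loading a model whose config already carries a GPTQ
# `quantization_config` runs `convert_model` before the weights are loaded and
# `post_init_model` afterwards, with no calibration step:
#
#     from transformers import AutoModelForCausalLM
#
#     model = AutoModelForCausalLM.from_pretrained(
#         "TheBloke/Llama-2-7B-GPTQ", device_map="auto"
#     )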