
from typing import TYPE_CHECKING, Optional, Union

from .base import HfQuantizer


if TYPE_CHECKING:
    from ..modeling_utils import PreTrainedModel

from ..utils import is_accelerate_available, is_torch_available, logging


if is_torch_available():
    import torch

logger = logging.get_logger(__name__)


class BitNetHfQuantizer(HfQuantizer):
    """
    1.58-bit quantization from BitNet quantization method:
    Before loading: it converts the linear layers into BitLinear layers during loading.

    Check out the paper introducing this method: https://huggingface.co/papers/2402.17764
    """

    requires_parameters_quantization = False
    requires_calibration = True

    required_packages = ["accelerate"]

    def __init__(self, quantization_config, **kwargs):
        super().__init__(quantization_config, **kwargs)
        self.quantization_config = quantization_config

    def validate_environment(self, *args, **kwargs):
        if not is_accelerate_available():
            raise ImportError("Loading a BitNet quantized model requires accelerate (`pip install accelerate`)")

        if kwargs.get("from_tf", False) or kwargs.get("from_flax", False):
            raise ValueError(
                "Loading ternary weights from tf/flax is currently not supported, please make sure the weights are"
                " in PyTorch format."
            )

        if not torch.cuda.is_available():
            logger.warning_once(
                "You don't have a GPU available to load the model, the inference will be slow because of weight"
                " unpacking"
            )
            return

        device_map = kwargs.get("device_map", None)
        if device_map is None:
            logger.warning_once(
                "You have loaded a BitNet model on CPU and have a CUDA device available, make sure to set your model"
                " on a GPU device in order to run your model."
            )
        elif device_map is not None:
            if isinstance(device_map, dict) and ("cpu" in device_map.values() or "disk" in device_map.values()):
                raise ValueError(
                    "You are attempting to load a BitNet model with a device_map that contains a CPU or disk device."
                    " This is not supported. Please remove the CPU or disk device from the device_map."
                )

    def _process_model_after_weight_loading(self, model: "PreTrainedModel", **kwargs):
        return model

    def _process_model_before_weight_loading(
        self,
        model: "PreTrainedModel",
        keep_in_fp32_modules: Optional[list[str]] = None,
        **kwargs,
    ):
        from ..integrations import replace_with_bitnet_linear

        self.modules_to_not_convert = self.get_modules_to_not_convert(
            model, self.quantization_config.modules_to_not_convert, keep_in_fp32_modules
        )

        model = replace_with_bitnet_linear(
            model,
            modules_to_not_convert=self.modules_to_not_convert,
            quantization_config=self.quantization_config,
            pre_quantized=self.pre_quantized,
        )

    def adjust_max_memory(self, max_memory: dict[str, Union[int, str]]) -> dict[str, Union[int, str]]:
        max_memory = {key: val * 0.90 for key, val in max_memory.items()}
        return max_memory

    def adjust_target_dtype(self, target_dtype: "torch.dtype") -> "torch.dtype":
        target_dtype = torch.int8
        return target_dtype

    def is_serializable(self, safe_serialization=None):
        return True

    @property
    def is_trainable(self) -> bool:
        return (
            self.quantization_config.linear_class == "autobitlinear"
            and self.quantization_config.quantization_mode == "online"
        )

    @property
    def is_qat_trainable(self) -> bool:
        """Flag indicating whether the quantized model can carry out quantization aware training"""
        return (
            self.quantization_config.linear_class == "autobitlinear"
            and self.quantization_config.quantization_mode == "online"
        )
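
# Usage sketch (an illustrative addition, not part of the upstream module).
# BitNetHfQuantizer is not instantiated by hand: `from_pretrained` reads the
# checkpoint's `quantization_config` and, for the BitNet quant method,
# dispatches to this quantizer, which swaps nn.Linear layers for BitLinear
# before the ternary weights are loaded. The repo id below is a hypothetical
# placeholder; substitute any BitNet-quantized checkpoint.
if __name__ == "__main__":
    from transformers import AutoModelForCausalLM

    model = AutoModelForCausalLM.from_pretrained(
        "example-org/bitnet-b1.58-demo",  # hypothetical checkpoint id
        device_map="cuda",  # validate_environment above rejects "cpu"/"disk" device_map entries
    )
    # `from_pretrained` attaches the active quantizer to the loaded model.
    print(type(model.hf_quantizer).__name__)  # -> "BitNetHfQuantizer"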