
from typing import TYPE_CHECKING, Optional, Union

from .base import HfQuantizer


if TYPE_CHECKING:
    from ..modeling_utils import PreTrainedModel

from ..utils import is_accelerate_available, is_torch_available, logging


if is_torch_available():
    import torch

logger = logging.get_logger(__name__)


class BitNetHfQuantizer(HfQuantizer):
    """
    1.58-bit quantization from BitNet quantization method:
    Before loading: it converts the linear layers into BitLinear layers during loading.

    Check out the paper introducing this method: https://huggingface.co/papers/2402.17764
    """

    requires_parameters_quantization = False
    requires_calibration = True

    required_packages = ["accelerate"]

    def __init__(self, quantization_config, **kwargs):
        super().__init__(quantization_config, **kwargs)
        self.quantization_config = quantization_config

    def validate_environment(self, *args, **kwargs):
        if not is_accelerate_available():
            raise ImportError("Loading a BitNet quantized model requires accelerate (`pip install accelerate`)")

        if kwargs.get("from_tf", False) or kwargs.get("from_flax", False):
            raise ValueError(
                "Loading ternary weights from tf/flax is currently not supported, please make sure the weights are"
                " in PyTorch format."
            )

        if not torch.cuda.is_available():
            logger.warning_once(
                "You don't have a GPU available to load the model, the inference will be slow because of weight"
                " unpacking"
            )
            return

        device_map = kwargs.get("device_map", None)
        if device_map is None:
            logger.warning_once(
                "You have loaded a BitNet model on CPU and have a CUDA device available, make sure to set your model"
                " on a GPU device in order to run your model."
            )
        elif device_map is not None:
            if isinstance(device_map, dict) and ("cpu" in device_map.values() or "disk" in device_map.values()):
                raise ValueError(
                    "You are attempting to load a BitNet model with a device_map that contains a CPU or disk device."
                    " This is not supported. Please remove the CPU or disk device from the device_map."
                )

    def _process_model_after_weight_loading(self, model: "PreTrainedModel", **kwargs):
        return model

    def _process_model_before_weight_loading(
        self,
        model: "PreTrainedModel",
        keep_in_fp32_modules: Optional[list[str]] = None,
        **kwargs,
    ):
        from ..integrations import replace_with_bitnet_linear

        self.modules_to_not_convert = self.get_modules_to_not_convert(
            model, self.quantization_config.modules_to_not_convert, keep_in_fp32_modules
        )

        model = replace_with_bitnet_linear(
            model,
            modules_to_not_convert=self.modules_to_not_convert,
            quantization_config=self.quantization_config,
            pre_quantized=self.pre_quantized,
        )

    def adjust_max_memory(self, max_memory: dict[str, Union[int, str]]) -> dict[str, Union[int, str]]:
        max_memory = {key: val * 0.90 for key, val in max_memory.items()}
        return max_memory

    def adjust_target_dtype(self, target_dtype: "torch.dtype") -> "torch.dtype":
        target_dtype = torch.int8
        return target_dtype

    def is_serializable(self, safe_serialization=None):
        return True

    @property
    def is_trainable(self) -> bool:
        return (
            self.quantization_config.linear_class == "autobitlinear"
            and self.quantization_config.quantization_mode == "online"
        )

    @property
    def is_qat_trainable(self) -> bool:
        """Flag indicating whether the quantized model can carry out quantization aware training"""
        return (
            self.quantization_config.linear_class == "autobitlinear"
            and self.quantization_config.quantization_mode == "online"
        )
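
# Usage sketch (an illustrative addition, not part of the upstream module).
# BitNetHfQuantizer is not instantiated by hand: `from_pretrained` reads the
# checkpoint's `quantization_config` and, for the BitNet quant method,
# dispatches to this quantizer, which swaps nn.Linear layers for BitLinear
# before the ternary weights are loaded. The repo id below is a hypothetical
# placeholder; substitute any BitNet-quantized checkpoint.
if __name__ == "__main__":
    from transformers import AutoModelForCausalLM

    model = AutoModelForCausalLM.from_pretrained(
        "example-org/bitnet-b1.58-demo",  # hypothetical checkpoint id
        device_map="cuda",  # validate_environment above rejects "cpu"/"disk" device_map entries
    )
    # `from_pretrained` attaches the active quantizer to the loaded model.
    print(type(model.hf_quantizer).__name__)  # -> "BitNetHfQuantizer"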