from typing import TYPE_CHECKING, Optional, Union

from .base import HfQuantizer


if TYPE_CHECKING:
    from ..modeling_utils import PreTrainedModel

from ..utils import is_accelerate_available, is_torch_available, logging


if is_torch_available():
    import torch


logger = logging.get_logger(__name__)


class BitNetHfQuantizer(HfQuantizer):
    """
    1.58-bit quantization from the BitNet quantization method:
    before loading, it converts the model's linear layers into BitLinear layers.

    Check out the paper introducing this method: https://huggingface.co/papers/2402.17764
    """

    requires_parameters_quantization = False
    requires_calibration = True

    required_packages = ["accelerate"]

    def __init__(self, quantization_config, **kwargs):
        super().__init__(quantization_config, **kwargs)
        self.quantization_config = quantization_config

    def validate_environment(self, *args, **kwargs):
        if not is_accelerate_available():
            raise ImportError("Loading a BitNet quantized model requires accelerate (`pip install accelerate`)")

        if kwargs.get("from_tf", False) or kwargs.get("from_flax", False):
            raise ValueError(
                "Loading ternary weights from tf/flax is currently not supported, please make sure the weights are"
                " in PyTorch format."
            )

        if not torch.cuda.is_available():
            logger.warning_once(
                "You don't have a GPU available to load the model, the inference will be slow because of weight unpacking"
            )
            return

        device_map = kwargs.get("device_map", None)
        if device_map is None:
            logger.warning_once(
                "You have loaded a BitNet model on CPU and have a CUDA device available, make sure to set "
                "your model on a GPU device in order to run your model."
            )
        elif device_map is not None:
            if isinstance(device_map, dict) and ("cpu" in device_map.values() or "disk" in device_map.values()):
                raise ValueError(
                    "You are attempting to load a BitNet model with a device_map that contains a CPU or disk device. "
                    "This is not supported. Please remove the CPU or disk device from the device_map."
                )

    def _process_model_after_weight_loading(self, model: "PreTrainedModel", **kwargs):
        return model

    def _process_model_before_weight_loading(
        self,
        model: "PreTrainedModel",
        keep_in_fp32_modules: Optional[list[str]] = None,
        **kwargs,
    ):
        from ..integrations import replace_with_bitnet_linear

        self.modules_to_not_convert = self.get_modules_to_not_convert(
            model, self.quantization_config.modules_to_not_convert, keep_in_fp32_modules
        )

        model = replace_with_bitnet_linear(
            model,
            modules_to_not_convert=self.modules_to_not_convert,
            quantization_config=self.quantization_config,
            pre_quantized=self.pre_quantized,
        )

    def adjust_max_memory(self, max_memory: dict[str, Union[int, str]]) -> dict[str, Union[int, str]]:
        max_memory = {key: val * 0.90 for key, val in max_memory.items()}
        return max_memory

    def adjust_target_dtype(self, target_dtype: "torch.dtype") -> "torch.dtype":
        target_dtype = torch.int8
        return target_dtype

    def is_serializable(self, safe_serialization=None):
        return True

    @property
    def is_trainable(self) -> bool:
        return (
            self.quantization_config.linear_class == "autobitlinear"
            and self.quantization_config.quantization_mode == "online"
        )

    @property
    def is_qat_trainable(self) -> bool:
        """Flag indicating whether the quantized model can carry out quantization aware training"""
        return (
            self.quantization_config.linear_class == "autobitlinear"
            and self.quantization_config.quantization_mode == "online"
        )

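
# Illustrative usage sketch (an assumption for illustration, not part of the
# upstream module): a BitNet checkpoint ships its quantization config, so a
# plain `from_pretrained` call is enough to route through this quantizer.
# The checkpoint ID below is a hypothetical placeholder.
if __name__ == "__main__":
    from transformers import AutoModelForCausalLM

    # Linear layers are swapped for BitLinear before the ternary weights are
    # loaded (see _process_model_before_weight_loading above).
    model = AutoModelForCausalLM.from_pretrained(
        "some-org/bitnet-b1.58-demo",  # hypothetical BitNet checkpoint ID
        device_map="auto",  # CPU-only works, but weight unpacking makes inference slow
    )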