from typing import TYPE_CHECKING, Any

from ..integrations import prepare_for_hqq_linear
from ..utils import is_accelerate_available, is_hqq_available, is_torch_available, logging
from .base import HfQuantizer
from .quantizers_utils import get_module_from_name


if TYPE_CHECKING:
    from ..modeling_utils import PreTrainedModel

if is_accelerate_available():
    from accelerate.hooks import remove_hook_from_module

if is_torch_available():
    import torch

logger = logging.get_logger(__name__)


# Finds the parent of a module given its dotted name, e.g. the block that owns a Linear layer.
def find_parent(model, name):
    module_tree = name.split(".")[:-1]
    parent = model
    for m in module_tree:
        parent = parent._modules[m]
    return parent
class HqqHfQuantizer(HfQuantizer):
    """
    HQQ quantizer base HF class.
    nn.Linear modules are first tagged with quant_config in _process_model_before_weight_loading().
    The actual quantization and offloading to the GPU is done in check_quantized_param().
    """

    use_keep_in_fp32_modules = False
    requires_parameters_quantization = True
    requires_calibration = False
    required_packages = ["hqq"]

    def __init__(self, quantization_config, **kwargs):
        super().__init__(quantization_config, **kwargs)
        self.dtype = None
        self.using_multi_gpu = False
zHqqHfQuantizer.__init__c                 O   s   t  std|dds|ddrtd| jd u r/d|v r&|d | _n	tj| _td |d}t	|t
rVd	| v sEd
| v rItdtt| dk| _d S d S )NzA valid HQQ version (>=0.2.1) is not available. Please follow the instructions to install it: `https://github.com/mobiusml/hqq/`.from_tfF	from_flaxzwConverting weights from tf/flax weights is currently not supported, please make sure the weights are in PyTorch format.r   zOSetting dtype to torch.float32 as the default value since it was not specified.
device_mapcpudiskzYou are attempting to use an HQQ model with a device_map that contains a CPU or disk device. This is not supported. Please remove the CPU or disk device from the device_map.r
   )r   ImportErrorget
ValueErrorr   torchfloat32loggerinfo
isinstancedictvalueslensetr    )r!   argsr#   r(   r   r   r   validate_environment>   s*   



z#HqqHfQuantizer.validate_environmentr   r   missing_keysprefixreturnc                 K   s   | j r
dd |D S |S )Nc                 S   s   g | ]}d |vr|qS )weightr   ).0keyr   r   r   
<listcomp>_       z6HqqHfQuantizer.update_missing_keys.<locals>.<listcomp>)pre_quantized)r!   r   r9   r:   r#   r   r   r   update_missing_keys[   s   z"HqqHfQuantizer.update_missing_keysexpected_keysloaded_keysc                    sJ  | j s|S  fdd t|}t rddlm} | D ]\}}||_qt } || t }	|D ]|jjd D ]}
    def update_expected_keys(self, model, expected_keys: list[str], loaded_keys: list[str]) -> list[str]:
        if not self.pre_quantized:
            return expected_keys

        # Collect all quantizable (nn.Linear) layers, recorded by their dotted names.
        def _find_hqq_quantizable_layers(model, layers):
            for name, module in model.named_children():
                if isinstance(module, torch.nn.Linear):
                    layers.add(module.name)
                _find_hqq_quantizable_layers(module, layers)

        new_keys = set(expected_keys)
        if is_hqq_available():
            from hqq.core.quantize import HQQLinear

            # Name modules
            for name, module in model.named_modules():
                module.name = name

            # Valid modules are Linear layers expected to carry an HQQLinear state_dict; skip_modules are excluded.
            _valid_modules = set()
            _find_hqq_quantizable_layers(model, _valid_modules)
            _skipped_modules = set()
            for _module in _valid_modules:
                for _skip_module in model.config.quantization_config["skip_modules"]:
                    if _skip_module in _module:
                        _skipped_modules.add(_module)
            _valid_modules -= _skipped_modules

            # Reference keys that an HQQLinear layer serializes (minus the optional bias).
            _ref_keys = HQQLinear(
                linear_layer=None, quant_config=None, compute_dtype=torch.float16, device="cpu", del_orig=False
            ).state_dict_keys() - {"bias"}

            # Drop the expected keys of the quantizable layers; they are re-added below.
            _rm_keys = set()
            for key in new_keys:
                if any(_module in key for _module in _valid_modules):
                    _rm_keys.add(key)
            new_keys -= _rm_keys

            # Re-populate each layer's keys, either as a plain Linear weight or as the full HQQLinear key set.
            for _module in _valid_modules:
                if _module + ".weight" in loaded_keys:
                    new_keys.add(_module + ".weight")
                else:
                    new_keys.update({_module + "." + _ref_key for _ref_key in _ref_keys})
                if _module + ".bias" in loaded_keys:
                    new_keys.add(_module + ".bias")

        return list(new_keys)
    def check_quantized_param(
        self,
        model: "PreTrainedModel",
        param_value: "torch.Tensor",
        param_name: str,
        state_dict: dict[str, Any],
        **kwargs,
    ) -> bool:
        if is_hqq_available():
            from hqq.core.quantize import HQQLinear
        module, tensor_name = get_module_from_name(model, param_name)

        if self.pre_quantized:
            return isinstance(module, (torch.nn.Linear, HQQLinear)) and tensor_name != "weight"
        else:
            return isinstance(module, torch.nn.Linear) and tensor_name == "weight" or (
                isinstance(module, HQQLinear) and tensor_name == "bias"
            )
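
    # Worked example of the parameter-name handling in create_quantized_param below
    # (hypothetical name, not from the original file): for
    #   param_name = "model.layers.0.self_attn.q_proj.weight"
    # the method derives
    #   layer_name = "model.layers.0.self_attn.q_proj"  (the nn.Linear being processed)
    #   node       = "q_proj"                           (attribute name on the parent module)
    #   module_tag = "self_attn.q_proj"                 (key used to look up a per-layer quant config)
    # and swaps in the HQQLinear via setattr(parent_module, node, hqq_layer), since assigning to a
    # local variable would not update the model tree.
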
d|ddd }t||}|dd }|
d	kr:dS i }|	 D ] \}}|d |v r`|||dd < |dur`||v r`|
| q@| jrt|	|rkdS |dd| j|d
d}|| |jdurt|jtjrtj|j|_| jr| |}t||| |	`~	tj  dS |	 D ]\}}t|	|tj| q|jjd }|jjd }d|	jddd }d}d|v r|}n||v r|| }|D ]}||	jv rd} nq|dur&||	|| j|dd}|jdurt|jtjrtj|j|_| jr| |}t||| n|	j| j|d}	t|||	 tj  dS )a  
        Each nn.Linear layer is processed here.
        We first check if the corresponding module state_dict contains already HQQ quantized parameters.
        If not, we create a temp linear layer with the module state_dict params and use it for quantization
        r   rM   _selfc                 S   s   t jd| j| jdS )Nr   r   rT   )r.   emptyrS   rT   )rq   r   r   r   r<      s   z5HqqHfQuantizer.create_quantized_param.<locals>.weightr   Nr   rV   FrP   rR   rO   weight_quant_paramsT)rR   rS   rT   rU   rr   )r   r[   rN   propertyr<   r   joinr   r   itemsremoverA   r2   r   load_state_dictrV   r.   TensorrG   	Parameterr    _patch_layer_for_multigpusetattr__dict__cudaempty_cacher]   r"   r   to)r!   r   rj   rk   ro   rl   rp   rN   r<   rK   rm   
layer_nameparent_modulenodemodule_state_dictkv	hqq_layerr>   tensorrR   rO   
module_tagmodule_quant_configskip_moduler   r   r   create_quantized_param   s   








    # Remove any accelerate hook and use a simpler forward pass; otherwise this breaks with multi-gpu.
    def _patch_layer_for_multigpu(self, hqq_layer):
        hqq_layer = remove_hook_from_module(hqq_layer)

        def forward_with_device(self, x):
            out = torch.matmul(x.to(self.device), self.dequantize().t())
            if self.bias is not None:
                out += self.bias
            return out

        hqq_layer.forward = lambda x: forward_with_device(hqq_layer, x)
        return hqq_layer

    def _process_model_before_weight_loading(self, model: "PreTrainedModel", **kwargs):
        model = prepare_for_hqq_linear(model, quantization_config=self.quantization_config)

    def _process_model_after_weight_loading(self, model: "PreTrainedModel", **kwargs):
        model.is_hqq_quantized = True
        model.is_hqq_serializable = self.is_serializable()
        return model
    def is_serializable(self, safe_serialization=None):
        return True

    @property
    def is_trainable(self) -> bool:
        return True
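
# -----------------------------------------------------------------------------
# Usage sketch (not part of the original module). This quantizer is never
# instantiated directly: the loading machinery selects it when a model is
# loaded with an HQQ quantization config. The checkpoint id below is a
# placeholder assumption; `nbits` and `group_size` are standard HqqConfig
# options.
#
#   import torch
#   from transformers import AutoModelForCausalLM, HqqConfig
#
#   quant_config = HqqConfig(nbits=4, group_size=64)
#   model = AutoModelForCausalLM.from_pretrained(
#       "some-org/some-model",        # placeholder checkpoint
#       torch_dtype=torch.float16,    # newer transformers versions accept `dtype`
#       device_map="cuda",
#       quantization_config=quant_config,
#   )
# -----------------------------------------------------------------------------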