o
    
sh!                     @   s   d dl mZmZmZ ddlmZ ddlmZ ddlm	Z	 er$ddl
mZ ddlmZmZmZmZmZ dd	lmZ e r?d d
lZeeZG dd deZd
S )    )TYPE_CHECKINGAnyOptional   )tqdm   )HfQuantizer)get_module_from_name)PreTrainedModel)is_accelerate_availableis_flute_availableis_hadamard_availableis_torch_availablelogging)QuantizationConfigMixinNc                       s  e Zd ZdZdZdZddgZdef fddZd	d
 Z	d-ddZ
	d.dddddedddeeef deee  fddZ	d.dddeee  fddZd/dd Zd!ee d"edee fd#d$Zedefd%d&Zd.d'd(Zdddddedeeef def
d)d*Zd+d, Z  ZS )0HiggsHfQuantizerz
    Quantizer of the HIGGS method. Enables the loading of prequantized models and in-flight quantization of full-precision models.
    FTzflute-kernelfast_hadamard_transformquantization_configc                    s   t  j|fi | || _d S N)super__init__r   )selfr   kwargs	__class__ e/var/www/html/alpaca_bot/venv/lib/python3.10/site-packages/transformers/quantizers/quantizer_higgs.pyr   +   s   
zHiggsHfQuantizer.__init__c                 K   s~   t j s	tdt stdt stdt std|d u r&tdt	|t
r;d| v s7d| v r=tdd S d S )	NzNHIGGS quantization is only supported on GPU. Please use a different quantizer.zHUsing `higgs` quantization requires Accelerate: `pip install accelerate`zLUsing `higgs` quantization requires FLUTE: `pip install flute-kernel>=0.3.0`zbUsing `higgs` quantization requires fast_hadamard_transform: `pip install fast_hadamard_transform`zwYou are attempting to load a HIGGS model without setting device_map. Please set device_map comprised of 'cuda' devices.cpudiskzYou are attempting to load a HIGGS model with a device_map that contains a CPU or disk device. This is not supported. Please remove the CPU or disk device from the device_map.)torchcudais_availableNotImplementedErrorr   ImportErrorr   r   
ValueError
isinstancedictvalues)r   
device_mapr   r   r   r   validate_environment/   s&   
"z%HiggsHfQuantizer.validate_environmentdtypetorch.dtypereturnc                 C   sD   |d u rt d tj}|S |tjkr |tjkr td| d|S )NzG`dtype` is None. Setting `dtype=torch.float16` for FLUTE compatibility.zInvalid `dtype` zS. HIGGS quantization only supports `dtype=torch.float16` or `dtype=torch.bfloat16`.)loggerinfor   float16bfloat16r$   )r   r*   r   r   r   update_dtypeI   s   

zHiggsHfQuantizer.update_dtypeNmodelr
   param_valueztorch.Tensor
param_nametarget_deviceztorch.device
state_dictunexpected_keysc                 C   s  ddl m} 	 |||| jj| jj| jj| jj}~t||\}	}
d	|
dd d }| D ]=\}}||	jv rGtjj|dd|	j|< q2||	jv rVtj||	j|< q2|dkrf||	_| | jj|< q2td| d	|	 |d ur||v r|| d S d S d S )
Nr   )quantize_with_higgs.F)requires_gradtune_metadatazUnexpected key z in module )integrationsr8   tor   bitsp
group_sizehadamard_sizer	   joinsplititems_parametersr   nn	Parameter_buffersBufferr<   to_dictr$   remove)r   r2   r3   r4   r5   r6   r7   r8   
flute_dictmodule_module_namekeyvaluer   r   r   create_quantized_paramT   s0   	

z'HiggsHfQuantizer.create_quantized_paramkeep_in_fp32_modulesc                 K   s@   ddl m} | || jj|| _||| j| jd | j|j_d S )Nr   )replace_with_higgs_linear)r   modules_to_not_convert)r=   rU   get_modules_to_not_convertr   rV   config)r   r2   rT   r   rU   r   r   r   $_process_model_before_weight_loading{   s   
z5HiggsHfQuantizer._process_model_before_weight_loadingc           
         s   ddl m}m} ddlm} ddlm  i } fdd| D }t|	 dd	d
D ]@\}}	|	j
j|vr?||	j
jd||	j
j< ||	j
j |	_|| jj| |	_||	j
j|	jj|	jd\|	j
_|	_|	j | jj|< q*d S )Nr   )TuneMetaDatamaybe_tune_and_repack)make_workspace_streamkr   HiggsLinearc                    s    i | ]\}}t | r||qS r   r%   .0namerN   r]   r   r   
<dictcomp>   s     zHHiggsHfQuantizer._process_model_after_weight_loading.<locals>.<dictcomp>zRepacking HIGGS modulesF)descleave)device)weightscalesmetadata)
flute.tunerZ   r[   flute.utilsr\   r=   r^   named_modulesr   rE   rg   rf   	workspace	from_dictr   r<   datarh   rK   )
r   r2   r   rZ   r[   r\   flute_workspacesflute_modulesrb   rN   r   r]   r   #_process_model_after_weight_loading   s"   z4HiggsHfQuantizer._process_model_after_weight_loadingmissing_keysprefixc                    sL   ddl m   fdd| D dtdtffddfd	d
|D S )Nr   r]   c                    s   h | ]\}}t | r|qS r   r_   r`   r]   r   r   	<setcomp>   s    z7HiggsHfQuantizer.update_missing_keys.<locals>.<setcomp>rQ   r,   c                    s>    ds
 drdS  d  t fddD S )Nz.weightz.biasFr9   c                 3   s     | ]}|v p| v V  qd S r   r   )ra   rb   full_keyrQ   r   r   	<genexpr>   s    zNHiggsHfQuantizer.update_missing_keys.<locals>.should_update.<locals>.<genexpr>)endswithany)rQ   )higgs_namesrt   rv   r   should_update   s   z;HiggsHfQuantizer.update_missing_keys.<locals>.should_updatec                    s   g | ]} |s|qS r   r   )ra   rQ   )r|   r   r   
<listcomp>   s    z8HiggsHfQuantizer.update_missing_keys.<locals>.<listcomp>)r=   r^   rl   strbool)r   r2   rs   rt   r   )r^   r{   rt   r|   r   update_missing_keys   s   z$HiggsHfQuantizer.update_missing_keysc                 C      dS )NFr   )r   r   r   r   is_trainable   s   zHiggsHfQuantizer.is_trainablec                 C   r   )NTr   )r   safe_serializationr   r   r   is_serializable   s   z HiggsHfQuantizer.is_serializablec           	      K   s@   ddl m} t||\}}t||r|dkr|jtjkrdS dS )Nr   r]   rg   TF)r=   r^   r	   r%   r*   r   int16)	r   r2   r3   r4   r6   r   r^   rN   tensor_namer   r   r   check_quantized_param   s
   z&HiggsHfQuantizer.check_quantized_paramc                 C   s   ddl m} ||}|S )Nr   )dequantize_higgs)r=   r   )r   r2   r   r   r   r   _dequantize   s   zHiggsHfQuantizer._dequantize)r*   r+   r,   r+   r   )r2   r
   )__name__
__module____qualname____doc__requires_calibration requires_parameters_quantizationrequired_packagesr   r   r)   r1   r~   r&   r   r   listrS   rY   rr   r   propertyr   r   r   r   r   __classcell__r   r   r   r   r   "   sZ    



*





r   )typingr   r   r   utils.loggingr   baser   quantizers_utilsr	   modeling_utilsr
   utilsr   r   r   r   r   utils.quantization_configr   r   
get_loggerr   r-   r   r   r   r   r   <module>   s   
