
    h,                         d dl mZ d dlmZ ddlmZ ddlmZmZm	Z	 ddl
mZ ddlmZ erdd	lmZ  e       rd d
lZ e       rd dlmZ ed        Zee_         e	j*                  e      Z G d de      Zy
)    )defaultdict)TYPE_CHECKING   )prepare_for_hqq_linear)is_hqq_availableis_torch_availablelogging   )HfQuantizer)get_module_from_name)PreTrainedModelN)	HQQLinearc                 Z    t        j                  d| j                  | j                        S )Nr   )dtypedevice)torchemptycompute_dtyper   selfs    d/var/www/html/aiagenthome/venv/lib/python3.12/site-packages/transformers/quantizers/quantizer_hqq.pyweightr   %   s    {{1D$6$6t{{KK    c            	            e Zd ZdZdZdZdZdgZ fdZddde	e
   d	e
d
e	e
   fdZddde	e
   de	e
   d
e	e
   fdZddde
d
efdZddddde
ddfdZd Z	 	 ddZddZddZed
efd       Z xZS )HqqHfQuantizerz
    HQQ quantizer base HF class.
    nn.Linear modules are first tagged with quant_config in _process_model_before_weight_loading().
    FThqqc                    t               st        d      t        |   |fi | d | _        d| _        t        d d       j                         dhz
  | _        |j                  dd      s|j                  dd      rt        d      | j                  9d|v r|d   | _        n*t        j                  | _        t        j                  d       |j                  d	      }t        |t               rZd
|j#                         v sd|j#                         v rt        d      t%        t'        |j#                                     dkD  | _        y y )NzA valid HQQ version (>=0.2.1) is not available. Please follow the instructions to install it: `https://github.com/mobiusml/hqq/`.Fbiasfrom_tf	from_flaxzwConverting weights from tf/flax weights is currently not supported, please make sure the weights are in PyTorch format.r   zOSetting dtype to torch.float32 as the default value since it was not specified.
device_mapcpudiskzYou are attempting to use an HQQ model with a device_map that contains a CPU or disk device. This is not supported. Please remove the CPU or disk device from the device_map.r
   )r   ImportErrorsuper__init__r   using_multi_gpur   state_dict_keyshqq_keysget
ValueErrorr   float32loggerinfo
isinstancedictvalueslenset)r   quantization_configkwargsr!   	__class__s       r   r&   zHqqHfQuantizer.__init__9   s5   ! T  	,77
$!$-==?6(J::i'6::k5+I; 
 ::& #G_
"]]
mnZZ-
j$'
))++v9J9J9L/L h 
 (+3z/@/@/B+C'Dq'H$ (r   modelr   missing_keysprefixreturnc                 R    | j                   r|D cg c]	  }d|vs| c}S |S c c}w )Nr   )pre_quantized)r   r7   r8   r9   r5   keys         r   update_missing_keysz"HqqHfQuantizer.update_missing_keys[   s5     #/I<CHC4GC<II Js   	$$expected_keysloaded_keysc                    | j                   s|S fdt        |      }|j                         D ]  \  }}||_         t               } ||       t               }|D ]6  }	|j                  j
                  d   D ]  }
|
|	v s|j                  |	        8 ||z  }t        d d t        j                  dd      j                         dhz
  }t               }|D ](  t        fd|D              s|j                         * ||z  }|D ]_  }	|	dz   |v r|j                  |	dz          n%|j                  |D ch c]
  }|	d	z   |z    c}       |	d
z   |v sL|j                  |	d
z          a t        |      S c c}w )Nc                     | j                         D ]M  \  }}t        |t        j                  j                        r|j                  |j                          ||       O y N)named_childrenr/   r   nnLinearaddname)r7   layersrH   module_find_hqq_quantizable_layerss       r   rK   zIHqqHfQuantizer.update_expected_keys.<locals>._find_hqq_quantizable_layersk   sF     % 4 4 6ffuxx8JJv{{+,VV< !7r   skip_modulesr"   Flinear_layerquant_configr   r   del_origr   c              3   &   K   | ]  }|v  
 y wrC    ).0_moduler=   s     r   	<genexpr>z6HqqHfQuantizer.update_expected_keys.<locals>.<genexpr>   s     @g7c>   z.weight.z.bias)r<   r3   named_modulesrH   configr4   rG   r   r   float16r(   anyupdatelist)r   r7   r?   r@   new_keysrH   rJ   _valid_modules_skipped_modulesrT   _skip_module	_ref_keys_rm_keys_ref_keyrK   r=   s                 @@r   update_expected_keysz#HqqHfQuantizer.update_expected_keysd   s    !!  	= }% "//1LD&FK 2 $UN; 5%G % @ @ P7*$((1 !Q & 	** --
 /
vh'	 5C@@@S!  	H &G"k1Wy01) T)h3!9) TU K/Ww./ & H~	 !Us   9E8

param_namec                 h    t        ||      \  }}t        |t        j                  j                        S rC   )r   r/   r   rE   rF   )r   r7   rf   r5   rJ   _s         r   param_needs_quantizationz'HqqHfQuantizer.param_needs_quantization   s)    (
;	 &%((//22r   param_valueztorch.Tensortarget_deviceztorch.devicec                    t        ||      \  }|j                  dd      d   }t        ||      \  }}	|j                  j                  d   }
|j                  j                  d   }t	        fd|D              r2j                  ||j                  || j                        idd	
       y | j                  r?t        | d      st        t              | _        | j                  |   j                  ||i       | j                  |   t        fd| j                  D              rdv sj                   t#        d d | j                  |d      }|j                         |j                   Rt%        |j                   t&        j(                        r.t&        j*                  j-                  |j                         |_        | j.                  r| j1                  |      }t3        ||	|       | j                  |= y j                  ||idd	
       j4                  j6                  j8                  dk7  xr3 j                   d u xs# j                   j6                  j8                  dk7  }|rdj;                  j<                  j?                  d      dd        }d|
v r|
}n	||
v r|
|   }t#        | j                  |d	      }|j                   Rt%        |j                   t&        j(                        r.t&        j*                  j-                  |j                         |_        | j.                  r| j1                  |      }t3        ||	|       y y )NrW   r
   r   rO   rL   c              3   :   K   | ]  }|j                   v   y wrC   )rH   )rS   skip_modulerJ   s     r   rU   z8HqqHfQuantizer.create_quantized_param.<locals>.<genexpr>   s     J\k{fkk)\s   )r   r   FT)strictassign
hqq_paramsc              3   &   K   | ]  }|v  
 y wrC   rR   )rS   krq   s     r   rU   z8HqqHfQuantizer.create_quantized_param.<locals>.<genexpr>   s     :Mq1
?MrV   r   rM   metaweight_quant_params)rO   r   r   rP   ) r   rsplitrY   r4   r[   load_state_dicttor   r<   hasattrr   r0   rq   r\   allr)   r   r   r/   r   TensorrE   	Parameterr'   _patch_layer_for_multigpusetattrr   r   typejoinrH   split)r   r7   rj   rf   rk   r5   tensor_namemodule_nameparent_modulenoderO   rL   	hqq_layermodule_is_ready
module_tagmodule_quant_configrq   rJ   s                   @@r   create_quantized_paramz%HqqHfQuantizer.create_quantized_param   s    35*E ''Q/225+Ft||77G||77G J\JJ""knnMnTU^clp #   4."-d"3OOK(//k0JK5J :DMM::*@TX^XcXcXk%!%!%"&**("	 ))*5>>-*Y^^U\\2Z%*XX%7%7	%GIN'' $ > >y IItY7OOK0& 	[9%PTU !--..33v= 
KK4D6;;#5#5#:#:f#D 	 &++"3"3C"8"=>J$4&2#|+&2:&>#!0"jj$I ~~)j.V!&!3!3INN!C	## ::9E	M43+ r   c                 &    d fd_         S )Nc                     t        j                  |j                  | j                        | j	                         j                               }| j                  || j                  z  }|S rC   )r   matmulry   r   
dequantizetr   )r   xouts      r   forward_with_devicezEHqqHfQuantizer._patch_layer_for_multigpu.<locals>.forward_with_device   sL    ,,qttDKK0$//2C2E2E2GHCyy$tyy Jr   c                      |       S rC   rR   )r   r   r   s    r   <lambda>z:HqqHfQuantizer._patch_layer_for_multigpu.<locals>.<lambda>   s    &9)Q&Gr   )forward)r   r   r   s    `@r   r~   z(HqqHfQuantizer._patch_layer_for_multigpu   s    	 H	r   c                 2    t        || j                        }y )N)r4   )r   r4   r   r7   r5   s      r   $_process_model_before_weight_loadingz3HqqHfQuantizer._process_model_before_weight_loading  s     'u$BZBZ[r   c                 >    d|_         | j                         |_        |S NT)is_hqq_quantizedis_serializableis_hqq_serializabler   s      r   #_process_model_after_weight_loadingz2HqqHfQuantizer._process_model_after_weight_loading
  s     !%$($8$8$:!r   c                      yr   rR   )r   safe_serializations     r   r   zHqqHfQuantizer.is_serializable  s    r   c                      yr   rR   r   s    r   is_trainablezHqqHfQuantizer.is_trainable  s    r   )r7   r   rC   )__name__
__module____qualname____doc__use_keep_in_fp32_modules requires_parameters_quantizationrequires_calibrationrequired_packagesr&   r]   strr>   re   boolri   r   r~   r   r   r   propertyr   __classcell__)r6   s   @r   r   r   .   s   
  %'+$  ID & 6:3i IL 	c 9&97;Cy9OSTWy9	c9v3.? 3S 3_c 3P4 P4 $P4 	P4
 &P4d\ \
 d  r   r   )collectionsr   typingr   integrationsr   utilsr   r   r	   baser   quantizers_utilsr   modeling_utilsr   r   hqq.core.quantizer   r   r   
get_loggerr   r-   r   rR   r   r   <module>r      sx    $   1 A A  2 0 +
 L L I			H	%f[ fr   