from typing import TYPE_CHECKING, Any

from ..integrations import prepare_for_hqq_linear
from ..utils import is_accelerate_available, is_hqq_available, is_torch_available, logging
from .base import HfQuantizer
from .quantizers_utils import get_module_from_name


if TYPE_CHECKING:
    from ..modeling_utils import PreTrainedModel

if is_accelerate_available():
    from accelerate.hooks import remove_hook_from_module

if is_torch_available():
    import torch

logger = logging.get_logger(__name__)


# Finds the parent of a node module named "name"
def find_parent(model, name):
    module_tree = name.split(".")[:-1]
    parent = model
    for m in module_tree:
        parent = parent._modules[m]
    return parent


class HqqHfQuantizer(HfQuantizer):
    """
    HQQ quantizer base HF class.
    nn.Linear modules are first tagged with quant_config in _process_model_before_weight_loading().
    The actual quantization and offloading to the GPU is done in check_quantized_param().
    """

    use_keep_in_fp32_modules = False
    requires_parameters_quantization = True
    requires_calibration = False
    required_packages = ["hqq"]

    def __init__(self, quantization_config, **kwargs):
        super().__init__(quantization_config, **kwargs)
        self.torch_dtype = None
        self.using_multi_gpu = False

    def validate_environment(self, *args, **kwargs):
        if not is_hqq_available():
            raise ImportError(
                "A valid HQQ version (>=0.2.1) is not available. Please follow the instructions to install it: "
                "`https://github.com/mobiusml/hqq/`."
            )

        if kwargs.get("from_tf", False) or kwargs.get("from_flax", False):
            raise ValueError(
                "Converting weights from tf/flax weights is currently not supported, please make sure the weights"
                " are in PyTorch format."
            )

        if self.torch_dtype is None:
            if "torch_dtype" in kwargs:
                self.torch_dtype = kwargs["torch_dtype"]
            else:
                self.torch_dtype = torch.float32
                logger.info("Setting torch_dtype to torch.float32 as the default value since it was not specified.")

        device_map = kwargs.get("device_map", None)
        if isinstance(device_map, dict):
            if "cpu" in device_map.values() or "disk" in device_map.values():
                raise ValueError(
                    "You are attempting to use an HQQ model with a device_map that contains a CPU or disk device."
                    " This is not supported. Please remove the CPU or disk device from the device_map."
                )
            else:
                self.using_multi_gpu = len(set(device_map.values())) > 1

    def update_missing_keys(
        self, model: "PreTrainedModel", missing_keys: list[str], prefix: str, **kwargs
    ) -> list[str]:
        if self.pre_quantized:
            return [key for key in missing_keys if "weight" not in key]
        else:
            return missing_keys

    def update_expected_keys(
        self, model: "PreTrainedModel", expected_keys: list[str], loaded_keys: list[str]
    ) -> list[str]:
        if not self.pre_quantized:
            return expected_keys

        # Collects all quantizable (nn.Linear) layers
        def _find_hqq_quantizable_layers(model, layers):
            for name, module in model.named_children():
                if isinstance(module, torch.nn.Linear):
                    layers.add(module.name)
                _find_hqq_quantizable_layers(module, layers)

        new_keys = set(expected_keys)
        if is_hqq_available():
            from hqq.core.quantize import HQQLinear

            # Name modules
            for name, module in model.named_modules():
                module.name = name

            # Valid modules are the nn.Linear layers that are not in skip_modules
            _valid_modules = set()
            _find_hqq_quantizable_layers(model, _valid_modules)

            # Remove skipped modules
            _skipped_modules = set()
            for _module in _valid_modules:
                for _skip_module in model.config.quantization_config["skip_modules"]:
                    if _skip_module in _module:
                        _skipped_modules.add(_module)
            _valid_modules -= _skipped_modules

            # Reference state_dict keys of a serialized HQQLinear layer (the bias is handled separately)
            _ref_keys = HQQLinear(
                linear_layer=None, quant_config=None, compute_dtype=torch.float16, device="cpu"
            ).state_dict_keys() - {"bias"}

            # Clean-up: drop the keys that belong to quantizable layers
            _rm_keys = set()
            for key in new_keys:
                if any(_module in key for _module in _valid_modules):
                    _rm_keys.add(key)
            new_keys -= _rm_keys
            # At this point, new_keys contains all the keys of the layers that are NOT HQQLinear or torch.nn.Linear

            # Re-populate either as a regular nn.Linear weight or as HQQLinear state_dict keys
            for _module in _valid_modules:
                if _module + ".weight" in loaded_keys:
                    new_keys.add(_module + ".weight")
                else:
                    new_keys.update({_module + "." + _ref_key for _ref_key in _ref_keys})
                if _module + ".bias" in loaded_keys:
                    new_keys.add(_module + ".bias")

        return list(new_keys)

    def check_quantized_param(
        self,
        model: "PreTrainedModel",
        param_value: "torch.Tensor",
        param_name: str,
        state_dict: dict[str, Any],
        **kwargs,
    ) -> bool:
        if is_hqq_available():
            from hqq.core.quantize import HQQLinear
        module, tensor_name = get_module_from_name(model, param_name)

        if self.pre_quantized:
            return isinstance(module, (torch.nn.Linear, HQQLinear)) and tensor_name != "weight"
        else:
            return (isinstance(module, torch.nn.Linear) and tensor_name == "weight") or (
                isinstance(module, HQQLinear) and tensor_name == "bias"
            )

    def create_quantized_param(
        self,
        model: "PreTrainedModel",
        param_value: "torch.Tensor",
        param_name: str,
        target_device: "torch.device",
        state_dict: dict[str, Any],
        unexpected_keys: list[str] | None = None,
    ):
        """
        Each nn.Linear layer is processed here.
        We first check if the corresponding module state_dict contains already HQQ quantized parameters.
        If not, we create a temp linear layer with the module state_dict params and use it for quantization
        """

        if is_hqq_available():
            from hqq.core.quantize import HQQLinear

        # HQQ-quantized layers do not expose a `weight` tensor, but some model code still reads
        # `weight.dtype` / `weight.device`; expose a dummy empty tensor with the right dtype and device.
        @property
        def weight(_self):
            return torch.empty(0, dtype=_self.compute_dtype, device=_self.device)

        HQQLinear.weight = weight

        module, tensor_name = get_module_from_name(model, param_name)
        layer_name = ".".join(param_name.split(".")[:-1])
        parent_module = find_parent(model, layer_name)
        node = layer_name.split(".")[-1]

        if tensor_name == "bias":
            # the bias is set together with the weight when the layer is processed
            return

        # Collect the state_dict entries that belong to this layer
        module_state_dict = {}
        for k, v in state_dict.items():
            if layer_name + "." in k:
                module_state_dict[k.split(".")[-1]] = v
                if unexpected_keys is not None and k in unexpected_keys:
                    unexpected_keys.remove(k)

        if self.pre_quantized:
            if isinstance(module, HQQLinear):
                return

            # Rebuild an HQQLinear layer directly from the serialized (already quantized) parameters
            hqq_layer = HQQLinear(
                linear_layer=None,
                quant_config=None,
                compute_dtype=self.torch_dtype,
                device=target_device,
                del_orig=False,
            )

            hqq_layer.load_state_dict(module_state_dict)

            if hqq_layer.bias is not None and isinstance(hqq_layer.bias, torch.Tensor):
                hqq_layer.bias = torch.nn.Parameter(hqq_layer.bias)

            if self.using_multi_gpu:
                hqq_layer = self._patch_layer_for_multigpu(hqq_layer)

            setattr(parent_module, node, hqq_layer)

            # cleanup
            del module.__dict__, module
            torch.cuda.empty_cache()
            return

        # Step 1: populate the module with weight/bias from the module state dict
        for key, tensor in module_state_dict.items():
            setattr(module, key, torch.nn.Parameter(tensor))

        # Step 2: Replace the module with either HQQLinear or move it to device. We do this via setattr on the
        # parent module as doing it on the module directly doesn't work.
        quant_config = model.config.quantization_config["quant_config"]
        skip_modules = model.config.quantization_config["skip_modules"]

        module_tag = ".".join(module.name.split(".")[-2:])
        module_quant_config = None
        if "weight_quant_params" in quant_config:
            module_quant_config = quant_config
        elif module_tag in quant_config:
            module_quant_config = quant_config[module_tag]

        for skip_module in skip_modules:
            if skip_module in module.name:
                module_quant_config = None
                break

        if module_quant_config is not None:
            hqq_layer = HQQLinear(
                module,
                quant_config=module_quant_config,
                compute_dtype=self.torch_dtype,
                device=target_device,
                del_orig=True,
            )

            if hqq_layer.bias is not None and isinstance(hqq_layer.bias, torch.Tensor):
                hqq_layer.bias = torch.nn.Parameter(hqq_layer.bias)

            if self.using_multi_gpu:
                hqq_layer = self._patch_layer_for_multigpu(hqq_layer)

            setattr(parent_module, node, hqq_layer)
        else:
            module = module.to(dtype=self.torch_dtype, device=target_device)
            setattr(parent_module, node, module)

        torch.cuda.empty_cache()

    # Remove the accelerate hook and use a simpler forward pass. Otherwise, this breaks with multi-gpu
    def _patch_layer_for_multigpu(self, hqq_layer):
        hqq_layer = remove_hook_from_module(hqq_layer)

        def forward_with_device(self, x):
            out = torch.matmul(x.to(self.device), self.dequantize().t())
            if self.bias is not None:
                out += self.bias
            return out

        hqq_layer.forward = lambda x: forward_with_device(hqq_layer, x)
        return hqq_layer

    def _process_model_before_weight_loading(
        self,
        model: "PreTrainedModel",
        **kwargs,
    ):
        # Add the corresponding quant_config to each valid module group
        model = prepare_for_hqq_linear(model, quantization_config=self.quantization_config)

    def _process_model_after_weight_loading(self, model: "PreTrainedModel", **kwargs):
        model.is_hqq_quantized = True
        model.is_hqq_serializable = self.is_serializable()
        return model

    def is_serializable(self, safe_serialization=None):
        return True

    @property
    def is_trainable(self) -> bool:
        return True
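
# ---------------------------------------------------------------------------
# Usage sketch (illustrative only, not part of the quantizer implementation).
# This quantizer is not instantiated directly; it is driven by `from_pretrained`
# when an `HqqConfig` is passed as `quantization_config`. The checkpoint name
# and quantization settings below are placeholders, and the example assumes the
# `hqq` package is installed:
#
#     from transformers import AutoModelForCausalLM, HqqConfig
#
#     quant_config = HqqConfig(nbits=4, group_size=64)
#     model = AutoModelForCausalLM.from_pretrained(
#         "<some-causal-lm-checkpoint>",
#         torch_dtype="float16",
#         device_map="cuda",
#         quantization_config=quant_config,
#     )
# ---------------------------------------------------------------------------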