
    <h.                        S r SSKJrJr  SSKrSSKJr  SSKJr  SSK	J
r
  SSKJr  SSKJr  SS	KJr  S
SKJrJrJrJrJrJrJr  SSKJr  Sr " S S\R8                  5      r " S S\5      r " S S\5      r " S S\R8                  5      r  " S S\5      r! " S S\5      r"\ " S S\5      5       r# " S S\\#5      r$ " S S\5      r% " S  S!\5      r&/ S"Qr'g)#zPyTorch Hubert model.    )OptionalUnionN   )ACT2FN)is_deepspeed_zero3_enabled)BaseModelOutput)PreTrainedModel)auto_docstring   )Wav2Vec2EncoderWav2Vec2EncoderStableLayerNormWav2Vec2FeatureEncoderWav2Vec2ForCTC!Wav2Vec2ForSequenceClassificationWav2Vec2ModelWav2Vec2SamePadLayer   )HubertConfigc                   .   ^  \ rS rSrU 4S jrS rSrU =r$ )HubertPositionalConvEmbedding*   c                 2  > [         TU ]  5         [        R                  " UR                  UR                  UR
                  UR
                  S-  UR                  S9U l        S U l        UR                  (       a'  [        R                  " UR                  5      U l        GO[        R                  R                  n[        [        R                  R                  S5      (       a$  [        R                  R                  R                  n[        5       (       Ga%  SS KnUR"                  R%                  U R                  R&                  SS9   U" U R                  SSS9U l        S S S 5        [        U R                  S5      (       aU  U R                  R                  R&                  R(                  nU R                  R                  R&                  R*                  nO,U R                  R,                  nU R                  R.                  nUR"                  R1                  X5        UR"                  R1                  X5        OU" U R                  SSS9U l        [3        UR
                  5      U l        [6        UR8                     U l        g ! , (       d  f       GN,= f)	Nr   )kernel_sizepaddinggroupsweight_normr   modifier_rankweight)namedimparametrizations)super__init__nnConv1dhidden_sizenum_conv_pos_embeddingsnum_conv_pos_embedding_groupsconv
batch_normconv_pos_batch_normBatchNorm1dutilsr   hasattrr"   r   	deepspeedzeroGatheredParametersr   	original0	original1weight_gweight_vregister_external_parameterHubertSamePadLayerr   r   feat_extract_activation
activation)selfconfigr   r0   r5   r6   	__class__s         a/var/www/html/shao/venv/lib/python3.13/site-packages/transformers/models/hubert/modular_hubert.pyr$   &HubertPositionalConvEmbedding.__init__+   s   II6622a777
	 %% nnV-?-?@DO((..Krxx00-@@ hh77CC)++ ^^66tyy7G7GWX6Y +DIIH! LDI Z499&899#yy99@@JJH#yy99@@JJH#yy11H#yy11H::4J::4J'		aH	)&*H*HI !?!?@ ZYs   
J
Jc                     UR                  SS5      nU R                  b  U R                  U5      nU R                  U5      nU R                  U5      nU R	                  U5      nUR                  SS5      nU$ )Nr   r   )	transposer+   r*   r   r:   r;   hidden_statess     r>   forward%HubertPositionalConvEmbedding.forwardP   sn    %//15??& OOM:M		-0]36%//15    )r:   r+   r*   r   __name__
__module____qualname____firstlineno__r$   rD   __static_attributes____classcell__r=   s   @r>   r   r   *   s    #AJ	 	rF   r   c                       \ rS rSrSrg)r8   \    NrH   rI   rJ   rK   rL   rQ   rF   r>   r8   r8   \       rF   r8   c                       \ rS rSrSrg)HubertFeatureEncoder`   rQ   NrR   rQ   rF   r>   rU   rU   `   rS   rF   rU   c                   .   ^  \ rS rSrU 4S jrS rSrU =r$ )HubertFeatureProjectiond   c                 x  > [         TU ]  5         UR                  U l        U R                  (       a1  [        R                  " UR
                  S   UR                  S9U l        [        R                  " UR
                  S   UR                  5      U l
        [        R                  " UR                  5      U l        g )N)eps)r#   r$   feat_proj_layer_normr%   	LayerNormconv_dimlayer_norm_eps
layer_normLinearr'   
projectionDropoutfeat_proj_dropoutdropoutr;   r<   r=   s     r>   r$    HubertFeatureProjection.__init__e   s}    $*$?$?!$$ ll6??2+>FDYDYZDO))FOOB$79K9KLzz&":":;rF   c                     U R                   (       a  U R                  U5      nU R                  U5      nU R                  U5      nU$ )N)r]   ra   rc   rf   rB   s     r>   rD   HubertFeatureProjection.forwardm   s;    $$ OOM:M6]3rF   )rf   r]   ra   rc   rG   rN   s   @r>   rX   rX   d   s    < rF   rX   c                       \ rS rSrSrg)HubertEncoderv   rQ   NrR   rQ   rF   r>   rl   rl   v   rS   rF   rl   c                       \ rS rSrSrg)HubertEncoderStableLayerNormz   rQ   NrR   rQ   rF   r>   ro   ro   z   rS   rF   ro   c                       \ rS rSr% \\S'   SrSrSrSr	Sr
SrS rS\\R                  \4   4S jrS	\S
\R                  4S jrSrg)HubertPreTrainedModel~   r<   hubertinput_valuesTc                    [        U[        R                  5      (       ak  UR                  R                  R                  SU R                  R                  S9  UR                  b%  UR                  R                  R                  5         gg[        U[        R                  [        R                  [        R                  45      (       aJ  UR                  R                  R                  5         UR                  R                  R                  S5        g[        U[        R                  5      (       Gai  [        5       (       a  SSKn[#        US5      (       a~  [#        US5      (       am  UR$                  R'                  UR(                  UR*                  /SS9   [        R,                  R/                  UR                  R                  5        SSS5        OUR$                  R'                  UR                  SS9   [        R,                  R/                  UR                  R                  5        SSS5        O3[        R,                  R/                  UR                  R                  5        UR                  b%  UR                  R                  R                  5         gg[        U[0        5      (       a7  [#        US	5      (       a%  UR2                  R                  R5                  5         gg[        U[6        5      (       aR  [#        US
5      (       a@  UR8                  R                  R                  SU R                  R:                  S-   -  5        ggg! , (       d  f       N= f! , (       d  f       GN= f)zInitialize the weights        )meanstdNg      ?r   r6   r5   r   masked_spec_embedlayer_weightsr   )
isinstancer%   rb   r   datanormal_r<   initializer_rangebiaszero_r^   	GroupNormr-   fill_r&   r   r0   r/   r1   r2   r6   r5   initkaiming_normal_HubertModelrz   uniform_HubertForSequenceClassificationr{   num_hidden_layers)r;   moduler0   s      r>   _init_weights#HubertPreTrainedModel._init_weights   sV   fbii(( MM&&CT[[5R5R&S{{&  &&( 'r||R^^ LMMKK""$MM$$S)		**)++ 6:..76:3N3N"::FOOV__;]mn:o//0B0BC po #::6==XY:Z//0B0BC [Z ''(:(:;{{&  &&( ',,v233((--668 4 ?@@v//$$))//t{{7T7TWX7X0YZ 0 A po [Zs   4M94M!
M!
M0input_lengthsc                     S n[        U R                  R                  U R                  R                  5       H  u  p4U" XU5      nM     U$ )z8
Computes the output length of the convolutional layers
c                 8    [         R                  " X-
  USS9S-   $ )Nfloor)rounding_moder   )torchdiv)input_lengthr   strides      r>   _conv_out_lengthPHubertPreTrainedModel._get_feat_extract_output_lengths.<locals>._conv_out_length   s      99\7wWZ[[[rF   )zipr<   conv_kernelconv_stride)r;   r   r   r   r   s        r>    _get_feat_extract_output_lengths6HubertPreTrainedModel._get_feat_extract_output_lengths   sG    
	\
 $'t{{'>'>@W@W#XK,]PM $Y rF   feature_vector_lengthattention_maskc                    U R                  UR                  S5      5      R                  [        R                  5      nUR
                  S   n[        R                  " XA4UR                  UR                  S9nSU[        R                  " UR
                  S   UR                  S9US-
  4'   UR                  S/5      R                  S5      R                  S/5      R                  5       nU$ )Nr[   r   )dtypedevicer   )r   )r   sumtor   longshapezerosr   r   arangeflipcumsumbool)r;   r   r   output_lengths
batch_sizes        r>   "_get_feature_vector_attention_mask8HubertPreTrainedModel._get_feature_vector_attention_mask   s    >>~?Q?QRT?UVYYZ_ZdZde#))!,
/~7K7KTbTiTi
 uv^%9%9!%<^EZEZ[]kno]opq',,bT299"=BBB4HMMOrF   rQ   N)rH   rI   rJ   rK   r   __annotations__base_model_prefixmain_input_namesupports_gradient_checkpointing_supports_flash_attn_supports_sdpa_supports_flex_attnr   r   r   
LongTensorintr   r   rL   rQ   rF   r>   rr   rr   ~   sh     $O&*#N[BeEDTDTVYDY>Z 
 
]b]m]m 
rF   rr   c                      ^  \ rS rSrS\4U 4S jjrS rS r     SS\\	R                     S\\	R                     S\\	R                     S	\\   S
\\   S\\   S\\\4   4S jjrSrU =r$ )r      r<   c                   > [         TU ]  U5        Xl        [        U5      U l        [        U5      U l        UR                  S:  d  UR                  S:  aG  [        R                  " [        R                  " UR                  5      R                  5       5      U l        UR                   (       a  [#        U5      U l        O['        U5      U l        U R)                  5         U ?g )Nrw   )r#   r$   r<   rU   feature_extractorrX   feature_projectionmask_time_probmask_feature_probr%   	Parameterr   Tensorr'   r   rz   do_stable_layer_normro   encoderrl   	post_initadapterrg   s     r>   r$   HubertModel.__init__   s     !5f!="9&"A  3&&*B*BS*H%'\\%,,v?Q?Q2R2[2[2]%^D"&&7?DL(0DL 	LrF   c                     [        S5      eNzNot needed for HubertAttributeErrorr;   s    r>   freeze_feature_extractor$HubertModel.freeze_feature_extractor       455rF   c                     [        S5      er   r   r   s    r>   freeze_feature_encoder"HubertModel.freeze_feature_encoder   r   rF   ru   r   mask_time_indicesoutput_attentionsoutput_hidden_statesreturn_dictreturnc                    Ub  UOU R                   R                  nUb  UOU R                   R                  nUb  UOU R                   R                  nU R	                  U5      nUR                  SS5      nUb  U R                  UR                  S   U5      nU R                  U5      nU R                  XS9nU R                  UUUUUS9n	U	S   nU(       d	  U4U	SS -   $ [        UU	R                  U	R                  S9$ )a  
mask_time_indices (`torch.BoolTensor` of shape `(batch_size, sequence_length)`, *optional*):
    Indices to mask extracted features for contrastive loss. When in training mode, model learns to predict
    masked extracted features in *config.proj_codevector_dim* space.

Example:

```python
>>> from transformers import AutoProcessor, HubertModel
>>> from datasets import load_dataset

>>> processor = AutoProcessor.from_pretrained("facebook/hubert-large-ls960-ft")
>>> model = HubertModel.from_pretrained("facebook/hubert-large-ls960-ft")


>>> def map_to_array(example):
...     example["speech"] = example["audio"]["array"]
...     return example


>>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
>>> ds = ds.map(map_to_array)

>>> input_values = processor(ds["speech"][0], return_tensors="pt").input_values  # Batch size 1
>>> hidden_states = model(input_values).last_hidden_state
```Nr   r   )r   )r   r   r   r   r   )last_hidden_staterC   
attentions)r<   r   r   use_return_dictr   rA   r   r   r   _mask_hidden_statesr   r   rC   r   )
r;   ru   r   r   r   r   r   extract_featuresrC   encoder_outputss
             r>   rD   HubertModel.forward   s)   F 2C1N-TXT_T_TqTq$8$D $++JjJj 	 &1%<k$++B]B]11,?+55a;%!DDEUE[E[\]E^`noN//0@A000d,,)/!5# ' 
 (*!#oab&999+)77&11
 	
rF   )r<   r   r   r   rz   )NNNNN)rH   rI   rJ   rK   r   r$   r   r   r   r   r   FloatTensorr   r   tupler   rD   rL   rM   rN   s   @r>   r   r      s    | &66 269=,0/3&*D
u||,D
 !.D
 $E$5$56	D

 $D>D
 'tnD
 d^D
 
uo%	&D
 D
rF   r   c                       \ rS rSrSrg)HubertForCTCi&  rQ   NrR   rQ   rF   r>   r   r   &  rS   rF   r   c                       \ rS rSrSrg)r   i*  rQ   NrR   rQ   rF   r>   r   r   *  rS   rF   r   )r   r   r   rr   )(__doc__typingr   r   r   torch.nnr%   activationsr   integrations.deepspeedr   modeling_outputsr   modeling_utilsr	   r.   r
   wav2vec2.modeling_wav2vec2r   r   r   r   r   r   r   configuration_hubertr   _HIDDEN_STATES_START_POSITIONModuler   r8   rU   rX   rl   ro   rr   r   r   r   __all__rQ   rF   r>   <module>r      s     "   ! @ / - #   / !" /BII /d	- 		1 	bii $	O 		#A 	 CO C CL^
-!6 ^
B	> 		&G 	 frF   