
    <hI                        S r SSKrSSKrSSKJrJr  SSKrSSKrSSKJr  SSK	J
r
  SSKJr  SSKJr  SS	KJr  SS
KJr  SSKJr  SSKJrJrJrJrJrJrJrJrJrJrJ r   SSK!J"r"  Sr# " S S\5      r$ " S S\5      r% " S S\5      r& " S S\RN                  5      r( " S S\5      r) " S S\RN                  5      r* " S S\5      r+ " S S\+5      r, " S  S!\5      r- " S" S#\5      r. " S$ S%\5      r/ " S& S'\RN                  5      r0\ " S( S)\5      5       r1\ " S* S+\15      5       r2 " S, S-\5      r3 " S. S/\5      r4/ S0Qr5g)1zPyTorch SEW model.    N)OptionalUnion)nn   )ACT2FN)is_deepspeed_zero3_enabled)is_fsdp_managed_module)BaseModelOutput)PreTrainedModel)auto_docstring   )Wav2Vec2AttentionWav2Vec2EncoderLayerWav2Vec2FeatureEncoderWav2Vec2FeedForwardWav2Vec2ForCTC!Wav2Vec2ForSequenceClassificationWav2Vec2GroupNormConvLayerWav2Vec2LayerNormConvLayerWav2Vec2NoLayerNormConvLayerWav2Vec2SamePadLayer_compute_mask_indices   )	SEWConfigc                       \ rS rSrSrg)SEWNoLayerNormConvLayer2    N__name__
__module____qualname____firstlineno____static_attributes__r       [/var/www/html/shao/venv/lib/python3.13/site-packages/transformers/models/sew/modular_sew.pyr   r   2       r%   r   c                       \ rS rSrSrg)SEWLayerNormConvLayer6   r   Nr   r   r%   r&   r)   r)   6   r'   r%   r)   c                       \ rS rSrSrg)SEWGroupNormConvLayer:   r   Nr   r   r%   r&   r,   r,   :   r'   r%   r,   c                   .   ^  \ rS rSrU 4S jrS rSrU =r$ )SEWPositionalConvEmbedding>   c           	        > [         TU ]  5         [        R                  " UR                  UR                  UR
                  UR
                  S-  UR                  UR                  S9U l        [        R                  R                  n[        [        R                  R                  S5      (       a$  [        R                  R                  R                  n[        5       (       Ga%  SS KnUR                  R!                  U R                  R"                  SS9   U" U R                  SSS9U l        S S S 5        [        U R                  S5      (       aU  U R                  R                  R"                  R$                  nU R                  R                  R"                  R&                  nO,U R                  R(                  nU R                  R*                  nUR                  R-                  X5        UR                  R-                  X5        OU" U R                  SSS9U l        [/        UR
                  5      U l        [2        UR4                     U l        g ! , (       d  f       GN,= f)	Nr   )kernel_sizepaddinggroupsstrideweight_normr   modifier_rankweight)namedimparametrizations)super__init__r   Conv1dhidden_sizenum_conv_pos_embeddingsnum_conv_pos_embedding_groupssqueeze_factorconvutilsr6   hasattrr<   r   	deepspeedzeroGatheredParametersr9   	original0	original1weight_gweight_vregister_external_parameterSEWSamePadLayerr3   r   feat_extract_activation
activation)selfconfigr6   rG   rL   rM   	__class__s         r&   r>   #SEWPositionalConvEmbedding.__init__?   s   II6622a777((
	 hh**288,,m<<((33??K%''224993C3CST2U'		aH	 Vtyy"4559955<<FF9955<<FF99--99--NN66tFNN66tF#DIIH!DDI&v'E'EF !?!?@ VUs   I
I"c                 l    U R                  U5      nU R                  U5      nU R                  U5      nU$ N)rD   r3   rQ   )rR   hidden_statess     r&   forward"SEWPositionalConvEmbedding.forwarda   s2    		-0]36r%   )rQ   rD   r3   r    r!   r"   r#   r>   rY   r$   __classcell__rT   s   @r&   r/   r/   >   s     AD r%   r/   c                       \ rS rSrSrg)rO   i   r   Nr   r   r%   r&   rO   rO   i   r'   r%   rO   c                   .   ^  \ rS rSrU 4S jrS rSrU =r$ )SEWUpsamplingm   c                    > [         TU ]  5         [        R                  " UR                  UR                  UR
                  -  5      U l        [        UR                     U l	        UR
                  U l        g rW   )
r=   r>   r   Linearr@   rC   
projectionr   rP   rQ   rR   rS   rT   s     r&   r>   SEWUpsampling.__init__n   sW    ))F$6$68J8JVMbMb8bc !?!?@$33r%   c                 &   U R                  U5      nU R                  U5      nU R                  S:  a^  UR                  5       u  p#nX0R                  -  nX@R                  -  nUR	                  X#U R                  U5      nUR	                  X%U5      nU$ )Nr   )re   rQ   rC   sizereshape)rR   rX   bszsrc_lensrc_embed_dimtgt_lentgt_embed_dims          r&   rY   SEWUpsampling.forwardt   s    66"*7*<*<*>'C- 3 33G)-@-@@M)11#@S@SUbcM)11#NMr%   )rQ   re   rC   r[   r]   s   @r&   ra   ra   m   s    4 r%   ra   c                       \ rS rSrSrg)SEWFeatureEncoder   r   Nr   r   r%   r&   rr   rr      r'   r%   rr   c                   (   ^  \ rS rSrU 4S jrSrU =r$ )SEWFeatureExtractor   c                    > [         TU ]  U5        [        R                  " SU R                  R
                   SU R                  R                  S   R
                   S3[        5        g )NzThe class `zD` has been depreciated and will be removed in Transformers v5. Use `r   z
` instead.)r=   r>   warningswarnrT   r    	__bases__FutureWarningrf   s     r&   r>   SEWFeatureExtractor.__init__   s[     $..112 3NN,,Q/889E 		
r%   r   )r    r!   r"   r#   r>   r$   r\   r]   s   @r&   ru   ru      s    
 
r%   ru   c                       \ rS rSrSrg)SEWAttention   r   Nr   r   r%   r&   r~   r~      r'   r%   r~   c                       \ rS rSrSrg)SEWFeedForward   r   Nr   r   r%   r&   r   r      r'   r%   r   c                       \ rS rSrSrg)SEWEncoderLayer   r   Nr   r   r%   r&   r   r      r'   r%   r   c                   :   ^  \ rS rSrU 4S jr    SS jrSrU =r$ )
SEWEncoder   c                   > [         TU ]  5         Xl        [        U5      U l        [
        R                  " UR                  UR                  5      U l        [
        R                  " UR                  UR                  S9U l        [
        R                  " UR                  5      U l        [
        R                   " [#        UR$                  5       Vs/ sH  n['        U5      PM     sn5      U l        [+        U5      U l        SU l        g s  snf )NepsF)r=   r>   rS   r/   pos_conv_embedr   	AvgPool1drC   pool	LayerNormr@   layer_norm_eps
layer_normDropouthidden_dropoutdropout
ModuleListrangenum_hidden_layersr   layersra   upsamplegradient_checkpointing)rR   rS   _rT   s      r&   r>   SEWEncoder.__init__   s    8@LL!6!68M8MN	,,v'9'9v?T?TUzz&"7"78mmeFLdLdFe$fFe_V%<Fe$fg%f-&+# %gs   Dc           	         U(       a  SOS nU(       a  SOS nUGb  UR                  S5      R                  SSUR                  S   5      nU R                  R                  S:X  a  SX) '   Ub  SU;   a  UOS nGO_SX) '   UR                  5       R                  S5      n	XR                  R                  -  n
UR                  S   U R                  R                  -  n[        R                  " SXR                  S9R                  SS5      R                  U
R                  S   S5      nXR                  SS5      :  R                  5       nS	US S 2S S S S 24   R                  UR                  S
9-
  nU[        R                  " UR                  5      R                   -  nUR                  UR                  S   SUR                  S   UR                  S   5      nUR                  S   nUR#                  SS5      nU R%                  U5      nU R'                  U5      n[!        UR)                  S5      UR)                  S5      5      nUSS U24   USS U24   -   nUR#                  SS5      nU R+                  U5      nU R-                  U5      n[/        5       =(       d    [1        U 5      nU R2                   H  nU(       a  Xa4-   n[        R4                  " / 5      nU R6                  =(       a    UU R                  R8                  :  nU(       a  U(       a  U" XUS9nUS   nU(       a  SnU(       d  M}  UWS   4-   nM     U(       a  Xa4-   nU R;                  U5      nUR                  S   U:  a3  [<        R>                  RA                  USSSXR                  S   -
  45      nU(       d  [C        S XU4 5       5      $ [E        UUUS9$ )Nr   r   r   flash_attention_2        r   device      ?)dtype.)attention_maskoutput_attentionsNNc              3   ,   #    U H  oc  M  Uv   M     g 7frW   r   ).0vs     r&   	<genexpr>%SEWEncoder.forward.<locals>.<genexpr>   s     m$[q$[s   	last_hidden_staterX   
attentions)#	unsqueezerepeatshaperS   _attn_implementationlongsumrC   torcharanger   viewexpandtor   finfomin	transposer   r   ri   r   r   r   r	   r   randtraining	layerdropr   r   
functionalpadtupler
   )rR   rX   r   r   output_hidden_statesreturn_dictall_hidden_statesall_self_attentionsexpand_attention_maskinput_lengthsoutput_lengthsmax_encoder_lengthattention_idsn_input_timestepsposition_embeddingspooled_hidden_states
min_lengthsynced_gpuslayerdropout_probabilityskip_the_layerlayer_outputss                         r&   rY   SEWEncoder.forward   s    #7BD$5b4%$2$<$<R$@$G$G1mNaNabcNd$e!{{//3FF8;454B4NSTXfSfmq 9<45!/!4!4!6 ; ;B ?!.++2L2L!L%2%8%8%;t{{?Y?Y%Y"LL$6?T?TUT!R[VN003R8 
 #02E2Eb!2L"L!R!R!T "%~atQ6F'G'J'JQ^QdQd'J'e!e!/%++m>Q>Q2R2V2V!V!/!6!6"((+Q0D0DR0H.J^J^_aJb" *//2%//15"11-@#yy7,11"57K7P7PQS7TU
,S+:+-=>ATUXZe[eZeUeAff%//156]302R6LT6R[[E#$58H$H! #(**R.!]]Z/BT[[EZEZ/ZN![ %!Te! !.a 0 ,  &9]1=M<O&O#' !*   14D Dm4q!$55MM--maAGX[n[nop[qGq=rsMm]GZ$[mmm++*
 	
r%   )rS   r   r   r   r   r   r   r   )NFFTr[   r]   s   @r&   r   r      s"    	, "W
 W
r%   r   c                       \ rS rSr% \\S'   SrSrSrSr	Sr
SrS rS\\R                  \4   4S	 jrS
\S\R                  4S jrSrg)SEWPreTrainedModeli  rS   sewinput_valuesTFc           
         [        U[        5      (       a  [        R                  R	                  UR
                  R                  SS[        R                  " SUR
                  R                  S   UR
                  R                  -  -  5      -  S9  [        R                  R                  UR
                  R                  S5        GO)[        U[        R                  5      (       a:  UR                  R                  R	                  SU R                  R                   S9  GO[        U[        R"                  [        R$                  45      (       aK  UR                  R                  R'                  5         UR                  R                  R)                  S5        GOV[        U[        R*                  5      (       Ga6  [-        5       (       a  SSKn[1        US5      (       a~  [1        US	5      (       am  UR2                  R5                  UR6                  UR8                  /SS
9   [        R                  R;                  UR                  R                  5        SSS5        OUR2                  R5                  UR                  SS
9   [        R                  R;                  UR                  R                  5        SSS5        O3[        R                  R;                  UR                  R                  5        [        U[        R                  [        R*                  45      (       a3  UR                  b%  UR                  R                  R'                  5         ggg! , (       d  f       Nq= f! , (       d  f       N= f)zInitialize the weightsr   r   r   )meanstdr   r   NrM   rL   r7   )
isinstancer/   r   initnormal_rD   r9   mathsqrtr2   in_channels	constant_biasrd   datarS   initializer_ranger   	GroupNormzero_fill_r?   r   rG   rF   rH   rI   rM   rL   kaiming_normal_)rR   modulerG   s      r&   _init_weights SEWPreTrainedModel._init_weights  sB   f899GGOO""		!v{{'>'>q'AFKKD[D['["\]]  
 GGfkk..2		** MM&&CT[[5R5R&Sr|| <==KK""$MM$$S)		**)++ 6:..76:3N3N"::FOOV__;]mn:o//0B0BC po #::6==XY:Z//0B0BC [Z ''(:(:;fryy"))455&++:QKK""$ ;R5 po [Zs   4M 4M$
M!$
M2r   c                     S n[        U R                  R                  U R                  R                  5       H  u  p4U" XU5      nM     U$ )z8
Computes the output length of the convolutional layers
c                 8    [         R                  " X-
  USS9S-   $ )Nfloor)rounding_moder   )r   div)input_lengthr2   r5   s      r&   _conv_out_lengthMSEWPreTrainedModel._get_feat_extract_output_lengths.<locals>._conv_out_length3  s      99\7wWZ[[[r%   )ziprS   conv_kernelconv_stride)rR   r   r   r2   r5   s        r&    _get_feat_extract_output_lengths3SEWPreTrainedModel._get_feat_extract_output_lengths.  sG    
	\
 $'t{{'>'>@W@W#XK,]PM $Y r%   feature_vector_lengthr   c                    U R                  UR                  S5      5      R                  [        R                  5      nUR
                  S   n[        R                  " XA4UR                  UR                  S9nSU[        R                  " UR
                  S   UR                  S9US-
  4'   UR                  S/5      R                  S5      R                  S/5      R                  5       nU$ )Nr   r   )r   r   r   r   )r   r   r   r   r   r   zerosr   r   r   flipcumsumbool)rR   r   r   r   
batch_sizes        r&   "_get_feature_vector_attention_mask5SEWPreTrainedModel._get_feature_vector_attention_mask=  s    >>~?Q?QRT?UVYYZ_ZdZde#))!,
/~7K7KTbTiTi
 uv^%9%9!%<^EZEZ[]kno]opq',,bT299"=BBB4HMMOr%   r   N)r    r!   r"   r#   r   __annotations__base_model_prefixmain_input_namesupports_gradient_checkpointing_supports_flash_attn_supports_sdpa_supports_flex_attnr   r   r   
LongTensorintr   r  r$   r   r%   r&   r   r     sg    $O&*#N%@eEDTDTVYDY>Z 
 
]b]m]m 
r%   r   c                   >  ^  \ rS rSrS\4U 4S jjr  SS\R                  S\\R                     S\\R                     4S jjr
\     SS\\R                     S\\R                     S\\R                     S	\\   S
\\   S\\   S\\\4   4S jj5       rSrU =r$ )SEWModeliJ  rS   c                   > [         TU ]  U5        Xl        [        U5      U l        [
        R                  " UR                  S   UR                  S9U l	        UR                  S   UR                  :g  U l        U R                  (       a3  [
        R                  " UR                  S   UR                  5      U l        [
        R                  " UR                  5      U l        UR"                  S:  d  UR$                  S:  aG  [
        R&                  " [(        R*                  " UR                  5      R-                  5       5      U l        [1        U5      U l        U R5                  5         g )Nr   r   r   )r=   r>   rS   rr   feature_extractorr   r   conv_dimr   r   r@   project_featuresrd   feature_projectionr   feat_proj_dropoutfeature_dropoutmask_time_probmask_feature_prob	Parameterr   Tensoruniform_masked_spec_embedr   encoder	post_initrf   s     r&   r>   SEWModel.__init__L  s     !26!:,,vr':@U@UV & 3v7I7I I  &(ii0CVEWEW&XD#!zz&*B*BC  3&&*B*BS*H%'\\%,,v?Q?Q2R2[2[2]%^D"!&) 	r%   rX   mask_time_indicesr   c                    [        U R                  SS5      (       d  U$ UR                  5       u  pEnUb(  U R                  R	                  UR
                  5      X'   OU R                  R                  S:  a  U R                  (       a  [        XE4U R                  R                  U R                  R                  UU R                  R                  S9n[        R                  " X!R                  [        R                  S9nU R                  R	                  UR
                  5      X'   U R                  R                  S:  a  U R                  (       a  [        XF4U R                  R                  U R                  R                   U R                  R"                  S9n[        R                  " XqR                  [        R                  S9nUSS2S4   R%                  SUS5      nSX'   U$ )	z
Masks extracted features along time axis and/or along feature axis according to
[SpecAugment](https://huggingface.co/papers/1904.08779).
apply_spec_augmentTNr   )	mask_probmask_lengthr   	min_masks)r   r   )r#  r$  r%  r   )getattrrS   ri   r  r   r   r  r   r   mask_time_lengthmask_time_min_masksr   tensorr   r  r  mask_feature_lengthmask_feature_min_masksr   )rR   rX   r   r   r  sequence_lengthr@   mask_feature_indicess           r&   _mask_hidden_statesSEWModel._mask_hidden_states`  s    t{{$8$??   4A3E3E3G0
[(/3/E/E/H/HI\I\/]M,[[''!+ 5-++44 KK88-++99! !&->G[G[chcmcm n/3/E/E/H/HI\I\/]M,;;((1,#8)++77 KK;;++<<	$  $)<<0DMaMainisis#t #74#@#G#GO]_#` 23M/r%   r   r   r   r   returnc                 b   Ub  UOU R                   R                  nUb  UOU R                   R                  nUb  UOU R                   R                  nU R	                  U5      nUR                  SS5      nU R                  U5      nU R                  (       a  U R                  U5      nU R                  U5      nUb  U R                  UR                  S   U5      nU R                  XS9nU R                  UUUUUS9n	U	S   nU(       d	  U4U	SS -   $ [        UU	R                  U	R                   S9$ )a  
mask_time_indices (`torch.BoolTensor` of shape `(batch_size, sequence_length)`, *optional*):
    Indices to mask extracted features for contrastive loss. When in training mode, model learns to predict
    masked extracted features in *config.proj_codevector_dim* space.
Nr   r   )r   )r   r   r   r   r   r   )rS   r   r   use_return_dictr  r   r   r  r  r  r  r   r.  r  r
   rX   r   )
rR   r   r   r   r   r   r   extract_featuresrX   encoder_outputss
             r&   rY   SEWModel.forward  sR    2C1N-TXT_T_TqTq$8$D $++JjJj 	 &1%<k$++B]B]11,?+55a;??+;<  #667GH,,-=>%!DD]EXEXYZE[]klN000d,,)/!5# ' 
 (*!#oab&999+)77&11
 	
r%   )rS   r  r  r  r  r   r  r  r   )NNNNN)r    r!   r"   r#   r   r>   r   FloatTensorr   r  r.  r   r  r  r   r   r
   rY   r$   r\   r]   s   @r&   r  r  J  s    y . :>59	,((, $E$5$56, !!1!12	,\  269=,0/3&*3
u||,3
 !.3
 $E$5$56	3

 $D>3
 'tn3
 d^3
 
uo%	&3
 3
r%   r  c                       \ rS rSrSrg)	SEWForCTCi  r   Nr   r   r%   r&   r8  r8    r'   r%   r8  c                       \ rS rSrSrg)SEWForSequenceClassificationi  r   Nr   r   r%   r&   r:  r:    r'   r%   r:  )r8  r:  r  r   )6__doc__r   rx   typingr   r   r   torch.utils.checkpointr   activationsr   integrations.deepspeedr   integrations.fsdpr	   modeling_outputsr
   modeling_utilsr   rE   r   wav2vec2.modeling_wav2vec2r   r   r   r   r   r   r   r   r   r   r   configuration_sewr   _HIDDEN_STATES_START_POSITIONr   r)   r,   Moduler/   rO   ra   rr   ru   r~   r   r   r   r   r  r8  r:  __all__r   r%   r&   <module>rH     sX      "    ! @ 7 / - #    ) !" 	: 		6 		6 	( (V	* 	BII ,	. 	
+ 
	$ 		( 		* 	c
 c
L B B BJ w
! w
 w
t	 		#D 	 Zr%   