
    PhB                       d Z ddlZddlZddlmZ ddlmZmZmZm	Z	 ddl
Z
ddlmc mZ ddl
mZ ddlmZ ddlmZ dd	lmZmZmZ dd
lmZmZ ddlmZmZmZmZ ddlm Z m!Z!m"Z"m#Z#m$Z$m%Z% ddl&m'Z'm(Z(m)Z)  e$jT                  e+      Z,d Z-d Z.d Z/dbdZ0de
jb                  de
jb                  fdZ2e e!d       G d de                     Z3e e!d       G d de                     Z4ee! G d de                     Z5 G d d ejl                        Z7 G d! d"ejl                        Z8 G d# d$ejl                        Z9 G d% d&ejl                        Z: G d' d(ejl                        Z; G d) d*ejl                        Z< G d+ d,ejl                        Z= G d- d.ejl                        Z> G d/ d0ejl                        Z? G d1 d2e      Z@ G d3 d4ejl                        ZA G d5 d6ejl                        ZB G d7 d8ejl                        ZC G d9 d:ejl                        ZD	 	 dcd;ejl                  d<e
jb                  d=e
jb                  d>e
jb                  d?ee
jb                     d@eEdAeEdBee
jb                     fdCZF G dD dEejl                        ZG G dF dGejl                        ZH G dH dIejl                        ZI G dJ dKejl                        ZJ G dL dMejl                        ZK G dN dOe      ZL G dP dQejl                        ZM G dR dSejl                        ZNe! G dT dUe             ZO G dV dWeO      ZP e!dX       G dY dZeO             ZQe! G d[ d\eO             ZRe! G d] d^eO             ZSe! G d_ d`eO             ZTg daZUy)dzPyTorch CLAP model.    N)	dataclass)AnyCallableOptionalUnion)nn   )ACT2FN)GradientCheckpointingLayer)BaseModelOutputBaseModelOutputWithPooling,BaseModelOutputWithPoolingAndCrossAttentions)ALL_ATTENTION_FUNCTIONSPreTrainedModel)apply_chunking_to_forward find_pruneable_heads_and_indicesmeshgridprune_linear_layer)ModelOutputauto_docstringcan_return_tuplefilter_out_non_signature_kwargslogging	torch_int   )ClapAudioConfig
ClapConfigClapTextConfigc                     | j                   \  }}}| dddddddf   j                  dd|d      }|j                  |||z  |      }|S )ae  
    Interpolate data in time domain. This is used to compensate the resolution reduction in downsampling of a CNN.

    Args:
        hidden_states (`torch.FloatTensor` of shape (batch_size, time_length, classes_num)):
            Input hidden states
        ratio (`int`):
            The ratio of the length of the output to the length of the input.
    Nr   )shaperepeatreshape)hidden_statesratio
batch_sizetime_lengthclasses_num	upsampleds         `/var/www/html/saasai/venv/lib/python3.12/site-packages/transformers/models/clap/modeling_clap.pyinterpolater*   +   sX     .;-@-@*ZkaD!m,33Aq%CI!!*kE.A;OI    c                     | j                   \  }}}}| j                  |||z  |||z  ||      } | j                  dddddd      j                         j                  d|||      }|S )aR  
    Returns the resized hidden states. The output shape should be `(batch_size * num_windows, window_size, window_size,
    num_channels)`

    Args:
        hidden_states (`torch.FloatTensor` of shape `(batch_size, height, width, num_channels)`):
            Input hidden states
        window_size (`int`):
            Window size
    r   r   r	            r    viewpermute
contiguous)r#   window_sizer%   heightwidthnum_channelswindowss          r)   window_partitionr:   <   s}     /<.A.A+J|!&&Fk);8Lk[gM ##Aq!Q15@@BGGKYdfrsGNr+   c                     | j                   d   }| j                  d||z  ||z  |||      } | j                  dddddd      j                         j                  d|||      } | S )a  
    Merges windows to produce higher resolution features.
    Args:
        windows (`torch.FloatTensor` of shape `(num_windows * batch_size, window_size, window_size, num_channels)`):
            Input windows
        window_size (`int`):
            Window size
        height (`int`):
            Height of the resized audio
        width (`int`):
            Width of the resized audio
    r0   r   r   r	   r-   r.   r/   r1   )r9   r5   r6   r7   r8   s        r)   window_reverser<   Q   sn     ==$Lll2v4e{6JKYdfrsGooaAq!Q/::<AA"feUabGNr+   c                     | j                  |      j                         }t        j                  |d      j	                  |      |z   |z  }|j                         |z   S )a  
    Replace non-padding symbols with their position numbers. Position numbers begin at padding_idx+1. Padding symbols
    are ignored. This is modified from fairseq's `utils.make_positions`.

    Args:
        x: torch.Tensor x:

    Returns: torch.Tensor
    r   dim)neinttorchcumsumtype_aslong)	input_idspadding_idxpast_key_values_lengthmaskincremental_indicess        r)   "create_position_ids_from_input_idsrK   e   sW     <<$((*D <<!4<<TBE[[_cc##%33r+   logitsreturnc                     t        j                  t        |       | j                        }t        j
                  j                  | |      S )Ndevice)rB   arangelenrP   r   
functionalcross_entropy)rL   labelss     r)   contrastive_lossrV   w   s1    \\#f+fmm<F==&&vv66r+   ze
    Base class for text model's outputs that also contains a pooling of the last hidden states.
    )custom_introc                       e Zd ZU dZdZeej                     ed<   dZ	eej                     ed<   dZ
eeej                  df      ed<   dZeeej                  df      ed<   y)ClapTextModelOutputz
    text_embeds (`torch.FloatTensor` of shape `(batch_size, output_dim)` *optional* returned when model is initialized with `with_projection=True`):
        The text embeddings obtained by applying the projection layer to the pooler_output.
    Ntext_embedslast_hidden_state.r#   
attentions)__name__
__module____qualname____doc__rZ   r   rB   FloatTensor__annotations__r[   r#   tupler\    r+   r)   rY   rY   |   sr    
 04K%++,359x 1 129=AM8E%"3"3S"89:A:>Ju00#567>r+   rY   zT
    ClapAudio model output to mimic the output of the original implementation.
    c                       e Zd ZU dZdZeej                     ed<   dZ	eej                     ed<   dZ
eeej                  df      ed<   dZeeej                  df      ed<   y)ClapAudioModelOutputz
    audio_embeds (`torch.FloatTensor` of shape `(batch_size, hidden_size)`):
        The Audio embeddings obtained by applying the projection layer to the pooler_output.
    Naudio_embedsr[   .r#   r\   )r]   r^   r_   r`   rg   r   rB   ra   rb   r[   r#   rc   r\   rd   r+   r)   rf   rf      sr    
 15L(5,,-459x 1 129=AM8E%"3"3S"89:A:>Ju00#567>r+   rf   c                      e Zd ZU dZdZeej                     ed<   dZ	eej                     ed<   dZ
eej                     ed<   dZeej                     ed<   dZeej                     ed<   dZeed<   dZeed	<   d
ee   fdZy)
ClapOutputa  
    loss (`torch.FloatTensor` of shape `(1,)`, *optional*, returned when `return_loss` is `True`):
        Contrastive loss for audio-text similarity.
    logits_per_audio (`torch.FloatTensor` of shape `(audio_batch_size, text_batch_size)`):
        The scaled dot product scores between `audio_embeds` and `text_embeds`. This represents the audio-text
        similarity scores.
    logits_per_text (`torch.FloatTensor` of shape `(text_batch_size, audio_batch_size)`):
        The scaled dot product scores between `text_embeds` and `audio_embeds`. This represents the text-audio
        similarity scores.
    text_embeds (`torch.FloatTensor` of shape `(batch_size, output_dim`):
        The text embeddings obtained by applying the projection layer to the pooled output of [`ClapTextModel`].
    audio_embeds (`torch.FloatTensor` of shape `(batch_size, output_dim`):
        The audio embeddings obtained by applying the projection layer to the pooled output of [`ClapAudioModel`].
    text_model_output (`BaseModelOutputWithPooling`):
        The output of the [`ClapTextModel`].
    audio_model_output (`BaseModelOutputWithPooling`):
        The output of the [`ClapAudioModel`].
    Nlosslogits_per_audiologits_per_textrZ   rg   text_model_outputaudio_model_outputrM   c                 H     t         fd j                         D              S )Nc              3   d   K   | ]'  }|d vr|   nt        |      j                          ) yw))rm   rn   N)getattrto_tuple).0kselfs     r)   	<genexpr>z&ClapOutput.to_tuple.<locals>.<genexpr>   s=      
   KKDGQXY]_`QaQjQjQll s   -0)rc   keysru   s   `r)   rr   zClapOutput.to_tuple   s#     
YY[
 
 	
r+   )r]   r^   r_   r`   rj   r   rB   ra   rb   rk   rl   rZ   rg   rm   r   rn   rc   r   rr   rd   r+   r)   ri   ri      s    & )-D(5$$
%,48hu001837OXe//07/3K%++,304L(5,,-448185929
%* 
r+   ri   c                   *     e Zd ZdZd fd	Zd Z xZS )ClapDropPathz
    Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks). This is a slightly
    refactored version of the `SwinDropPath` implementation.
    c                 0    t         |           || _        y N)super__init__	drop_prob)ru   r   	__class__s     r)   r~   zClapDropPath.__init__   s    "r+   c                 J   | j                   dk(  s| j                  s|S d| j                   z
  }|j                  d   fd|j                  dz
  z  z   }|t	        j
                  ||j                  |j                        z   }|j                          |j                  |      |z  }|S )N        r   r   )r   dtyperP   )
r   trainingr    ndimrB   randr   rP   floor_div)ru   r#   	keep_probr    random_tensoroutputs         r)   forwardzClapDropPath.forward   s    >>S   &	$$Q')DM4F4F4J,KK!EJJuM<O<OXeXlXl$mm""9-=r+   r|   )r]   r^   r_   r`   r~   r   __classcell__r   s   @r)   rz   rz      s    
#r+   rz   c                   .     e Zd ZdZdef fdZd Z xZS )ClapAudioAFFBlockz
    ATTENTIONAL FEATURE FUSION Block from CLAP, since in CLAP we are always in 2D mode, it is not needed to implement
    the 1D version.
    configc                    t         |           |j                  }|j                  }t	        ||z        }t        j                  t        j                  ||ddd      t        j                  |      t        j                  d      t        j                  ||ddd      t        j                  |            | _
        t        j                  t        j                  d      t        j                  ||ddd      t        j                  |      t        j                  d      t        j                  ||ddd      t        j                  |            | _        t        j                         | _        y )Nr   r   kernel_sizestridepaddingT)inplace)r}   r~   patch_embeds_hidden_sizeaff_block_rrA   r   
SequentialConv2dBatchNorm2dReLU	local_attAdaptiveAvgPool2d
global_attSigmoidsigmoid)ru   r   channelsdownsize_ratiointer_channelsr   s        r)   r~   zClapAudioAFFBlock.__init__   s   22++X78IIhAaQRSNN>*GGD!IInhAaQRSNN8$
 --  #IIhAaQRSNN>*GGD!IInhAaQRSNN8$
 zz|r+   c                     ||z   }| j                  |      | j                  |      z   }| j                  |      }d|z  |z  d|z  d|z
  z  z   }|S )Nr-   r   )r   r   r   )ru   r#   residualattention_inputfused_layer_outputr   s         r)   r   zClapAudioAFFBlock.forward   sb    '(2!^^O<t?__!\\*<=]"%77!h,!N`J`:aar+   r]   r^   r_   r`   r   r~   r   r   r   s   @r)   r   r      s    
$ $0r+   r   c                   0     e Zd ZdZdef fdZddZ xZS )ClapAudioPatchEmbedz
    This module converts the hidden states reshaped as an image to patch embeddings ready to be passed to the
    Transformer block.
    r   c                    t         |           t        |j                  t              r|j                  |j                  fn|j                  }t        |j
                  t              r|j
                  |j
                  fn|j
                  }t        |j                  t              r|j                  |j                  fn|j                  }|| _        || _        |d   |d   z  |d   |d   z  f| _        | j                  d   | j                  d   z  | _	        |j                  | _        |j                  | _        |d   |d   z
  dz  |d   |d   z
  dz  f}| j                  r|j                  dk(  rdnd}t        j                  |j                   |z  |j"                  |||      | _        |j&                  rt        j(                  |j"                        nt        j*                         | _        | j                  rZt/        |      | _        t        j                  |j                   |j"                  |d   |d   dz  f|d   |d   dz  f|      | _        y y )Nr   r   r-   channel_mapr.   r   r	   )r}   r~   
isinstance	spec_sizerA   
patch_sizepatch_strideimg_size	grid_sizenum_patchesflatten_patch_embedsflattenenable_fusionfusion_typer   r   patch_embed_input_channelsr   projenable_patch_layer_norm	LayerNormIdentitynormr   fusion_model
mel_conv2d)ru   r   r   r   r   r   scale_factorr   s          r)   r~   zClapAudioPatchEmbed.__init__  s+   ;EfFVFVX[;\F$$f&6&67bhbrbr6@ARARTW6XV 1 12^d^o^o 	 ;EVEXEXZ]:^V  &"5"56djdwdw 	 !("1+a8(1+VW:XY>>!,t~~a/@@22#11qMLO39JqMLYZO<[`a;ab!//f6H6HM6Yq`aII--<++"
	 FLEcEcBLL!@!@Aikititiv	 1& 9D ii11//']JqMA,=>$Qa1)<=DO r+   c                    | j                   r|d d ddd d d d f   }|j                  \  }}}}|| j                  d   k7  s|| j                  d   k7  r2t        d| d| d| j                  d    d| j                  d    d	      | j	                  |      }|j                  d      }t        |      dkD  r||dd d d d d f   j                         }	|	j                  \  }}}}|	j                  ||z  d||      }	| j                  |	      }	|	j                  \  }
}}}|	j                  |||||      }	|	j                  d      j                         j                  d	      }	|	j                  d      }t        j                  j                  j                  |	d||z
  fd
d      }	| j!                  ||   |	      ||<   |}nx|j                  \  }
}
}}|| j                  d   k7  s|| j                  d   k7  r2t        d| d| d| j                  d    d| j                  d    d	      | j	                  |      }| j                  r!|j                  d      j#                  dd      }| j%                  |      }|S )Nr   r   zInput audio size (*z) doesn't match model (z).r0   )r   r-   r	   r   r.   r	   constantr-   )r   r    r   
ValueErrorr   sizerR   r4   r2   r   r3   r   rB   r   rS   padr   	transposer   )ru   r#   is_longer_idxglobal_hidden_statesr%   r8   r6   r7   output_widthlocal_hidden_states_featureslocal_widths                r)   r   zClapAudioPatchEmbed.forward9  s   #0AaCA#>  7K6P6P3Jfeq))UdmmA6F-F (%8OPTP]P]^_P`Oaabcgcpcpqrcsbttvw  $(99-A#B /44R8L=!A%&3M12q!4K&L&W&W&Y#:M:S:S7
L&%&9&>&>zL?XZ[]cej&k#&*oo6I&J#-@-F-F*8VU&9&>&>z<Yacikp&q#&9&A&A/&R&]&]&_&g&ghi&j#166r:&+hh&9&9&=&='!\K-G)H*VW'# 7;6G6G(79L7$]3 1M"/"5"5Aq&%q))UdmmA6F-F (%8OPTP]P]^_P`Oaabcgcpcpqrcsbttvw  !IIm4M<<)11!4>>q!DM		-0r+   r|   r   r   s   @r)   r   r   	  s    
( (T/r+   r   c                        e Zd Z fdZ	 	 	 ddej
                  deej                     deej                     dee   de	ej
                     f
dZ
 xZS )	ClapAudioSelfAttentionc                    t         |           ||z  dk7  rt        d| d| d      || _        t	        ||z        | _        | j                  | j
                  z  | _        t        |t        j                  j                        r|n||f| _        t        j                  t        j                  d| j                  d   z  dz
  d| j                  d   z  dz
  z  |            | _        t        j"                  | j                  d         }t        j"                  | j                  d         }t        j$                  t'        ||gd            }t        j(                  |d      }|d d d d d f   |d d d d d f   z
  }	|	j+                  ddd      j-                         }	|	d d d d dfxx   | j                  d   dz
  z  cc<   |	d d d d dfxx   | j                  d   dz
  z  cc<   |	d d d d dfxx   d| j                  d   z  dz
  z  cc<   |	j/                  d	      }
| j1                  d
|
       t        j2                  | j                  | j                  |j4                        | _        t        j2                  | j                  | j                  |j4                        | _        t        j2                  | j                  | j                  |j4                        | _        t        j<                  |j>                        | _         y )Nr   The hidden size (6) is not a multiple of the number of attention heads ()r-   r   ij)indexingr0   relative_position_indexbias)!r}   r~   r   num_attention_headsrA   attention_head_sizeall_head_sizer   collectionsabcIterabler5   r   	ParameterrB   zerosrelative_position_bias_tablerQ   stackr   r   r3   r4   sumregister_bufferLinearqkv_biasquerykeyvalueDropoutattention_probs_dropout_probdropout)ru   r   r?   	num_headsr5   coords_hcoords_wcoordscoords_flattenrelative_coordsr   r   s              r)   r~   zClapAudioSelfAttention.__init__m  s   ?a#C5(^_h^iijk  $- #&sY#7 !558P8PP%k;??3K3KLKS^`kRl 	 -/LLKKT--a0014T=M=Ma=P9PST9TUW`a-
)
 << 0 0 34<< 0 0 34Xx&:TJKvq1(At4~aqj7QQ)11!Q:EEG1a D$4$4Q$7!$;; 1a D$4$4Q$7!$;; 1a A(8(8(;$;a$?? "1"5"5b"968OPYYt1143E3EFOO\
99T//1C1C&//ZYYt1143E3EFOO\
zz&"E"EFr+   r#   attention_mask	head_maskoutput_attentionsrM   c                    |j                   \  }}}||d| j                  f}| j                  |      j                  |      j	                  dd      }	| j                  |      j                  |      j	                  dd      }
| j                  |      j                  |      j	                  dd      }t        j                  |	|
j	                  dd            }|t        j                  | j                        z  }| j                  | j                  j                  d         }|j                  | j                  d   | j                  d   z  | j                  d   | j                  d   z  d      }|j                  ddd      j                         }||j!                  d      z   }|r|j                   d   }|j                  ||z  || j"                  ||      }||j!                  d      j!                  d      z   }|j                  d| j"                  ||      }t$        j&                  j)                  |d      }| j+                  |      }|||z  }t        j                  ||      }|j                  dddd      j                         }|j-                         d d | j.                  fz   }|j                  |      }|r||f}|S |f}|S )Nr0   r   r-   r   r>   r	   )r    r   r   r2   r   r   r   rB   matmulmathsqrtr   r   r5   r3   r4   	unsqueezer   r   rS   softmaxr   r   r   )ru   r#   r   r   r   r%   r?   r8   hidden_shapequery_layer	key_layervalue_layerattention_scoresrelative_position_bias
mask_shapeattention_probscontext_layernew_context_layer_shapeoutputss                      r)   r   zClapAudioSelfAttention.forward  s    )6(;(;%
C"CT-E-EFjj/44\BLLQPQRHH]+00>HHAN	jj/44\BLLQPQR !<<Y5H5HR5PQ+dii8P8P.QQ!%!B!B4C_C_CdCdegCh!i!7!<!<Q$"2"21"55t7G7G7JTM]M]^_M`7`bd"
 "8!?!?1a!H!S!S!U+.D.N.Nq.QQ%'--a0J/44j(*d6N6NPSUX   0.2J2J12M2W2WXY2ZZ/44R9Q9QSVX[\ --//0@b/I ,,7  -	9O_kB%--aAq9DDF"/"4"4"6s";t?Q?Q>S"S%**+BC6G=/2 O\M]r+   NNFr]   r^   r_   r~   rB   Tensorr   ra   boolrc   r   r   r   s   @r)   r   r   l  sq    #GP 7;15,16||6 !!2!236 E--.	6
 $D>6 
u||	6r+   r   c                   n     e Zd Z fdZdej
                  dej
                  dej
                  fdZ xZS )ClapAudioSelfOutputc                     t         |           t        j                  ||      | _        t        j
                  |j                        | _        y r|   )r}   r~   r   r   denser   r   r   ru   r   r?   r   s      r)   r~   zClapAudioSelfOutput.__init__  s6    YYsC(
zz&"E"EFr+   r#   input_tensorrM   c                 J    | j                  |      }| j                  |      }|S r|   r  r   ru   r#   r  s      r)   r   zClapAudioSelfOutput.forward  s$    

=1]3r+   r]   r^   r_   r~   rB   r  r   r   r   s   @r)   r  r    s2    G
U\\  RWR^R^ r+   r  c                        e Zd Z fdZd Z	 	 	 d	dej                  deej                     deej                     dee	   de
ej                     f
dZ xZS )
ClapAudioAttentionc                     t         |           t        ||||      | _        t	        ||      | _        t               | _        y r|   )r}   r~   r   ru   r  r   setpruned_heads)ru   r   r?   r   r5   r   s        r)   r~   zClapAudioAttention.__init__  s8    *63	;O	)&#6Er+   c                 >   t        |      dk(  ry t        || j                  j                  | j                  j                  | j
                        \  }}t        | j                  j                  |      | j                  _        t        | j                  j                  |      | j                  _        t        | j                  j                  |      | j                  _	        t        | j                  j                  |d      | j                  _        | j                  j                  t        |      z
  | j                  _        | j                  j                  | j                  j                  z  | j                  _        | j
                  j                  |      | _        y Nr   r   r>   rR   r   ru   r   r   r  r   r   r   r   r   r  r   unionru   headsindexs      r)   prune_headszClapAudioAttention.prune_heads     u:?749900$))2O2OQUQbQb
u
 -TYY__eD		*499==%@		,TYY__eD		.t{{/@/@%QO )-		(E(EE
(R		%"&))"?"?$))B_B_"_		 --33E:r+   r#   r   r   r   rM   c                 j    | j                  ||||      }| j                  |d   |      }|f|dd  z   }|S )Nr   r   ru   r   )ru   r#   r   r   r   self_outputsattention_outputr  s           r)   r   zClapAudioAttention.forward  sG     yy	K\];;|AF#%QR(88r+   r  r]   r^   r_   r~   r&  rB   r  r   ra   r  rc   r   r   r   s   @r)   r  r    st    ";* 7;15,1
||
 !!2!23
 E--.	

 $D>
 
u||	
r+   r  c                   V     e Zd Z fdZdej
                  dej
                  fdZ xZS )ClapAudioIntermediatec                    t         |           t        j                  |t	        |j
                  |z              | _        t        |j                  t              rt        |j                     | _        y |j                  | _        y r|   )r}   r~   r   r   rA   	mlp_ratior  r   
hidden_actstrr
   intermediate_act_fnr  s      r)   r~   zClapAudioIntermediate.__init__  sa    YYsC(8(83(>$?@
f''-'-f.?.?'@D$'-'8'8D$r+   r#   rM   c                 J    | j                  |      }| j                  |      }|S r|   r  r3  ru   r#   s     r)   r   zClapAudioIntermediate.forward
  &    

=100?r+   r  r   s   @r)   r.  r.    #    9U\\ ell r+   r.  c                   V     e Zd Z fdZdej
                  dej
                  fdZ xZS )ClapAudioOutputc                     t         |           t        j                  t	        |j
                  |z        |      | _        t        j                  |j                        | _	        y r|   )
r}   r~   r   r   rA   r0  r  r   hidden_dropout_probr   r  s      r)   r~   zClapAudioOutput.__init__  sF    YYs6#3#3c#9:C@
zz&"<"<=r+   r#   rM   c                 J    | j                  |      }| j                  |      }|S r|   r  r6  s     r)   r   zClapAudioOutput.forward  s$    

=1]3r+   r  r   s   @r)   r:  r:    s#    >
U\\ ell r+   r:  c                        e Zd Zd fd	Zd Zd Zd Z	 	 	 ddej                  de	e
e
f   deej                     dee   d	ee   d
e	ej                  ej                  f   fdZ xZS )ClapAudioLayerc                    t         |           |j                  | _        || _        |j                  | _        || _        t        j                  ||j                        | _	        t        |||| j                        | _        |dkD  rt        |      nt        j                         | _        t        j                  ||j                        | _        t!        ||      | _        t%        ||      | _        y )Neps)r5   r   )r}   r~   chunk_size_feed_forward
shift_sizer5   input_resolutionr   r   layer_norm_epslayernorm_beforer  	attentionrz   r   	drop_pathlayernorm_afterr.  intermediater:  r   )ru   r   r?   rE  r   drop_path_raterD  r   s          r)   r~   zClapAudioLayer.__init__  s    '-'E'E$$!-- 0 "Sf6K6K L+FCPTP`P`a9G#9Mn5SUS^S^S`!||CV5J5JK1&#>%fc2r+   c                    t        |      | j                  k  rgt        d      | _        t        j
                  j                         r(t	        j                   t	        j                  |            n
t        |      | _        y y Nr   )minr5   r   rD  rB   jit
is_tracingtensor)ru   rE  s     r)   set_shift_and_window_sizez(ClapAudioLayer.set_shift_and_window_size,  s\     D$4$44'lDO=BYY=Q=Q=S		%,,'789Y\]mYn  5r+   c           	         | j                   dkD  rht        j                  d||df||      }t        d| j                         t        | j                   | j                          t        | j                    d       f}t        d| j                         t        | j                   | j                          t        | j                    d       f}d}|D ]  }	|D ]  }
||d d |	|
d d f<   |dz  }  t        || j                        }|j                  d| j                  | j                  z        }|j                  d      |j                  d      z
  }|j                  |dk7  d      j                  |dk(  d      }|S d }|S )Nr   r   r   r0   r-   g      Yr   )	rD  rB   r   slicer5   r:   r2   r   masked_fill)ru   r6   r7   r   rP   img_maskheight_sliceswidth_slicescountheight_slicewidth_slicemask_windows	attn_masks                r)   get_attn_maskzClapAudioLayer.get_attn_mask4  s   ??Q{{Avua#8fUHa$***+t'''$//)9:t&-M a$***+t'''$//)9:t&-L
 E -#/K@EHQk1<=QJE $0 !.
 ,Hd6F6FGL',,R1A1ADDTDT1TUL$..q1L4J4J14MMI!--i1nfEQQR[_`R`befI  Ir+   c                     | j                   || j                   z  z
  | j                   z  }| j                   || j                   z  z
  | j                   z  }ddd|d|f}t        j                  j                  ||      }||fS rN  )r5   r   rS   r   )ru   r#   r6   r7   	pad_right
pad_bottom
pad_valuess          r)   	maybe_padzClapAudioLayer.maybe_padP  s    %%0@0@(@@DDTDTT	&&$2B2B)BBdFVFVV
Ay!Z8
))-Dj((r+   r#   input_dimensionsr   r   always_partitionrM   c                    |s| j                  |       n	 |\  }}|j                         \  }}	}
|}| j                  |      }|j                  ||||
      }| j	                  |||      \  }}|j
                  \  }	}}}	| j                  dkD  r1t        j                  || j                   | j                   fd      }n|}t        || j                        }|j                  d| j                  | j                  z  |
      }| j                  |||j                  |j                        }| j                  ||||      }|d   }|j                  d| j                  | j                  |
      }t        || j                  ||      }| j                  dkD  r/t        j                  || j                  | j                  fd      }n|}|d   dkD  xs |d   dkD  }|r|d d d |d |d d f   j!                         }|j                  |||z  |
      }|| j#                  |      z   }| j%                  |      }| j'                  |      }|| j)                  |      z   }|r	||d	   f}|S |f}|S )
Nr   )r   r-   )shiftsdimsr0   r   )r   r	   r/   r   )rS  r   rG  r2   rd  r    rD  rB   rollr:   r5   r_  r   rP   rH  r<   r4   rI  rJ  rK  r   )ru   r#   re  r   r   rf  r6   r7   r%   r   r   shortcutrc  
height_pad	width_padshifted_hidden_stateshidden_states_windowsr^  attention_outputsr+  attention_windowsshifted_windows
was_paddedlayer_outputlayer_outputss                            r)   r   zClapAudioLayer.forwardW  s     **+;<("/"4"4"6
Ax --m<%**:vuhO %)NN=&%$P!z&3&9&9#:y!??Q$)JJ}tFVY]YhYhXhEipv$w!$1! !11FHXHX Y 5 : :2t?O?ORVRbRb?bdl m&&	)<)<EZEaEa ' 
	 !NN!9iK\ + 
 -Q/,11"d6F6FHXHXZbc():D<L<LjZcd ??Q %

?DOOUYUdUdCelr s /]Q&;*Q-!*;
 1!WfWfufa2G H S S U-22:v~xX 4>>2C#DD++M:((6$t{{<'@@@Q'8';< YeWfr+   )r   r   NFF)r]   r^   r_   r~   rS  r_  rd  rB   r  rc   rA   r   ra   r  r   r   r   s   @r)   r?  r?    s    38) 26,1+0A||A  S/A E--.	A
 $D>A #4.A 
u||U\\)	*Ar+   r?  c                        e Zd Z fdZ	 	 	 d	dej
                  deeef   deej                     dee
   dee
   deej
                     fdZ xZS )
ClapAudioStagec                 h   t         	|           || _        || _        t	        j
                  t        |      D cg c]-  }t        ||||||   |dz  dk(  rdn|j                  dz        / c}      | _	        |& |||t        j                        | _        d| _        y d | _        d| _        y c c}w )Nr-   r   )r   r?   rE  r   rL  rD  )r?   
norm_layerF)r}   r~   r   r?   r   
ModuleListranger?  r5   blocksr   
downsamplepointing)
ru   r   r?   rE  depthr   rI  r~  ir   s
            r)   r~   zClapAudioStage.__init__  s    mm u
 &A !%5'#,Q<%&UaZqf6H6HA6M &

 !()9sr||\DO  #DO'
s   2B/r#   re  r   r   rf  rM   c                    |\  }}t        | j                        D ]  \  }}	|||   nd }
 |	|||
||      }|d   }! |}| j                  )|dz   dz  |dz   dz  }}||||f}| j                  ||      }n||||f}|||f}|r|dd  z  }|S )Nr   r   r-   )	enumerater}  r~  )ru   r#   re  r   r   rf  r6   r7   r  layer_modulelayer_head_maskru  !hidden_states_before_downsamplingheight_downsampledwidth_downsampledoutput_dimensionsstage_outputss                    r)   r   zClapAudioStage.forward  s     )(5OA|.7.CilO(/BSUeM *!,M  6 -:)??&5;aZA4EPQ	VWGW 1!'0BDU V OO,MO_`M!' >&(IK\]]12..Mr+   rv  )r]   r^   r_   r~   rB   r  rc   rA   r   ra   r  r   r   r   s   @r)   rx  rx    sz    < 26,1+0||  S/ E--.	
 $D> #4. 
u||	r+   rx  c            	            e Zd ZdZej
                  fdee   dedej                  ddf fdZ	d Z
d	ej                  d
eeef   dej                  fdZ xZS )ClapAudioPatchMerginga'  
    Patch Merging Layer.

    Args:
        input_resolution (`tuple[int]`):
            Resolution of input feature.
        dim (`int`):
            Number of input channels.
        norm_layer (`nn.Module`, *optional*, defaults to `nn.LayerNorm`):
            Normalization layer class.
    rE  r?   rz  rM   Nc                     t         |           || _        || _        t	        j
                  d|z  d|z  d      | _         |d|z        | _        y )Nr.   r-   Fr   )r}   r~   rE  r?   r   r   	reductionr   )ru   rE  r?   rz  r   s       r)   r~   zClapAudioPatchMerging.__init__  sI     01s7AG%@q3w'	r+   c                     |dz  dk(  xs |dz  dk(  }|r.ddd|dz  d|dz  f}t         j                  j                  ||      }|S )Nr-   r   r   )r   rS   r   )ru   input_featurer6   r7   
should_padrc  s         r)   rd  zClapAudioPatchMerging.maybe_pad  sU    qjAo:519>
Q519a!<JMM--mZHMr+   r  re  c                    |\  }}|j                   \  }}}|j                  ||||      }| j                  |||      }|d d dd ddd dd d f   }|d d dd ddd dd d f   }	|d d dd ddd dd d f   }
|d d dd ddd dd d f   }t        j                  ||	|
|gd      }|j                  |dd|z        }| j                  |      }| j                  |      }|S )Nr   r-   r   r0   r.   )r    r2   rd  rB   catr   r  )ru   r  re  r6   r7   r%   r?   r8   input_feature_0input_feature_1input_feature_2input_feature_3s               r)   r   zClapAudioPatchMerging.forward  s   ((5(;(;%
C%**:vulS}feD'14a4Aq(89'14a4Aq(89'14a4Aq(89'14a4Aq(89		?O_Ve"fhjk%**:r1|;KL		-0}5r+   )r]   r^   r_   r`   r   r   rc   rA   Moduler~   rd  rB   r  r   r   r   s   @r)   r  r    sr    
 XZWcWc (s (# (299 (hl (U\\ U3PS8_ Y^YeYe r+   r  c                        e Zd Z fdZd Z	 	 	 	 	 	 	 ddeej                     deej                     dee   dee   dee   dee   d	ee   d
e	e
ef   fdZ xZS )ClapAudioEncoderc                    t         |           t        |j                        | _        || _        t        |      | _        |j                  | _        | j                  j                  | _	        |j                  | _
        |j                  |j                  z  | _        t        |j                  d| j                  dz
  z  z        | _        t!        j"                  d|j$                  t'        |j                        d      D cg c]  }|j)                          }}| j                  j*                  }t-        | j                        D cg c]  }|d   d|z  z  |d   d|z  z  f c}| _        t1        j2                  t-        | j                        D cg c]  }t5        |t        |j                  d|z  z        | j.                  |   |j                  |   |j6                  |   |t'        |j                  d |       t'        |j                  d |dz           || j                  dz
  k  rt8        nd        c}      | _        d| _        t1        j>                  |j                        | _         t1        jB                  | j                        | _"        |j                  | _        t1        jF                  d      | _$        y c c}w c c}w c c}w )Nr-   r   r   cpurO   )r   r?   rE  r  r   rI  r~  F)%r}   r~   rR   depths
num_layersr   r   patch_embedr   r   r   num_mel_bins
freq_ratiorA   r   num_featuresrB   linspacerL  r   itemr   r|  input_resolutionsr   r{  rx  r   r  layersgradient_checkpointingr   
batch_normr   r   AdaptiveAvgPool1davgpool)ru   r   xrL  r   r  i_layerr   s          r)   r~   zClapAudioEncoder.__init__  sg   fmm,.v6#11 ,,99)) **f.A.AA ? ?!Z[H[B\ \],1NN1f>S>SUXY_YfYfUgpu,vw,vq!&&(,vw$$..	\abfbqbq\r!s\rWX9Q<AqD#99Q<AqD;Q"R\r!smm  %T__5  6G !F;;ajHI%)%;%;G%D --0$88A,Sx1H-ICPVP]P]^k`gjk`kPlLmn9@4??UVCV9V4]a  6
 ',#..)<)<=LL!2!23	mm++A.3 x "ts   J<KB#Kc                    |j                   \  }}}}t        | j                  | j                  z        }| j                  | j                  z  }||kD  s||kD  rt	        d      ||k  r%t
        j                  j                  |||fdd      }||k  r%t
        j                  j                  |||fdd      }|j                   \  }}}	}
|j                  ||| j                  z  |	| j                  z  |
      }|j                  dddd      j                         }|j                  |||
| j                  z  |	| j                  z        }|S )	z
        The input is 4 normalized log mel spectrograms. It is reshape to the common shape of images. Each channel
        should represent 1 of the 4 crops of the spectrogram. For more details, refer to the [`ClapFeatureExtractor`].
        z@the wav size should be less than or equal to the swin input sizebicubicT)modealign_cornersr   r   r	   r-   )r    rA   r   r  r   r   rS   r*   r"   r3   r4   )ru   normalized_input_featuresr   r&   freq_length
spec_widthspec_heightbatchr   timefreqs              r)   reshape_mel2imgz ClapAudioEncoder.reshape_mel2img8  s`   
 *C)H)H&1k;$//9:
nn7#{['@_`` #(*(A(A)J+D9dh )B )% $(*(A(A)K+EIei )B )% '@&E&E#xt %>$E$E8doo-tt/F%
! %>$E$EaAq$Q$\$\$^!$=$E$E8TDOO3TT__5L%
! )(r+   	is_longerr   r   output_hidden_states(output_hidden_states_before_downsamplingrf  return_dictrM   c	                 &   |j                  dd      }| j                  |      }	|	j                  dd      }	d }
| j                  r6|j                  |j                        }t        j                  |dk(        d   }
| j                  |	      }|j                  d   }| j                  ||
      }|rdnd }|rdnd }|rdnd }| j                  d   }|rE|j                  \  }}} |j                  |g|| }|j                  dddd      }||fz  }||fz  }t        | j                        D ]  \  }}|||   nd }| j                  |   } ||||||      }|d   }|d   }|d   }|d   |d   f}|rP|rN|j                  \  }}} |j                  |g|d   |d   f| }|j                  dddd      }||fz  }||fz  }nI|rG|sE|j                  \  }}} |j                  |g|| }|j                  dddd      }||fz  }||fz  }|s||dd  z  } | j                  |      }|j                  \  }}}|dt!        | j"                        dz
  z  z  | j$                  d   z  }|dt!        | j"                        dz
  z  z  | j$                  d   z  }|j                  ddd      j'                         j)                  ||||      }|j                  \  }}} }!| | j*                  z  }"|j)                  ||| |"z  |"|!      }|j                  ddddd      j'                         j)                  |||"d      }| j-                  t        j.                  |d            }#t        j.                  |#d      }#|st1        d	 ||#||fD              S t3        ||#||
      S )Nr   r	   r   r-   rd   r   r0   r.   c              3   $   K   | ]  }|| 
 y wr|   rd   )rs   vs     r)   rv   z+ClapAudioEncoder.forward.<locals>.<genexpr>  s#      	A = s   r[   pooler_outputr#   r\   )r   r  r   torP   rB   wherer  r    r  r  r2   r3   r  r  r   rR   r  r   r4   r"   r  r  r   rc   r   )$ru   input_featuresr  r   r   r  r  rf  r  r  is_longer_list_idxis_longer_listr#   
frames_numall_hidden_statesall_reshaped_hidden_statesall_self_attentionsre  r%   r   hidden_sizereshaped_hidden_stater  r  r  ru  r  r  r[   
n_channels
freq_shapetemporal_shapen_frequenciesn_temp
c_freq_binlatent_outputs$                                       r)   r   zClapAudioEncoder.forward\  s{    (11!Q7$(OON$C!$=$G$G1$M!!&\\.*?*?@N!&^q-@!A!!D,,-FG"((+
((8JK"6BD+?RT"$5b411!4)6)<)<&J;$6M$6$6z$bDT$bVa$b!$9$A$A!Q1$M!-!11&+@*BB&(5OA|.7.CilO#55a8(/BSUeM *!,M0=a0@- -a 0 1" 57H7LM#(P-N-T-T*
A{ )O(I(N(N)"3A"68I!8L!M)OZ)% )>(E(EaAq(Q%!&G%II!*/D.FF*%.V-:-@-@*
A{(:(:(::(fHX(fZe(f%(=(E(EaAq(Q%!m%55!*/D.FF* #}QR'88#G  6J !IIm4$5$;$;!
AzA#dkk*:Q*>$?@DDUDUVWDXX
#c$++.>.B(CDHYHYZ[H\\ %%aA.99;CCJPZ\fhvw 	 9J8O8O5
Jv"doo5
-55
MZ$?V
 %%aAq!4??AII*V`blnpq 	 U]]3Da%HImQ7 	 &!.'		 	 	 */'4*	
 	
r+   )NNFFFFT)r]   r^   r_   r~   r  r   rB   ra   r  r   rc   rf   r   r   r   s   @r)   r  r    s    &/P")N 2615,1/4CH+0&*u
 E--.u
 E--.	u

 $D>u
 'tnu
 3;4.u
 #4.u
 d^u
 
u**	+u
r+   r  c                   4     e Zd Zdeeef   f fdZd Z xZS )ClapProjectionLayerr   c                     t         |           || _        |j                  }|j                  }t        j                  ||      | _        t        |j                     | _
        t        j                  ||      | _        y r|   )r}   r~   r   r  projection_dimr   r   linear1r
   projection_hidden_act
activationlinear2)ru   r   r  r  r   s       r)   r~   zClapProjectionLayer.__init__  sa    ((..yyn= !=!=>yy@r+   c                 l    | j                  |      }| j                  |      }| j                  |      }|S r|   )r  r  r  r6  s     r)   r   zClapProjectionLayer.forward  s2    ]36]3r+   )	r]   r^   r_   r   r   r   r~   r   r   r   s   @r)   r  r    s     Au_n%DE Ar+   r  c                   2     e Zd ZdZ fdZ	 ddZd Z xZS )ClapTextEmbeddingszV
    Same as BertEmbeddings with a tiny tweak for positional embeddings indexing.
    c                    t         |           t        j                  |j                  |j
                  |j                        | _        t        j                  |j                  |j
                        | _	        t        j                  |j                  |j
                        | _        t        j                  |j
                  |j                        | _        t        j                  |j                        | _        t#        |dd      | _        | j'                  dt)        j*                  |j                        j-                  d      d       | j'                  d	t)        j.                  | j0                  j3                         t(        j4                  
      d       |j                  | _        t        j                  |j                  |j
                  | j6                        | _	        y )N)rG   rA  position_embedding_typeabsoluteposition_ids)r   r0   T)
persistenttoken_type_ids)r   )r}   r~   r   	Embedding
vocab_sizer  pad_token_idword_embeddingsmax_position_embeddingsposition_embeddingstype_vocab_sizetoken_type_embeddingsr   rF  r   r<  r   rq   r  r   rB   rQ   expandr   r  r   rE   rG   ru   r   r   s     r)   r~   zClapTextEmbeddings.__init__  si   !||F,=,=v?Q?Q_e_r_rs#%<<0N0NPVPbPb#c %'\\&2H2H&J\J\%]" f&8&8f>S>STzz&"<"<='.v7PR\']$ELL)G)GHOOPWXei 	 	
 	ekk$*;*;*@*@*B%**Ubf 	 	

 "..#%<<**F,>,>DL\L\$
 r+   c                    |+|t        || j                  |      }n| j                  |      }||j                         }n|j                         d d }|d   }|st	        | d      r-| j
                  d d d |f   }|j                  |d   |      }	|	}n:t        j                  |t        j                  | j                  j                        }|| j                  |      }| j                  |      }
||
z   }| j                  dk(  r| j                  |      }||z  }| j!                  |      }| j#                  |      }|S )Nr0   r   r  r   r   r  )rK   rG   &create_position_ids_from_inputs_embedsr   hasattrr  r  rB   r   rE   r  rP   r  r  r  r  r   r   )ru   rF   r  r  inputs_embedsrH   input_shape
seq_lengthbuffered_token_type_ids buffered_token_type_ids_expandedr  
embeddingsr  s                r)   r   zClapTextEmbeddings.forward  sR    $A)TM]M]_uv#JJ=Y #..*K',,.s3K ^

 !t-.*.*=*=a*n*M'3J3Q3QR]^_R`bl3m0!A!&[

SWSdSdSkSk!l  00;M $ : :> J"%::
'':5"&":":<"H--J^^J/
\\*-
r+   c                    |j                         dd }|d   }t        j                  | j                  dz   || j                  z   dz   t        j                  |j
                        }|j                  d      j                  |      S )z
        We are provided embeddings directly. We cannot infer which are padded so just generate sequential position ids.

        Args:
            inputs_embeds: torch.Tensor

        Returns: torch.Tensor
        Nr0   r   r   r   )r   rB   rQ   rG   rE   rP   r   r  )ru   r  r  sequence_lengthr  s        r)   r  z9ClapTextEmbeddings.create_position_ids_from_inputs_embeds.  s     $((*3B/%a.||q /D4D4D"Dq"HPUPZPZcpcwcw
 %%a(//<<r+   )NNNNr   )r]   r^   r_   r`   r~   r   r  r   r   s   @r)   r  r    s    

4 rs&P=r+   r  moduler   r   r   r   scalingr   r   c                 .   t        j                  ||j                  dd            |z  }	|#|d d d d d d d |j                  d   f   }
|	|
z   }	t        j
                  j                  |	dt         j                        j                  |j                        }	t        j
                  j                  |	|| j                        }	||	|j                  dddd      z  }	t        j                  |	|      }|j                  dd      j                         }||	fS )Nr-   r	   r   r0   )r?   r   )pr   r   )rB   r   r   r    r   rS   r   float32r  r   r   r   r2   r4   )r  r   r   r   r   r  r   r   kwargsattn_weightscausal_maskattn_outputs               r)   eager_attention_forwardr  A  s     <<s}}Q':;gEL!$Q1o		"o%=>#k1==((2U]](SVVW\WbWbcL==((6??([L#innQAq&AA,,|U3K''1-88:K$$r+   c                        e Zd Z fdZ	 	 	 ddej
                  deej                     deej                     dee   de	ej
                     f
dZ
 xZS )	ClapTextSelfAttentionc                 $   t         |           |j                  |j                  z  dk7  r2t	        |d      s&t        d|j                   d|j                   d      || _        |j                  | _        t        |j                  |j                  z        | _        | j                  | j                  z  | _	        t        j                  |j                  | j                        | _        t        j                  |j                  | j                        | _        t        j                  |j                  | j                        | _        t        j                  |j                         | _        |j                   | _        | j                  dz  | _        y )Nr   embedding_sizer   r   r         )r}   r~   r  r   r  r   r   rA   r   r   r   r   r   r   r   r   r   r   attention_dropoutr  r  s     r)   r~   zClapTextSelfAttention.__init__^  sC    : ::a?PVXhHi#F$6$6#7 8 445Q8 
 #)#=#= #&v'9'9F<V<V'V#W !558P8PPYYv1143E3EF
99V//1C1CDYYv1143E3EF
zz&"E"EF!'!D!D//5r+   r#   r   r   r   rM   c                    |j                   d d }g |d| j                  }| j                  |      j                  |      j	                  dd      }| j                  |      j                  |      j	                  dd      }	| j                  |      j                  |      j	                  dd      }
t        }| j                  j                  dk7  rt        | j                  j                     } || ||	|
|f| j                  sdn| j                  | j                  |d|\  }} |j                  g |d j                         }|r||f}|S |f}|S )Nr0   r   r-   eagerr   )r   r  r   )r    r   r   r2   r   r   r   r  r   _attn_implementationr   r   r	  r  r"   r4   )ru   r#   r   r   r   r  r  r  query_states
key_statesvalue_statesattention_interfacer  r   r  s                  r)   r   zClapTextSelfAttention.forwards  sa    $))#2.CCbC$*B*BCzz-055lCMMaQRSXXm,11,?II!QO
zz-055lCMMaQRS(?;;++w6"9$++:Z:Z"[$7
%
  $}}C$2H2HLL
%
 
%
!\ *k));;;;FFH1B;- JUr+   r  r  r   s   @r)   r  r  ]  so    60 7;15,1!||! !!2!23! E--.	!
 $D>! 
u||	!r+   r  c                   n     e Zd Z fdZdej
                  dej
                  dej
                  fdZ xZS )ClapTextSelfOutputc                 (   t         |           t        j                  |j                  |j                        | _        t        j                  |j                  |j                        | _        t        j                  |j                        | _
        y NrA  )r}   r~   r   r   r  r  r   rF  r   r<  r   r  s     r)   r~   zClapTextSelfOutput.__init__  s`    YYv1163E3EF
f&8&8f>S>STzz&"<"<=r+   r#   r  rM   c                 r    | j                  |      }| j                  |      }| j                  ||z         }|S r|   r  r   r   r  s      r)   r   zClapTextSelfOutput.forward  7    

=1]3}|'CDr+   r  r   s   @r)   r  r    1    >U\\  RWR^R^ r+   r  c                        e Zd Z fdZd Z	 	 	 d	dej                  deej                     deej                     dee	   de
ej                     f
dZ xZS )
ClapTextAttentionc                     t         |           t        |      | _        t	        |      | _        t               | _        y r|   )r}   r~   r  ru   r  r   r  r  r  s     r)   r~   zClapTextAttention.__init__  s0    )&1	(0Er+   c                 >   t        |      dk(  ry t        || j                  j                  | j                  j                  | j
                        \  }}t        | j                  j                  |      | j                  _        t        | j                  j                  |      | j                  _        t        | j                  j                  |      | j                  _	        t        | j                  j                  |d      | j                  _        | j                  j                  t        |      z
  | j                  _        | j                  j                  | j                  j                  z  | j                  _        | j
                  j                  |      | _        y r   r!  r#  s      r)   r&  zClapTextAttention.prune_heads  r'  r+   r#   r   r   r   rM   c                 p     | j                   |f|||d|}| j                  |d   |      }|f|dd  z   }|S N)r   r   r   r   r   r)  )	ru   r#   r   r   r   r  r*  r+  r  s	            r)   r   zClapTextAttention.forward  s_     !tyy
)/	

 
  ;;|AF#%QR(88r+   r  r,  r   s   @r)   r  r    st    ";* 7;15,1|| !!2!23 E--.	
 $D> 
u||	r+   r  c                   V     e Zd Z fdZdej
                  dej
                  fdZ xZS )ClapTextIntermediatec                    t         |           t        j                  |j                  |j
                        | _        t        |j                  t              rt        |j                     | _        y |j                  | _        y r|   )r}   r~   r   r   r  intermediate_sizer  r   r1  r2  r
   r3  r  s     r)   r~   zClapTextIntermediate.__init__  s]    YYv1163K3KL
f''-'-f.?.?'@D$'-'8'8D$r+   r#   rM   c                 J    | j                  |      }| j                  |      }|S r|   r5  r6  s     r)   r   zClapTextIntermediate.forward  r7  r+   r  r   s   @r)   r   r     r8  r+   r   c                   n     e Zd Z fdZdej
                  dej
                  dej
                  fdZ xZS )ClapTextOutputc                 (   t         |           t        j                  |j                  |j
                        | _        t        j                  |j
                  |j                        | _        t        j                  |j                        | _        y r  )r}   r~   r   r   r"  r  r  r   rF  r   r<  r   r  s     r)   r~   zClapTextOutput.__init__  s`    YYv779K9KL
f&8&8f>S>STzz&"<"<=r+   r#   r  rM   c                 r    | j                  |      }| j                  |      }| j                  ||z         }|S r|   r  r  s      r)   r   zClapTextOutput.forward  r  r+   r  r   s   @r)   r%  r%    r  r+   r%  c                        e Zd Z fdZ	 	 	 d	dej
                  deej                     deej                     dee   de	ej
                     f
dZ
d Z xZS )
ClapTextLayerc                     t         |           |j                  | _        d| _        t	        |      | _        t        |      | _        t        |      | _	        y )Nr   )
r}   r~   rC  seq_len_dimr  rH  r   rK  r%  r   r  s     r)   r~   zClapTextLayer.__init__  sI    '-'E'E$*6208$V,r+   r#   r   r   r   rM   c                      | j                   |f|||d|}|d   }|dd  }t        | j                  | j                  | j                  |      }	|	f|z   }|S r  )rH  r   feed_forward_chunkrC  r+  )
ru   r#   r   r   r   r  self_attention_outputsr+  r  rt  s
             r)   r   zClapTextLayer.forward  s     "0"
)/	"

 "
 2!4(,0##T%A%A4CSCSUe
  /G+r+   c                 L    | j                  |      }| j                  ||      }|S r|   )rK  r   )ru   r+  intermediate_outputrt  s       r)   r-  z ClapTextLayer.feed_forward_chunk  s,    "//0@A{{#68HIr+   r  )r]   r^   r_   r~   rB   r  r   ra   r  rc   r   r-  r   r   s   @r)   r)  r)    st    - 7;15,1|| !!2!23 E--.	
 $D> 
u||	2r+   r)  c                        e Zd Z fdZe	 	 	 	 	 d
dej                  deej                     deej                     dee	   dee	   dee	   de
eej                     ef   fd	       Z xZS )ClapTextEncoderc                     t         |           || _        t        j                  t        |j                        D cg c]  }t        |       c}      | _        d| _	        y c c}w )NF)
r}   r~   r   r   r{  r|  num_hidden_layersr)  layerr  )ru   r   r  r   s      r)   r~   zClapTextEncoder.__init__  sT    ]]5IaIaCb#cCbaM&$9Cb#cd
&+# $ds   A#r#   r   r   r   r  r  rM   c           	          |rdnd }|rdnd }	t        | j                        D ]4  \  }
}|r||fz   }|||
   nd } |d||||d|}|d   }|s,|	|d   fz   }	6 |r||fz   }t        |||	      S )Nrd   )r#   r   r   r   r   r   )r[   r#   r\   )r  r5  r   )ru   r#   r   r   r   r  r  r  r  r  r  r  r  ru  s                 r)   r   zClapTextEncoder.forward$  s     #7BD$5b4(4OA|#$58H$H!.7.CilO( +-)"3	
 M *!,M &9]1=M<O&O#!  5$   1]4D D++*
 	
r+   )NNFFT)r]   r^   r_   r~   r   rB   r  r   ra   r  r   rc   r   r   r   r   s   @r)   r2  r2    s    ,  7;15,1/4&*&
||&
 !!2!23&
 E--.	&

 $D>&
 'tn&
 d^&
 
uU\\"O3	4&
 &
r+   r2  c                   V     e Zd Z fdZdej
                  dej
                  fdZ xZS )ClapTextPoolerc                     t         |           t        j                  |j                  |j                        | _        t        j                         | _        y r|   )r}   r~   r   r   r  r  Tanhr  r  s     r)   r~   zClapTextPooler.__init__P  s9    YYv1163E3EF
'')r+   r#   rM   c                 \    |d d df   }| j                  |      }| j                  |      }|S rN  )r  r  )ru   r#   first_token_tensorpooled_outputs       r)   r   zClapTextPooler.forwardU  s6     +1a40

#566r+   r  r   s   @r)   r8  r8  O  s#    $
U\\ ell r+   r8  c                   @    e Zd ZU eed<   dZdZdej                  fdZ	y)ClapPreTrainedModelr   clapFr  c                    | j                   j                  }t        |t              ri|j                  j
                  j                  j                  d|dz         |j                  j
                  j                  j                  d|dz         yt        |t              r|j                  j                  j                  t        j                  | j                   j                               |j                  j                  j                  t        j                  | j                   j                               yt        |t         j"                        r+|j
                  j                  j                  d|dz         yt        |t         j$                  t         j&                  f      rJ|j(                  j                  j+                          |j
                  j                  j                  d       yt        |t         j,                  t         j.                  f      r| j                   j0                  dz  d| j                   j2                  z  dz  z  |z  }t         j4                  j                  |j
                  |       |j(                  %|j(                  j                  j+                          yyt        |t6              r%|j8                  j                  j+                          yy)	zInitialize the weightsr   g{Gz?)meanstdg      ?r  r-   )rC  N)r   initializer_factorr   r  r  weightdatanormal_r  	ClapModellogit_scale_afill_r   loglogit_scale_init_valuelogit_scale_tr   r  r   r   r   zero_r   r   r  r4  initr   r   )ru   r  factorin_proj_stds       r)   _init_weightsz!ClapPreTrainedModel._init_weightsd  s   //f01&&--22::RV:W((//44<<#6TX=<Y	*  %%++DHHT[[5W5W,XY  %%++DHHT[[5W5W,XY-MM&&CVd]&Cr~~ >?KK""$MM$$S)BII 67;;22D8a$++B_B_>_dh=hilrrKGGOOFMM{O;{{&  &&( ' 67//44::< 8r+   N)
r]   r^   r_   r   rb   base_model_prefixsupports_gradient_checkpointingr   r  rR  rd   r+   r)   r?  r?  ^  s$    &+#=BII =r+   r?  c                        e Zd ZU eed<   dZdef fdZdej                  fdZ	e
	 	 	 	 	 ddeej                     deej                     dee   dee   d	ee   deeef   fd
       Z xZS )ClapAudioModelr   r  c                 d    t         |   |       t        |      | _        | j	                          y r|   )r}   r~   r  audio_encoder	post_initr  s     r)   r~   zClapAudioModel.__init__  s'     -f5r+   rM   c                 B    | j                   j                  j                  S r|   )rX  r  r   rx   s    r)   get_input_embeddingsz#ClapAudioModel.get_input_embeddings  s    !!--222r+   r  r   r  r  c                     ||n| j                   j                  }||n| j                   j                  }||n| j                   j                  }| j	                  |||||      S )ae  
        is_longer (`torch.FloatTensor`, of shape `(batch_size, 1)`, *optional*):
            Whether the audio clip is longer than `max_length`. If `True`, a feature fusion will be enabled to enhance
            the features.

        Examples:

        ```python
        >>> from datasets import load_dataset
        >>> from transformers import AutoProcessor, ClapAudioModel

        >>> dataset = load_dataset("hf-internal-testing/ashraq-esc50-1-dog-example")
        >>> audio_sample = dataset["train"]["audio"][0]["array"]

        >>> model = ClapAudioModel.from_pretrained("laion/clap-htsat-fused")
        >>> processor = AutoProcessor.from_pretrained("laion/clap-htsat-fused")

        >>> inputs = processor(audios=audio_sample, return_tensors="pt")

        >>> outputs = model(**inputs)
        >>> last_hidden_state = outputs.last_hidden_state
        ```r  r  r   r  r  )r   use_return_dictr   r  rX  )ru   r  r  r   r  r  s         r)   r   zClapAudioModel.forward  sx    > &1%<k$++B]B]1B1N-TXT_T_TqTq$8$D $++JjJj 	 !!)/!5# " 
 	
r+   NNNNN)r]   r^   r_   r   rb   main_input_namer~   r   r  r[  r   r   rB   ra   
BoolTensorr  r   rc   r   r   r   r   s   @r)   rV  rV  |  s    &O 3bii 3  7;04,0/3&**
 !2!23*
 E,,-*
 $D>	*

 'tn*
 d^*
 
u00	1*
 *
r+   rV  a0  
    The model can behave as an encoder (with only self-attention) as well as a decoder, in which case a layer of
    cross-attention is added between the self-attention layers, following the architecture described in *Attention is
    all you need*_ by Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N. Gomez, Lukasz
    Kaiser and Illia Polosukhin.

    To behave as an decoder the model needs to be initialized with the `is_decoder` argument of the configuration set
    to `True`. To be used in a Seq2Seq model, the model needs to initialized with both `is_decoder` argument and
    `add_cross_attention` set to `True`; an `encoder_hidden_states` is then expected as an input to the forward pass.

    .. _*Attention is all you need*: https://huggingface.co/papers/1706.03762
    c                   b    e Zd ZU eed<   d fd	Zd Zd Zee		 	 	 	 	 	 	 	 	 dde
ej                     de
ej                     de
ej                     de
ej                     d	e
ej                     d
e
ej                     de
e   de
e   de
e   deeej                     ef   fd              Z xZS )ClapTextModelr   c                     t         |   |       || _        t        |      | _        t        |      | _        |rt        |      nd| _        | j                          y)zv
        add_pooling_layer (bool, *optional*, defaults to `True`):
            Whether to add a pooling layer
        N)
r}   r~   r   r  r  r2  encoderr8  poolerrY  )ru   r   add_pooling_layerr   s      r)   r~   zClapTextModel.__init__  sM    
 	 ,V4&v.0AnV,t 	r+   c                 .    | j                   j                  S r|   r  r  rx   s    r)   r[  z"ClapTextModel.get_input_embeddings  s    ...r+   c                 &    || j                   _        y r|   ri  ru   r   s     r)   set_input_embeddingsz"ClapTextModel.set_input_embeddings  s    */'r+   rF   r   r  r  r   r  r   r  r  rM   c
                    ||n| j                   j                  }||n| j                   j                  }|	|	n| j                   j                  }	||t	        d      |#| j                  ||       |j                         }
n!||j                         d d }
nt	        d      |
\  }}||j                  n|j                  }|t        j                  ||f|      }|pt        | j                  d      r4| j                  j                  d d d |f   }|j                  ||      }|}n&t        j                  |
t        j                  |      }| j!                  ||
      }| j#                  || j                   j$                        }| j                  ||||      }| j'                  |||||d	      }|d
   }| j(                  | j)                  |      nd }t+        |||j,                  |j.                        S )NzDYou cannot specify both input_ids and inputs_embeds at the same timer0   z5You have to specify either input_ids or inputs_embedsrO   r  r   )rF   r  r  r  T)r   r   r   r  r  r   r  )r   r   r  r^  r   %warn_if_padding_and_no_attention_maskr   rP   rB   onesr  r  r  r  r   rE   get_extended_attention_maskget_head_maskr4  re  rf  r   r#   r\   )ru   rF   r   r  r  r   r  r   r  r  r  r%   r  rP   r  r  extended_attention_maskembedding_outputencoder_outputssequence_outputr=  s                        r)   r   zClapTextModel.forward  s    2C1N-TXT_T_TqTq$8$D $++JjJj 	 &1%<k$++B]B] ]%>cdd"66y.Q#..*K&',,.s3KTUU!,
J%.%:!!@T@T!"ZZ*j)A6RN!t(89*.//*H*HKZK*X'3J3Q3QR\^h3i0!A!&[

SY!Z 150P0PQ_al0m &&y$++2O2OP	??%)'	 + 
 ,,2/!5 ' 
 *!,8<8OO4UY)-')77&11	
 	
r+   )T	NNNNNNNNN)r]   r^   r_   r   rb   r~   r[  rl  r   r   r   rB   r  r  r   rc   r   r   r   r   s   @r)   rc  rc    s     /0  -11515/3,004,0/3&*G
ELL)G
 !.G
 !.	G

 u||,G
 ELL)G
  -G
 $D>G
 'tnG
 d^G
 
uU\\"$PP	QG
  G
r+   rc  c                   \    e Zd ZU eed<   def fdZ e       e	 	 ddej                  de
ej                     de
ej                     dej                  fd              Z e       e	 	 ddej                  d	e
ej                     de
ej                     dej                  fd
              Zee	 	 	 	 	 	 	 	 	 dde
ej                     de
ej                     d	e
ej                      de
ej                     de
ej                     de
e   de
e   de
e   de
e   deeef   fd              Z xZS )rH  r   c                 .   t         |   |       t        |j                  t              s"t        dt        |j                         d      t        |j                  t              s"t        dt        |j                         d      |j                  }|j                  }t        j                  t        j                  t        j                  |j                                    | _        t        j                  t        j                  t        j                  |j                                    | _        |j$                  | _        t'        |      | _        t+        |      | _        t/        |      | _        t+        |      | _        | j5                          y )NzKconfig.text_config is expected to be of type ClapTextConfig but is of type .zMconfig.audio_config is expected to be of type ClapAudioConfig but is of type )r}   r~   r   text_configr   	TypeErrortypeaudio_configr   r   r   rB   rR  r   rK  rL  rI  rM  r  rc  
text_modelr  text_projectionrV  audio_modelaudio_projectionrY  )ru   r   rz  r}  r   s       r)   r~   zClapModel.__init__.  s=    &,,n=++,-Q0 
 &--?,,-.a1 
 ((**\\%,,txx@]@]7^*_`\\%,,txx@]@]7^*_`$33'42;?),7 3L A 	r+   rF   r   r  rM   c                     | j                  |||      }| j                  |j                        }t        j                  |d      }|S )a  
        Returns:
            text_features (`torch.FloatTensor` of shape `(batch_size, output_dim`): The text embeddings obtained by
            applying the projection layer to the pooled output of [`ClapTextModel`].

        Examples:

        ```python
        >>> import torch
        >>> from transformers import AutoTokenizer, ClapModel

        >>> model = ClapModel.from_pretrained("laion/clap-htsat-unfused")
        >>> tokenizer = AutoTokenizer.from_pretrained("laion/clap-htsat-unfused")

        >>> inputs = tokenizer(["the sound of a cat", "the sound of a dog"], padding=True, return_tensors="pt")
        >>> with torch.inference_mode():
        ...     text_features = model.get_text_features(**inputs)
        ```)rF   r   r  r0   r>   )r~  r  r  F	normalize)ru   rF   r   r  text_outputstext_featuress         r)   get_text_featureszClapModel.get_text_featuresN  sM    4 48??\ 4C 4
 ,,\-G-GHMr:r+   r  r  c                     | j                  ||      }| j                  |j                        }t        j                  |d      }|S )a  
        is_longer (`torch.FloatTensor`, of shape `(batch_size, 1)`, *optional*):
            Whether the audio clip is longer than `max_length`. If `True`, a feature fusion will be enabled to enhance
            the features.

        Returns:
            audio_features (`torch.FloatTensor` of shape `(batch_size, output_dim`): The audio embeddings obtained by
            applying the projection layer to the pooled output of [`ClapAudioModel`].

        Examples:

        ```python
        >>> import torch
        >>> from transformers import AutoFeatureExtractor, ClapModel

        >>> model = ClapModel.from_pretrained("laion/clap-htsat-unfused")
        >>> feature_extractor = AutoFeatureExtractor.from_pretrained("laion/clap-htsat-unfused")
        >>> random_audio = torch.rand((16_000))

        >>> inputs = feature_extractor(random_audio, return_tensors="pt")
        >>> with torch.inference_mode():
        ...     audio_features = model.get_audio_features(**inputs)
        ```)r  r  r0   r>   )r  r  r  r  r  )ru   r  r  r   audio_outputsaudio_featuress         r)   get_audio_featureszClapModel.get_audio_featuresp  sM    > 594D4D)Y 5E 5
 ..}/J/JK^<r+   return_lossr   r  r  c
           	      l   ||n| j                   j                  }||n| j                   j                  }|	|	n| j                   j                  }	| j	                  ||||d      }
| j                  |||||d      }|	s|
d   n|
j                  }| j                  |      }|	s|d   n|j                  }| j                  |      }||j                  ddd      z  }||j                  ddd      z  }| j                  j                         }| j                  j                         }t        j                  ||j                               |z  }t        j                  ||j                               |z  }d}|r,t!        |      }t!        |j                               }||z   d	z  }t#        |||||||

      S )a  
        is_longer (`torch.FloatTensor`, of shape `(batch_size, 1)`, *optional*):
            Whether the audio clip is longer than `max_length`. If `True`, a feature fusion will be enabled to enhance
            the features.
        return_loss (`bool`, *optional*):
            Whether or not to return the contrastive loss.

        Examples:

        ```python
        >>> from datasets import load_dataset
        >>> from transformers import AutoProcessor, ClapModel

        >>> dataset = load_dataset("hf-internal-testing/ashraq-esc50-1-dog-example")
        >>> audio_sample = dataset["train"]["audio"][0]["array"]

        >>> model = ClapModel.from_pretrained("laion/clap-htsat-unfused")
        >>> processor = AutoProcessor.from_pretrained("laion/clap-htsat-unfused")

        >>> input_text = ["Sound of a dog", "Sound of vacuum cleaner"]

        >>> inputs = processor(text=input_text, audios=audio_sample, return_tensors="pt", padding=True)

        >>> outputs = model(**inputs)
        >>> logits_per_audio = outputs.logits_per_audio  # this is the audio-text similarity score
        >>> probs = logits_per_audio.softmax(dim=-1)  # we can take the softmax to get the label probabilities
        ```NTr]  rF   r   r  r   r  r  r   r-   r0   )r  r?   keepdimg       @)rj   rk   rl   rZ   rg   rm   rn   )r   r   r  r^  r  r~  r  r  r  r   rM  exprI  rB   r   trV   ri   )ru   rF   r  r  r   r  r  r   r  r  r  r  rg   rZ   logit_scale_textlogit_scale_audiorl   rk   rj   caption_loss
audio_losss                        r)   r   zClapModel.forward  s   T 2C1N-TXT_T_TqTq$8$D $++JjJj 	 &1%<k$++B]B](()/!5 ) 
 )%/!5 ' 
 0;}Q'@[@[,,\:-8l1ol>X>X**;7 $l&7&7!T&7&RR!K$4$4qb$$4$OO  --113 ..224,,{LNN4DEHXX <<kmmoFIZZ+O<L)*:*<*<*>?J :-4D-+#%*,
 	
r+   )NNrv  )r]   r^   r_   r   rb   r~   r   r   rB   r  r   ra   r  r  r   
LongTensorra  r  r   rc   ri   r   r   r   s   @r)   rH  rH  *  s   z @ %& 26/3	<< !. u||,	
 
		  '@ %& -115	## ELL)# !.	#
 
		#  '#J  156:041537&*,0/3&*]
E,,-]
 !!2!23]
 E,,-	]

 !.]
 u//0]
 d^]
 $D>]
 'tn]
 d^]
 
uj 	!]
  ]
r+   rH  c                       e Zd ZU eed<   def fdZdej                  fdZd Z	e
e	 	 	 	 	 	 ddeej                     deej                     deej                     d	ee   d
ee   dee   deeef   fd              Z xZS )ClapTextModelWithProjectionr   c                     t         |   |       t        |      | _        t	        |      | _        | j                          y r|   )r}   r~   rc  r~  r  r  rY  r  s     r)   r~   z$ClapTextModelWithProjection.__init__  s3     '/26:r+   rM   c                 B    | j                   j                  j                  S r|   r~  r  r  rx   s    r)   r[  z0ClapTextModelWithProjection.get_input_embeddings  s    ))999r+   c                 :    || j                   j                  _        y r|   r  rk  s     r)   rl  z0ClapTextModelWithProjection.set_input_embeddings  s    5:""2r+   rF   r   r  r   r  r  c                    ||n| j                   j                  }| j                  |||||d      }|s|d   n|j                  }| j	                  |      }	t        |	|j                  |j                  |j                        S )a  
        Examples:

        ```python
        >>> from transformers import AutoTokenizer, ClapTextModelWithProjection

        >>> model = ClapTextModelWithProjection.from_pretrained("laion/clap-htsat-unfused")
        >>> tokenizer = AutoTokenizer.from_pretrained("laion/clap-htsat-unfused")

        >>> inputs = tokenizer(["a sound of a cat", "a sound of a dog"], padding=True, return_tensors="pt")

        >>> outputs = model(**inputs)
        >>> text_embeds = outputs.text_embeds
        ```Tr  r   )rZ   r[   r#   r\   )	r   r^  r~  r  r  rY   r[   r#   r\   )
ru   rF   r   r  r   r  r  r  r=  rZ   s
             r)   r   z#ClapTextModelWithProjection.forward
  s    2 &1%<k$++B]B])%/!5 ' 
 0;Q@Z@Z**=9"#*<<&44#..	
 	
r+   )NNNNNN)r]   r^   r_   r   rb   r~   r   r  r[  rl  r   r   r   rB   r  r  r   rc   rY   r   r   r   s   @r)   r  r    s    ~ :bii :;  -115/3,0/3&*+
ELL)+
 !.+
 u||,	+

 $D>+
 'tn+
 d^+
 
u))	*+
  +
r+   r  c                        e Zd ZU eed<   dZdef fdZdej                  fdZ	e
e	 	 	 	 	 ddeej                     deej                     dee   dee   d	ee   deeef   fd
              Z xZS )ClapAudioModelWithProjectionr   r  c                     t         |   |       t        |      | _        t	        |      | _        | j                          y r|   )r}   r~   rV  r  r  r  rY  r  s     r)   r~   z%ClapAudioModelWithProjection.__init__?  s4     )&1 3F ;r+   rM   c                 V    | j                   j                  j                  j                  S r|   )r  rX  r  r   rx   s    r)   r[  z1ClapAudioModelWithProjection.get_input_embeddingsF  s     --99>>>r+   r  r   r  r  c                 l   ||n| j                   j                  }||n| j                   j                  }||n| j                   j                  }| j	                  ||||d      }|s|d   n|j
                  }| j                  |      }t        ||j                  |j                  |j                        S )av  
        is_longer (`torch.FloatTensor`, of shape `(batch_size, 1)`, *optional*):
            Whether the audio clip is longer than `max_length`. If `True`, a feature fusion will be enabled to enhance
            the features.

        Examples:

        ```python
        >>> from datasets import load_dataset
        >>> from transformers import ClapAudioModelWithProjection, ClapProcessor

        >>> model = ClapAudioModelWithProjection.from_pretrained("laion/clap-htsat-fused")
        >>> processor = ClapProcessor.from_pretrained("laion/clap-htsat-fused")

        >>> dataset = load_dataset("hf-internal-testing/ashraq-esc50-1-dog-example")
        >>> audio_sample = dataset["train"]["audio"][0]["array"]

        >>> inputs = processor(audios=audio_sample, return_tensors="pt")
        >>> outputs = model(**inputs)
        >>> audio_embeds = outputs.audio_embeds
        ```Tr]  r   )rg   r[   r\   r#   )r   r^  r   r  r  r  r  rf   r[   r\   r#   )	ru   r  r  r   r  r  r  r=  rg   s	            r)   r   z$ClapAudioModelWithProjection.forwardI  s    > &1%<k$++B]B]1B1N-TXT_T_TqTq$8$D $++JjJj 	 (()/!5 ) 
 1<a(A\A\,,];#%+==$//'55	
 	
r+   r_  )r]   r^   r_   r   rb   r`  r~   r   r  r[  r   r   r   rB   ra   ra  r  r   rc   rf   r   r   r   s   @r)   r  r  :  s    &O ?bii ?  7;04,0/3&*4
 !2!234
 E,,-4
 $D>	4

 'tn4
 d^4
 
u**	+4
  4
r+   r  )rH  r?  rc  r  rV  r  )r   )r   N)Vr`   r   r   dataclassesr   typingr   r   r   r   rB   torch.nn.functionalr   rS   r  activationsr
   modeling_layersr   modeling_outputsr   r   r   modeling_utilsr   r   pytorch_utilsr   r   r   r   utilsr   r   r   r   r   r   configuration_clapr   r   r   
get_loggerr]   loggerr*   r:   r<   rK   r  rV   rY   rf   ri   r  rz   r   r   r   r  r  r.  r:  r?  rx  r  r  r  r  floatr  r  r  r  r   r%  r)  r2  r8  r?  rV  rc  rH  r  r  __all__rd   r+   r)   <module>r     s      ! 1 1     ! 9 
 G v v w w K K 
		H	%"*(4$7U\\ 7ell 7
 	?+ 	? 	? 
	?; 	? 	?  
  
   
H299 2%		 %P_")) _F\RYY \@
")) 
# #NBII  	bii 	zRYY z|9/ 9z3BII 3lB
ryy B
J")) &V= V=B (,%II%<<% 
% <<	%
 U\\*% % % %%87BII 7v *		 *\299  RYY %. %R.
bii .
dRYY  =/ = =:8
( 8
v b
' b
b
J K
# K
 K
\ =
"5 =
 =
@ D
#6 D
 D
Nr+   