
"""PyTorch Pop2Piano model."""

import copy
import math
from typing import Optional, Union

import torch
from torch import nn
from torch.nn import CrossEntropyLoss
from transformers.generation import GenerationConfig

from ...activations import ACT2FN
from ...cache_utils import Cache, DynamicCache, EncoderDecoderCache
from ...generation import GenerationMixin
from ...modeling_attn_mask_utils import AttentionMaskConverter
from ...modeling_layers import GradientCheckpointingLayer
from ...modeling_outputs import (
    BaseModelOutput,
    BaseModelOutputWithPastAndCrossAttentions,
    Seq2SeqLMOutput,
)
from ...modeling_utils import PreTrainedModel
from ...pytorch_utils import find_pruneable_heads_and_indices, prune_linear_layer
from ...utils import (
    auto_docstring,
    is_torch_flex_attn_available,
    is_torch_fx_proxy,
    is_torchdynamo_compiling,
    logging,
)
from .configuration_pop2piano import Pop2PianoConfig


if is_torch_flex_attn_available():
    from torch.nn.attention.flex_attention import BlockMask

    from ...integrations.flex_attention import make_flex_block_causal_mask


logger = logging.get_logger(__name__)

_load_pop2piano_layer_norm = True

try:
    from apex.normalization import FusedRMSNorm

    _load_pop2piano_layer_norm = False

    logger.info("Discovered apex.normalization.FusedRMSNorm - will use it instead of Pop2PianoLayerNorm")
except ImportError:
    # using the normal Pop2PianoLayerNorm
    pass
except Exception:
    logger.warning("Discovered apex but it failed to load, falling back to Pop2PianoLayerNorm")
    pass


class Pop2PianoLayerNorm(nn.Module):
    def __init__(self, hidden_size, eps=1e-6):
        """
        Construct a layernorm module in the Pop2Piano style. No bias and no subtraction of mean.
        """
        super().__init__()
        self.weight = nn.Parameter(torch.ones(hidden_size))
        self.variance_epsilon = eps

    def forward(self, hidden_states):
        # Pop2Piano uses a layer norm that only rescales (RMSNorm-style): the variance is computed without
        # subtracting the mean and there is no bias. Accumulation is done in fp32 for half-precision inputs.
        variance = hidden_states.to(torch.float32).pow(2).mean(-1, keepdim=True)
        hidden_states = hidden_states * torch.rsqrt(variance + self.variance_epsilon)

        # convert back into half-precision if necessary
        if self.weight.dtype in [torch.float16, torch.bfloat16]:
            hidden_states = hidden_states.to(self.weight.dtype)

        return self.weight * hidden_states


if not _load_pop2piano_layer_norm:
    Pop2PianoLayerNorm = FusedRMSNorm  # noqa


class Pop2PianoDenseActDense(nn.Module):
    def __init__(self, config: Pop2PianoConfig):
        super().__init__()
        self.wi = nn.Linear(config.d_model, config.d_ff, bias=False)
        self.wo = nn.Linear(config.d_ff, config.d_model, bias=False)
        self.dropout = nn.Dropout(config.dropout_rate)
        self.act = ACT2FN[config.dense_act_fn]

    def forward(self, hidden_states):
        hidden_states = self.wi(hidden_states)
        hidden_states = self.act(hidden_states)
        hidden_states = self.dropout(hidden_states)
        if (
            isinstance(self.wo.weight, torch.Tensor)
            and hidden_states.dtype != self.wo.weight.dtype
            and self.wo.weight.dtype != torch.int8
        ):
            hidden_states = hidden_states.to(self.wo.weight.dtype)
        hidden_states = self.wo(hidden_states)
        return hidden_states


class Pop2PianoDenseGatedActDense(nn.Module):
    def __init__(self, config: Pop2PianoConfig):
        super().__init__()
        self.wi_0 = nn.Linear(config.d_model, config.d_ff, bias=False)
        self.wi_1 = nn.Linear(config.d_model, config.d_ff, bias=False)
        self.wo = nn.Linear(config.d_ff, config.d_model, bias=False)
        self.dropout = nn.Dropout(config.dropout_rate)
        self.act = ACT2FN[config.dense_act_fn]

    def forward(self, hidden_states):
        hidden_gelu = self.act(self.wi_0(hidden_states))
        hidden_linear = self.wi_1(hidden_states)
        hidden_states = hidden_gelu * hidden_linear
        hidden_states = self.dropout(hidden_states)

        # `self.wo` may be kept in a different dtype (e.g. for 8-bit quantization), so cast if needed.
        if (
            isinstance(self.wo.weight, torch.Tensor)
            and hidden_states.dtype != self.wo.weight.dtype
            and self.wo.weight.dtype != torch.int8
        ):
            hidden_states = hidden_states.to(self.wo.weight.dtype)

        hidden_states = self.wo(hidden_states)
        return hidden_states


class Pop2PianoLayerFF(nn.Module):
    def __init__(self, config: Pop2PianoConfig):
        super().__init__()
        if config.is_gated_act:
            self.DenseReluDense = Pop2PianoDenseGatedActDense(config)
        else:
            self.DenseReluDense = Pop2PianoDenseActDense(config)

        self.layer_norm = Pop2PianoLayerNorm(config.d_model, eps=config.layer_norm_epsilon)
        self.dropout = nn.Dropout(config.dropout_rate)

    def forward(self, hidden_states):
        forwarded_states = self.layer_norm(hidden_states)
        forwarded_states = self.DenseReluDense(forwarded_states)
        hidden_states = hidden_states + self.dropout(forwarded_states)
        return hidden_states


class Pop2PianoAttention(nn.Module):
    def __init__(
        self,
        config: Pop2PianoConfig,
        has_relative_attention_bias=False,
        layer_idx: Optional[int] = None,
    ):
        super().__init__()
        self.is_decoder = config.is_decoder
        self.has_relative_attention_bias = has_relative_attention_bias
        self.relative_attention_num_buckets = config.relative_attention_num_buckets
        self.relative_attention_max_distance = config.relative_attention_max_distance
        self.d_model = config.d_model
        self.key_value_proj_dim = config.d_kv
        self.n_heads = config.num_heads
        self.dropout = config.dropout_rate
        self.inner_dim = self.n_heads * self.key_value_proj_dim
        self.layer_idx = layer_idx
        if layer_idx is None and self.is_decoder:
            logger.warning_once(
                f"Instantiating a decoder {self.__class__.__name__} without passing `layer_idx` is not recommended "
                "and will lead to errors during the forward call, if caching is used. Please make sure to provide a "
                "`layer_idx` when creating this class."
            )

        # Mesh TensorFlow initialization to avoid scaling before softmax
        self.q = nn.Linear(self.d_model, self.inner_dim, bias=False)
        self.k = nn.Linear(self.d_model, self.inner_dim, bias=False)
        self.v = nn.Linear(self.d_model, self.inner_dim, bias=False)
        self.o = nn.Linear(self.inner_dim, self.d_model, bias=False)

        if self.has_relative_attention_bias:
            self.relative_attention_bias = nn.Embedding(self.relative_attention_num_buckets, self.n_heads)
        self.pruned_heads = set()
        self.gradient_checkpointing = False

    def prune_heads(self, heads):
        if len(heads) == 0:
            return
        heads, index = find_pruneable_heads_and_indices(
            heads, self.n_heads, self.key_value_proj_dim, self.pruned_heads
        )
        # Prune linear layers
        self.q = prune_linear_layer(self.q, index)
        self.k = prune_linear_layer(self.k, index)
        self.v = prune_linear_layer(self.v, index)
        self.o = prune_linear_layer(self.o, index, dim=1)
        # Update hyper params
        self.n_heads = self.n_heads - len(heads)
        self.inner_dim = self.key_value_proj_dim * self.n_heads
        self.pruned_heads = self.pruned_heads.union(heads)

    @staticmethod
    def _relative_position_bucket(relative_position, bidirectional=True, num_buckets=32, max_distance=128):
        """
        Adapted from Mesh Tensorflow:
        https://github.com/tensorflow/mesh/blob/0cb87fe07da627bf0b7e60475d59f95ed6b5be3d/mesh_tensorflow/transformer/transformer_layers.py#L593

        Translate relative position to a bucket number for relative attention. The relative position is defined as
        memory_position - query_position, i.e. the distance in tokens from the attending position to the attended-to
        position. If bidirectional=False, then positive relative positions are invalid. We use smaller buckets for
        small absolute relative_position and larger buckets for larger absolute relative_positions. All relative
        positions >=max_distance map to the same bucket. All relative positions <=-max_distance map to the same
        bucket. This should allow for more graceful generalization to longer sequences than the model has been
        trained on.

        Args:
            relative_position: an int32 Tensor
            bidirectional: a boolean - whether the attention is bidirectional
            num_buckets: an integer
            max_distance: an integer

        Returns:
            a Tensor with the same shape as relative_position, containing int32 values in the range [0, num_buckets)
        """
        relative_buckets = 0
        if bidirectional:
            num_buckets //= 2
            relative_buckets += (relative_position > 0).to(torch.long) * num_buckets
            relative_position = torch.abs(relative_position)
        else:
            relative_position = -torch.min(relative_position, torch.zeros_like(relative_position))
        # now relative_position is in the range [0, inf)

        # half of the buckets are for exact increments in positions
        max_exact = num_buckets // 2
        is_small = relative_position < max_exact

        # the other half of the buckets are for logarithmically bigger bins in positions up to max_distance
        relative_position_if_large = max_exact + (
            torch.log(relative_position.float() / max_exact)
            / math.log(max_distance / max_exact)
            * (num_buckets - max_exact)
        ).to(torch.long)
        relative_position_if_large = torch.min(
            relative_position_if_large, torch.full_like(relative_position_if_large, num_buckets - 1)
        )

        relative_buckets += torch.where(is_small, relative_position, relative_position_if_large)
        return relative_buckets

    def compute_bias(self, query_length, key_length, device=None, cache_position=None):
        """Compute binned relative position bias"""
        if device is None:
            device = self.relative_attention_bias.weight.device
        if cache_position is None:
            context_position = torch.arange(query_length, dtype=torch.long, device=device)[:, None]
        else:
            context_position = cache_position[:, None].to(device)
        memory_position = torch.arange(key_length, dtype=torch.long, device=device)[None, :]
        relative_position = memory_position - context_position  # shape (query_length, key_length)
        relative_position_bucket = self._relative_position_bucket(
            relative_position,  # shape (query_length, key_length)
            bidirectional=(not self.is_decoder),
            num_buckets=self.relative_attention_num_buckets,
            max_distance=self.relative_attention_max_distance,
        )
        values = self.relative_attention_bias(relative_position_bucket)  # shape (query_length, key_length, num_heads)
        values = values.permute([2, 0, 1]).unsqueeze(0)  # shape (1, num_heads, query_length, key_length)
        return values

    def forward(
        self,
        hidden_states,
        mask=None,
        key_value_states=None,
        position_bias=None,
        past_key_value=None,
        layer_head_mask=None,
        query_length=None,
        use_cache=False,
        output_attentions=False,
        cache_position=None,
    ):
        """
        Self-attention (if key_value_states is None) or attention over source sentence (provided by key_value_states).
        """
        # Input is (batch_size, seq_length, dim)
        batch_size, seq_length = hidden_states.shape[:2]

        # if key_value_states are provided this layer is used as a cross-attention layer for the decoder
        is_cross_attention = key_value_states is not None

        query_states = self.q(hidden_states)
        query_states = query_states.view(batch_size, -1, self.n_heads, self.key_value_proj_dim).transpose(1, 2)

        if past_key_value is not None and isinstance(past_key_value, EncoderDecoderCache):
            is_updated = past_key_value.is_updated.get(self.layer_idx)
            if is_cross_attention:
                # after the first generated id, we can subsequently re-use all key/value_states from the cache
                curr_past_key_value = past_key_value.cross_attention_cache
            else:
                curr_past_key_value = past_key_value.self_attention_cache
        else:
            curr_past_key_value = past_key_value

        current_states = key_value_states if is_cross_attention else hidden_states
        if is_cross_attention and past_key_value is not None and is_updated:
            # reuse k, v from the cross-attention cache
            key_states = curr_past_key_value.layers[self.layer_idx].keys
            value_states = curr_past_key_value.layers[self.layer_idx].values
        else:
            key_states = self.k(current_states)
            value_states = self.v(current_states)
            key_states = key_states.view(batch_size, -1, self.n_heads, self.key_value_proj_dim).transpose(1, 2)
            value_states = value_states.view(batch_size, -1, self.n_heads, self.key_value_proj_dim).transpose(1, 2)

            if past_key_value is not None:
                # save all key/value_states to the cache to be re-used for fast auto-regressive generation
                cache_position = cache_position if not is_cross_attention else None
                key_states, value_states = curr_past_key_value.update(
                    key_states, value_states, self.layer_idx, {"cache_position": cache_position}
                )
                # set flag that this layer's cross-attention is already updated so we can re-use it in subsequent calls
                if is_cross_attention:
                    past_key_value.is_updated[self.layer_idx] = True

        # compute scores, equivalent of torch.einsum("bnqd,bnkd->bnqk", query_states, key_states)
        scores = torch.matmul(query_states, key_states.transpose(3, 2))

        if position_bias is None:
            key_length = key_states.shape[-2]
            # cache position is 0-indexed so we add 1 to get the real length of queries (aka with past)
            real_seq_length = query_length if query_length is not None else cache_position[-1] + 1
            if not self.has_relative_attention_bias:
                position_bias = torch.zeros(
                    (1, self.n_heads, seq_length, key_length), device=scores.device, dtype=scores.dtype
                )
                if self.gradient_checkpointing and self.training:
                    position_bias.requires_grad = True
            else:
                position_bias = self.compute_bias(
                    real_seq_length, key_length, device=scores.device, cache_position=cache_position
                )
                position_bias = position_bias[:, :, -seq_length:, :]

            if mask is not None:
                causal_mask = mask[:, :, :, : key_states.shape[-2]]
                position_bias = position_bias + causal_mask

        if self.pruned_heads:
            mask = torch.ones(position_bias.shape[1])
            mask[list(self.pruned_heads)] = 0
            position_bias_masked = position_bias[:, mask.bool()]
        else:
            position_bias_masked = position_bias

        scores += position_bias_masked

        # (batch_size, n_heads, seq_length, key_length)
        attn_weights = nn.functional.softmax(scores.float(), dim=-1).type_as(scores)
        attn_weights = nn.functional.dropout(attn_weights, p=self.dropout, training=self.training)

        # Mask heads if we want to
        if layer_head_mask is not None:
            attn_weights = attn_weights * layer_head_mask

        attn_output = torch.matmul(attn_weights, value_states)

        attn_output = attn_output.transpose(1, 2).contiguous()
        attn_output = attn_output.view(batch_size, -1, self.inner_dim)
        attn_output = self.o(attn_output)

        outputs = (attn_output, position_bias)

        if output_attentions:
            outputs = outputs + (attn_weights,)
        return outputs


class Pop2PianoLayerSelfAttention(nn.Module):
    def __init__(self, config, has_relative_attention_bias=False, layer_idx: Optional[int] = None):
        super().__init__()
        self.SelfAttention = Pop2PianoAttention(
            config, has_relative_attention_bias=has_relative_attention_bias, layer_idx=layer_idx
        )
        self.layer_norm = Pop2PianoLayerNorm(config.d_model, eps=config.layer_norm_epsilon)
        self.dropout = nn.Dropout(config.dropout_rate)

    def forward(
        self,
        hidden_states,
        attention_mask=None,
        position_bias=None,
        layer_head_mask=None,
        past_key_value=None,
        use_cache=False,
        output_attentions=False,
        cache_position=None,
    ):
        normed_hidden_states = self.layer_norm(hidden_states)
        attention_output = self.SelfAttention(
            normed_hidden_states,
            mask=attention_mask,
            position_bias=position_bias,
            layer_head_mask=layer_head_mask,
            past_key_value=past_key_value,
            use_cache=use_cache,
            output_attentions=output_attentions,
            cache_position=cache_position,
        )
        hidden_states = hidden_states + self.dropout(attention_output[0])
        outputs = (hidden_states,) + attention_output[1:]  # add attentions if we output them
        return outputs


class Pop2PianoLayerCrossAttention(nn.Module):
    def __init__(self, config, layer_idx: Optional[int] = None):
        super().__init__()
        self.EncDecAttention = Pop2PianoAttention(config, has_relative_attention_bias=False, layer_idx=layer_idx)
        self.layer_norm = Pop2PianoLayerNorm(config.d_model, eps=config.layer_norm_epsilon)
        self.dropout = nn.Dropout(config.dropout_rate)

    def forward(
        self,
        hidden_states,
        key_value_states,
        attention_mask=None,
        position_bias=None,
        layer_head_mask=None,
        past_key_value=None,
        use_cache=False,
        query_length=None,
        output_attentions=False,
        cache_position=None,
    ):
        normed_hidden_states = self.layer_norm(hidden_states)
        attention_output = self.EncDecAttention(
            normed_hidden_states,
            mask=attention_mask,
            key_value_states=key_value_states,
            position_bias=position_bias,
            layer_head_mask=layer_head_mask,
            past_key_value=past_key_value,
            use_cache=use_cache,
            query_length=query_length,
            output_attentions=output_attentions,
            cache_position=cache_position,
        )
        layer_output = hidden_states + self.dropout(attention_output[0])
        outputs = (layer_output,) + attention_output[1:]  # add attentions if we output them
        return outputs


class Pop2PianoBlock(GradientCheckpointingLayer):
    def __init__(self, config, has_relative_attention_bias=False, layer_idx: Optional[int] = None):
        super().__init__()
        self.is_decoder = config.is_decoder
        self.layer = nn.ModuleList()
        self.layer.append(
            Pop2PianoLayerSelfAttention(
                config, has_relative_attention_bias=has_relative_attention_bias, layer_idx=layer_idx
            )
        )
        if self.is_decoder:
            self.layer.append(Pop2PianoLayerCrossAttention(config, layer_idx=layer_idx))

        self.layer.append(Pop2PianoLayerFF(config))

    def forward(
        self,
        hidden_states,
        attention_mask=None,
        position_bias=None,
        encoder_hidden_states=None,
        encoder_attention_mask=None,
        encoder_decoder_position_bias=None,
        layer_head_mask=None,
        cross_attn_layer_head_mask=None,
        past_key_value=None,
        use_cache=False,
        output_attentions=False,
        return_dict=True,
        cache_position=None,
    ):
        self_attention_outputs = self.layer[0](
            hidden_states,
            attention_mask=attention_mask,
            position_bias=position_bias,
            layer_head_mask=layer_head_mask,
            past_key_value=past_key_value,
            use_cache=use_cache,
            output_attentions=output_attentions,
            cache_position=cache_position,
        )
        hidden_states = self_attention_outputs[0]
        attention_outputs = self_attention_outputs[1:]  # Keep self-attention outputs and relative position weights

        # clamp inf values to enable fp16 training
        if hidden_states.dtype == torch.float16:
            clamp_value = torch.where(
                torch.isinf(hidden_states).any(),
                torch.finfo(hidden_states.dtype).max - 1000,
                torch.finfo(hidden_states.dtype).max,
            )
            hidden_states = torch.clamp(hidden_states, min=-clamp_value, max=clamp_value)

        do_cross_attention = self.is_decoder and encoder_hidden_states is not None
        if do_cross_attention:
            cross_attention_outputs = self.layer[1](
                hidden_states,
                key_value_states=encoder_hidden_states,
                attention_mask=encoder_attention_mask,
                position_bias=encoder_decoder_position_bias,
                layer_head_mask=cross_attn_layer_head_mask,
                past_key_value=past_key_value,
                query_length=cache_position[-1] + 1,
                use_cache=use_cache,
                output_attentions=output_attentions,
            )
            hidden_states = cross_attention_outputs[0]

            # clamp inf values to enable fp16 training
            if hidden_states.dtype == torch.float16:
                clamp_value = torch.where(
                    torch.isinf(hidden_states).any(),
                    torch.finfo(hidden_states.dtype).max - 1000,
                    torch.finfo(hidden_states.dtype).max,
                )
                hidden_states = torch.clamp(hidden_states, min=-clamp_value, max=clamp_value)

            # Keep cross-attention outputs and relative position weights
            attention_outputs = attention_outputs + cross_attention_outputs[1:]

        # Apply Feed Forward layer
        hidden_states = self.layer[-1](hidden_states)

        # clamp inf values to enable fp16 training
        if hidden_states.dtype == torch.float16:
            clamp_value = torch.where(
                torch.isinf(hidden_states).any(),
                torch.finfo(hidden_states.dtype).max - 1000,
                torch.finfo(hidden_states.dtype).max,
            )
            hidden_states = torch.clamp(hidden_states, min=-clamp_value, max=clamp_value)

        outputs = (hidden_states,)
        return outputs + attention_outputs


@auto_docstring
class Pop2PianoPreTrainedModel(PreTrainedModel):
    config: Pop2PianoConfig
    base_model_prefix = "transformer"
    is_parallelizable = False
    supports_gradient_checkpointing = True
    _no_split_modules = ["Pop2PianoBlock"]
    _keep_in_fp32_modules = ["wo"]

    def _init_weights(self, module):
        """Initialize the weights"""
        factor = self.config.initializer_factor  # Used for testing weights initialization
        if isinstance(module, Pop2PianoLayerNorm):
            module.weight.data.fill_(factor * 1.0)
        elif isinstance(module, Pop2PianoConcatEmbeddingToMel):
            module.embedding.weight.data.normal_(mean=0.0, std=factor * 1.0)
        elif isinstance(module, Pop2PianoForConditionalGeneration):
            # Mesh TensorFlow embeddings initialization
            module.shared.weight.data.normal_(mean=0.0, std=factor * 1.0)
            if hasattr(module, "lm_head") and not self.config.tie_word_embeddings:
                module.lm_head.weight.data.normal_(mean=0.0, std=factor * 1.0)
        elif isinstance(module, Pop2PianoDenseActDense):
            # Mesh TensorFlow FF initialization
            module.wi.weight.data.normal_(mean=0.0, std=factor * ((self.config.d_model) ** -0.5))
            if hasattr(module.wi, "bias") and module.wi.bias is not None:
                module.wi.bias.data.zero_()
            module.wo.weight.data.normal_(mean=0.0, std=factor * ((self.config.d_ff) ** -0.5))
            if hasattr(module.wo, "bias") and module.wo.bias is not None:
                module.wo.bias.data.zero_()
        elif isinstance(module, Pop2PianoDenseGatedActDense):
            module.wi_0.weight.data.normal_(mean=0.0, std=factor * ((self.config.d_model) ** -0.5))
            if hasattr(module.wi_0, "bias") and module.wi_0.bias is not None:
                module.wi_0.bias.data.zero_()
            module.wi_1.weight.data.normal_(mean=0.0, std=factor * ((self.config.d_model) ** -0.5))
            if hasattr(module.wi_1, "bias") and module.wi_1.bias is not None:
                module.wi_1.bias.data.zero_()
            module.wo.weight.data.normal_(mean=0.0, std=factor * ((self.config.d_ff) ** -0.5))
            if hasattr(module.wo, "bias") and module.wo.bias is not None:
                module.wo.bias.data.zero_()
        elif isinstance(module, Pop2PianoAttention):
            # Mesh TensorFlow attention initialization to avoid scaling before softmax
            d_model = self.config.d_model
            key_value_proj_dim = self.config.d_kv
            n_heads = self.config.num_heads
            module.q.weight.data.normal_(mean=0.0, std=factor * ((d_model * key_value_proj_dim) ** -0.5))
            module.k.weight.data.normal_(mean=0.0, std=factor * (d_model**-0.5))
            module.v.weight.data.normal_(mean=0.0, std=factor * (d_model**-0.5))
            module.o.weight.data.normal_(mean=0.0, std=factor * ((n_heads * key_value_proj_dim) ** -0.5))
            if module.has_relative_attention_bias:
                module.relative_attention_bias.weight.data.normal_(mean=0.0, std=factor * ((d_model) ** -0.5))

    def _shift_right(self, input_ids):
        decoder_start_token_id = self.config.decoder_start_token_id
        pad_token_id = self.config.pad_token_id

        if decoder_start_token_id is None:
            raise ValueError(
                "self.model.config.decoder_start_token_id has to be defined. In Pop2Piano it is usually set to the "
                "pad_token_id."
            )

        # shift inputs to the right
        if is_torch_fx_proxy(input_ids):
            # Item assignment is not supported natively for proxies.
            shifted_input_ids = torch.full(input_ids.shape[:-1] + (1,), decoder_start_token_id)
            shifted_input_ids = torch.cat([shifted_input_ids, input_ids[..., :-1]], dim=-1)
        else:
            shifted_input_ids = input_ids.new_zeros(input_ids.shape)
            shifted_input_ids[..., 1:] = input_ids[..., :-1].clone()
            shifted_input_ids[..., 0] = decoder_start_token_id

        if pad_token_id is None:
            raise ValueError("self.model.config.pad_token_id has to be defined.")
        # replace possible -100 values in labels by `pad_token_id`
        shifted_input_ids.masked_fill_(shifted_input_ids == -100, pad_token_id)

        return shifted_input_ids


class Pop2PianoStack(Pop2PianoPreTrainedModel):
    def __init__(self, config, embed_tokens=None):
        super().__init__(config)

        self.embed_tokens = embed_tokens
        self.is_decoder = config.is_decoder

        self.block = nn.ModuleList(
            [
                Pop2PianoBlock(config, has_relative_attention_bias=bool(i == 0), layer_idx=i)
                for i in range(config.num_layers)
            ]
        )
        self.final_layer_norm = Pop2PianoLayerNorm(config.d_model, eps=config.layer_norm_epsilon)
        self.dropout = nn.Dropout(config.dropout_rate)

        # Initialize weights and apply final processing
        self.post_init()
        # Model parallel
        self.model_parallel = False
        self.device_map = None
        self.gradient_checkpointing = False

    def set_input_embeddings(self, new_embeddings):
        self.embed_tokens = new_embeddings

    def forward(
        self,
        input_ids=None,
        attention_mask=None,
        encoder_hidden_states=None,
        encoder_attention_mask=None,
        inputs_embeds=None,
        head_mask=None,
        cross_attn_head_mask=None,
        past_key_values=None,
        use_cache=None,
        output_attentions=None,
        output_hidden_states=None,
        return_dict=None,
        cache_position=None,
    ):
        use_cache = use_cache if use_cache is not None else self.config.use_cache
        output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
        output_hidden_states = (
            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
        )
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        if input_ids is not None and inputs_embeds is not None:
            err_msg_prefix = "decoder_" if self.is_decoder else ""
            raise ValueError(
                f"You cannot specify both {err_msg_prefix}input_ids and {err_msg_prefix}inputs_embeds at the same time"
            )
        elif input_ids is not None:
            input_shape = input_ids.size()
            input_ids = input_ids.view(-1, input_shape[-1])
        elif inputs_embeds is not None:
            input_shape = inputs_embeds.size()[:-1]
        else:
            err_msg_prefix = "decoder_" if self.is_decoder else ""
            raise ValueError(f"You have to specify either {err_msg_prefix}input_ids or {err_msg_prefix}inputs_embeds")

        if self.gradient_checkpointing and self.training:
            if use_cache:
                logger.warning_once(
                    "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
                )
                use_cache = False

        if inputs_embeds is None:
            if self.embed_tokens is None:
                raise ValueError("You have to initialize the model with valid token embeddings")
            inputs_embeds = self.embed_tokens(input_ids)

        batch_size, seq_length = input_shape

        if use_cache is True:
            if not self.is_decoder:
                raise ValueError(f"`use_cache` can only be set to `True` if {self} is used as a decoder")

        # initialize past_key_values
        if self.is_decoder and use_cache and past_key_values is None:
            if self.config.is_encoder_decoder:
                past_key_values = EncoderDecoderCache(DynamicCache(), DynamicCache())
            else:
                past_key_values = DynamicCache()
        elif not self.is_decoder:
            # the encoder never caches
            past_key_values = None

        past_key_values_length = past_key_values.get_seq_length() if past_key_values is not None else 0
        if cache_position is None:
            cache_position = torch.arange(
                past_key_values_length, past_key_values_length + seq_length, device=inputs_embeds.device
            )

        if attention_mask is None and not is_torchdynamo_compiling():
            # required mask seq length can be calculated via length of past cache
            mask_seq_length = past_key_values_length + seq_length
            attention_mask = torch.ones(batch_size, mask_seq_length, device=inputs_embeds.device)

        if self.config.is_decoder:
            causal_mask = self._update_causal_mask(
                attention_mask,
                inputs_embeds,
                cache_position,
                past_key_values.self_attention_cache
                if isinstance(past_key_values, EncoderDecoderCache)
                else past_key_values,
                output_attentions,
            )
        elif attention_mask is not None:
            causal_mask = attention_mask[:, None, None, :]
            causal_mask = causal_mask.to(dtype=inputs_embeds.dtype)
            causal_mask = (1.0 - causal_mask) * torch.finfo(inputs_embeds.dtype).min
        else:
            causal_mask = None

        # If a 2D or 3D attention mask is provided for the cross-attention,
        # we need to make it broadcastable to [batch_size, num_heads, seq_length, seq_length]
        if self.is_decoder and encoder_hidden_states is not None:
            encoder_batch_size, encoder_sequence_length, _ = encoder_hidden_states.size()
            encoder_hidden_shape = (encoder_batch_size, encoder_sequence_length)
            if encoder_attention_mask is None:
                encoder_attention_mask = torch.ones(encoder_hidden_shape, device=inputs_embeds.device)
            encoder_extended_attention_mask = self.invert_attention_mask(encoder_attention_mask)
        else:
            encoder_extended_attention_mask = None

        # Prepare head mask if needed
        head_mask = self.get_head_mask(head_mask, self.config.num_layers)
        cross_attn_head_mask = self.get_head_mask(cross_attn_head_mask, self.config.num_layers)
        all_hidden_states = () if output_hidden_states else None
        all_attentions = () if output_attentions else None
        all_cross_attentions = () if (output_attentions and self.is_decoder) else None
        position_bias = None
        encoder_decoder_position_bias = None

        hidden_states = self.dropout(inputs_embeds)

        for i, layer_module in enumerate(self.block):
            layer_head_mask = head_mask[i]
            cross_attn_layer_head_mask = cross_attn_head_mask[i]

            if output_hidden_states:
                all_hidden_states = all_hidden_states + (hidden_states,)

            layer_outputs = layer_module(
                hidden_states,
                causal_mask,
                position_bias,
                encoder_hidden_states,
                encoder_extended_attention_mask,
                encoder_decoder_position_bias,  # as a positional argument for gradient checkpointing
                layer_head_mask=layer_head_mask,
                cross_attn_layer_head_mask=cross_attn_layer_head_mask,
                past_key_value=past_key_values,
                use_cache=use_cache,
                output_attentions=output_attentions,
                return_dict=return_dict,
                cache_position=cache_position,
            )

            hidden_states = layer_outputs[0]

            # We share the position biases between the layers - the first layer stores them
            position_bias = layer_outputs[1]
            if self.is_decoder and encoder_hidden_states is not None:
                encoder_decoder_position_bias = layer_outputs[3 if output_attentions else 2]

            if output_attentions:
                all_attentions = all_attentions + (layer_outputs[2],)
                if self.is_decoder:
                    all_cross_attentions = all_cross_attentions + (layer_outputs[4],)

        hidden_states = self.final_layer_norm(hidden_states)
        hidden_states = self.dropout(hidden_states)

        # Add last layer
        if output_hidden_states:
            all_hidden_states = all_hidden_states + (hidden_states,)

        if not return_dict:
            return tuple(
                v
                for v in [
                    hidden_states,
                    past_key_values,
                    all_hidden_states,
                    all_attentions,
                    all_cross_attentions,
                ]
                if v is not None
            )
        return BaseModelOutputWithPastAndCrossAttentions(
            last_hidden_state=hidden_states,
            past_key_values=past_key_values,
            hidden_states=all_hidden_states,
            attentions=all_attentions,
            cross_attentions=all_cross_attentions,
        )

    def _update_causal_mask(
        self,
        attention_mask: Union[torch.Tensor, "BlockMask"],
        input_tensor: torch.Tensor,
        cache_position: torch.Tensor,
        past_key_values: Cache,
        output_attentions: bool = False,
    ):
        if self.config._attn_implementation == "flash_attention_2":
            if attention_mask is not None and (attention_mask == 0.0).any():
                return attention_mask
            return None
        if self.config._attn_implementation == "flex_attention":
            if isinstance(attention_mask, torch.Tensor):
                attention_mask = make_flex_block_causal_mask(attention_mask)
            return attention_mask

        # For SDPA, when possible, we rely on its `is_causal` argument instead of its `attn_mask` argument,
        # in order to dispatch on Flash Attention 2. This is not compatible with compilable (static) caches.
        past_seen_tokens = past_key_values.get_seq_length() if past_key_values is not None else 0
        using_compilable_cache = past_key_values.is_compileable if past_key_values is not None else False

        if self.config._attn_implementation == "sdpa" and not using_compilable_cache and not output_attentions:
            if AttentionMaskConverter._ignore_causal_mask_sdpa(
                attention_mask,
                inputs_embeds=input_tensor,
                past_key_values_length=past_seen_tokens,
                is_training=self.training,
            ):
                return None

        dtype = input_tensor.dtype
        sequence_length = input_tensor.shape[1]
        if using_compilable_cache:
            target_length = past_key_values.get_max_cache_shape()
        else:
            target_length = (
                attention_mask.shape[-1]
                if isinstance(attention_mask, torch.Tensor)
                else past_seen_tokens + sequence_length + 1
            )

        # In case the provided attention mask is 2D, we generate a causal 4D mask here.
        causal_mask = self._prepare_4d_causal_attention_mask_with_cache_position(
            attention_mask,
            sequence_length=sequence_length,
            target_length=target_length,
            dtype=dtype,
            cache_position=cache_position,
            batch_size=input_tensor.shape[0],
        )

        if (
            self.config._attn_implementation == "sdpa"
            and attention_mask is not None
            and attention_mask.device.type in ["cuda", "xpu", "npu"]
            and not output_attentions
        ):
            # Attend to all tokens in fully masked rows in the causal_mask, for example the relevant first rows when
            # using left padding. This is required by the memory-efficient SDPA attention path.
            min_dtype = torch.finfo(dtype).min
            causal_mask = AttentionMaskConverter._unmask_unattended(causal_mask, min_dtype)

        return causal_mask

    @staticmethod
    def _prepare_4d_causal_attention_mask_with_cache_position(
        attention_mask: torch.Tensor,
        sequence_length: int,
        target_length: int,
        dtype: torch.dtype,
        cache_position: torch.Tensor,
        batch_size: int,
        **kwargs,
    ):
        """
        Creates a causal 4D mask of shape `(batch_size, 1, query_length, key_value_length)` from a 2D mask of shape
        `(batch_size, key_value_length)`, or if the input `attention_mask` is already 4D, do nothing.

        Args:
            attention_mask (`torch.Tensor`):
                A 2D attention mask of shape `(batch_size, key_value_length)` or a 4D attention mask of shape
                `(batch_size, 1, query_length, key_value_length)`.
            sequence_length (`int`):
                The sequence length being processed.
            target_length (`int`):
                The target length: when generating with static cache, the mask should be as long as the static cache,
                to account for the 0 padding, the part of the cache that is not filled yet.
            dtype (`torch.dtype`):
                The dtype to use for the 4D attention mask.
            cache_position (`torch.Tensor`):
                Indices depicting the position of the input sequence tokens in the sequence.
            batch_size (`torch.Tensor`):
                Batch size.
        """
        if attention_mask is not None and attention_mask.dim() == 4:
            # In this case we assume that the mask comes already in inverted form and requires no inversion or slicing.
            causal_mask = attention_mask
        else:
            min_dtype = torch.finfo(dtype).min
            causal_mask = torch.full(
                (sequence_length, target_length), fill_value=min_dtype, dtype=dtype, device=cache_position.device
            )
            if sequence_length != 1:
                causal_mask = torch.triu(causal_mask, diagonal=1)
            causal_mask *= torch.arange(target_length, device=cache_position.device) > cache_position.reshape(-1, 1)
            causal_mask = causal_mask[None, None, :, :].expand(batch_size, 1, -1, -1)
            if attention_mask is not None:
                causal_mask = causal_mask.clone()  # copy to contiguous memory for in-place edit
                mask_length = attention_mask.shape[-1]
                padding_mask = causal_mask[:, :, :, :mask_length] + attention_mask[:, None, None, :].to(
                    causal_mask.device
                )
                padding_mask = padding_mask == 0
                causal_mask[:, :, :, :mask_length] = causal_mask[:, :, :, :mask_length].masked_fill(
                    padding_mask, min_dtype
                )

        return causal_mask


class Pop2PianoConcatEmbeddingToMel(nn.Module):
    """Embedding Matrix for `composer` tokens."""

    def __init__(self, config):
        super().__init__()
        self.embedding = nn.Embedding(num_embeddings=config.composer_vocab_size, embedding_dim=config.d_model)

    def forward(self, feature, index_value, embedding_offset):
        index_shifted = index_value - embedding_offset
        composer_embedding = self.embedding(index_shifted).unsqueeze(1)
        inputs_embeds = torch.cat([composer_embedding, feature], dim=1)
        return inputs_embeds


@auto_docstring(
    custom_intro="""
    Pop2Piano Model with a `language modeling` head on top.
    """
)
class Pop2PianoForConditionalGeneration(Pop2PianoPreTrainedModel, GenerationMixin):
    _tied_weights_keys = ["encoder.embed_tokens.weight", "decoder.embed_tokens.weight", "lm_head.weight"]

    def __init__(self, config: Pop2PianoConfig):
        super().__init__(config)
        self.config = config
        self.model_dim = config.d_model

        self.shared = nn.Embedding(config.vocab_size, config.d_model)

        self.mel_conditioner = Pop2PianoConcatEmbeddingToMel(config)

        encoder_config = copy.deepcopy(config)
        encoder_config.is_decoder = False
        encoder_config.use_cache = False
        encoder_config.is_encoder_decoder = False
        self.encoder = Pop2PianoStack(encoder_config, self.shared)

        decoder_config = copy.deepcopy(config)
        decoder_config.is_decoder = True
        decoder_config.is_encoder_decoder = False
        decoder_config.num_layers = config.num_decoder_layers
        self.decoder = Pop2PianoStack(decoder_config, self.shared)

        self.lm_head = nn.Linear(config.d_model, config.vocab_size, bias=False)

        # Initialize weights and apply final processing
        self.post_init()

    def get_input_embeddings(self):
        return self.shared

    def set_input_embeddings(self, new_embeddings):
        self.shared = new_embeddings
        self.encoder.set_input_embeddings(new_embeddings)
        self.decoder.set_input_embeddings(new_embeddings)

    def get_encoder(self):
        return self.encoder

    def get_decoder(self):
        return self.decoder

    def get_mel_conditioner_outputs(
        self,
        input_features: torch.FloatTensor,
        composer: str,
        generation_config: GenerationConfig,
        attention_mask: Optional[torch.FloatTensor] = None,
    ):
        """
        This method is used to concatenate mel conditioner tokens at the front of the input_features in order to
        control the type of MIDI token generated by the model.

        Args:
            input_features (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`):
                input features extracted from the feature extractor.
            composer (`str`):
                composer token which determines the type of MIDI tokens to be generated.
            generation_config (`~generation.GenerationConfig`):
                The generation config is used to get the composer-feature_token pair.
            attention_mask (``, *optional*):
                For batched generation `input_features` are padded to have the same shape across all examples.
                `attention_mask` helps to determine which areas were padded and which were not.
                - 1 for tokens that are **not padded**,
                - 0 for tokens that are **padded**.
        """
        composer_to_feature_token = generation_config.composer_to_feature_token
        if composer not in composer_to_feature_token.keys():
            raise ValueError(
                f"Please choose a composer from {list(composer_to_feature_token.keys())}. Composer received - {composer}"
            )
        composer_value = composer_to_feature_token[composer]
        composer_value = torch.tensor(composer_value, device=self.device)
        composer_value = composer_value.repeat(input_features.shape[0])

        embedding_offset = min(composer_to_feature_token.values())

        input_features = self.mel_conditioner(
            feature=input_features,
            index_value=composer_value,
            embedding_offset=embedding_offset,
        )
        if attention_mask is not None:
            input_features[~attention_mask[:, 0].bool()] = 0.0

            # since self.mel_conditioner adds a new array at the front of inputs_embeds we need to do the same for
            # attention_mask to keep the shapes the same
            attention_mask = torch.concatenate([attention_mask[:, 0].view(-1, 1), attention_mask], axis=1)
            return input_features, attention_mask

        return input_features, None

    @auto_docstring
    def forward(
        self,
        input_ids: Optional[torch.LongTensor] = None,
        attention_mask: Optional[torch.FloatTensor] = None,
        decoder_input_ids: Optional[torch.LongTensor] = None,
        decoder_attention_mask: Optional[torch.BoolTensor] = None,
        head_mask: Optional[torch.FloatTensor] = None,
        decoder_head_mask: Optional[torch.FloatTensor] = None,
        cross_attn_head_mask: Optional[torch.Tensor] = None,
        encoder_outputs: Optional[tuple[tuple[torch.Tensor]]] = None,
        past_key_values: Optional[Cache] = None,
        input_features: Optional[torch.FloatTensor] = None,
        inputs_embeds: Optional[torch.Tensor] = None,
        decoder_inputs_embeds: Optional[torch.Tensor] = None,
        labels: Optional[torch.LongTensor] = None,
        use_cache: Optional[bool] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
        cache_position: Optional[torch.LongTensor] = None,
    ) -> Union[tuple[torch.FloatTensor], Seq2SeqLMOutput]:
        r"""
        input_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`):
            Indices of input sequence tokens in the vocabulary. Pop2Piano is a model with relative position embeddings
            so you should be able to pad the inputs on both the right and the left. Indices can be obtained using
            [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and [`PreTrainedTokenizer.__call__`] for details.
            [What are input IDs?](../glossary#input-ids) To know more on how to prepare `input_ids` for pretraining
            take a look at [Pop2Piano Training](./Pop2Piano#training).
        decoder_input_ids (`torch.LongTensor` of shape `(batch_size, target_sequence_length)`, *optional*):
            Indices of decoder input sequence tokens in the vocabulary. Indices can be obtained using
            [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and [`PreTrainedTokenizer.__call__`] for details.
            [What are decoder input IDs?](../glossary#decoder-input-ids) Pop2Piano uses the `pad_token_id` as the
            starting token for `decoder_input_ids` generation. If `past_key_values` is used, optionally only the last
            `decoder_input_ids` have to be input (see `past_key_values`). To know more on how to prepare
            `decoder_input_ids` for pretraining take a look at [Pop2Piano Training](./Pop2Piano#training).
        decoder_attention_mask (`torch.BoolTensor` of shape `(batch_size, target_sequence_length)`, *optional*):
            Default behavior: generate a tensor that ignores pad tokens in `decoder_input_ids`. Causal mask will also
            be used by default.
        decoder_head_mask (`torch.FloatTensor` of shape `(num_heads,)` or `(num_layers, num_heads)`, *optional*):
            Mask to nullify selected heads of the self-attention modules in the decoder. Mask values selected in `[0,
            1]`:
            - 1 indicates the head is **not masked**,
            - 0 indicates the head is **masked**.
        cross_attn_head_mask (`torch.Tensor` of shape `(num_heads,)` or `(num_layers, num_heads)`, *optional*):
            Mask to nullify selected heads of the cross-attention modules in the decoder. Mask values selected in
            `[0, 1]`:
            - 1 indicates the head is **not masked**,
            - 0 indicates the head is **masked**.
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the sequence classification/regression loss. Indices should be in `[-100, 0, ...,
            config.vocab_size - 1]`. All labels set to `-100` are ignored (masked), the loss is only computed for
            labels in `[0, ..., config.vocab_size]`
        """
        use_cache = use_cache if use_cache is not None else self.config.use_cache
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        if inputs_embeds is not None and input_features is not None:
            raise ValueError("Both `inputs_embeds` and `input_features` received! Please provide only one of them")
        elif input_features is not None and inputs_embeds is None:
            inputs_embeds = input_features

        # Encode if needed (training, first prediction pass)
        if encoder_outputs is None:
            encoder_outputs = self.encoder(
                input_ids=input_ids,
                attention_mask=attention_mask,
                inputs_embeds=inputs_embeds,
                head_mask=head_mask,
                output_attentions=output_attentions,
                output_hidden_states=output_hidden_states,
                return_dict=return_dict,
            )
        elif return_dict and not isinstance(encoder_outputs, BaseModelOutput):
            encoder_outputs = BaseModelOutput(
                last_hidden_state=encoder_outputs[0],
                hidden_states=encoder_outputs[1] if len(encoder_outputs) > 1 else None,
                attentions=encoder_outputs[2] if len(encoder_outputs) > 2 else None,
            )

        hidden_states = encoder_outputs[0]

        if labels is not None and decoder_input_ids is None and decoder_inputs_embeds is None:
            # get decoder inputs from shifting lm labels to the right
            decoder_input_ids = self._shift_right(labels)

        # Decode
        decoder_outputs = self.decoder(
            input_ids=decoder_input_ids,
            attention_mask=decoder_attention_mask,
            inputs_embeds=decoder_inputs_embeds,
            past_key_values=past_key_values,
            encoder_hidden_states=hidden_states,
            encoder_attention_mask=attention_mask,
            head_mask=decoder_head_mask,
            cross_attn_head_mask=cross_attn_head_mask,
            use_cache=use_cache,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
            cache_position=cache_position,
        )

        sequence_output = decoder_outputs[0]

        if self.config.tie_word_embeddings:
            # Rescale output before projecting on vocab (Mesh TensorFlow convention for tied embeddings)
            sequence_output = sequence_output * (self.model_dim**-0.5)

        lm_logits = self.lm_head(sequence_output)

        loss = None
        if labels is not None:
            loss_fct = CrossEntropyLoss(ignore_index=-100)
            loss = loss_fct(lm_logits.view(-1, lm_logits.size(-1)), labels.view(-1))

        if not return_dict:
            output = (lm_logits,) + decoder_outputs[1:] + encoder_outputs
            return ((loss,) + output) if loss is not None else output

        return Seq2SeqLMOutput(
            loss=loss,
            logits=lm_logits,
            past_key_values=decoder_outputs.past_key_values,
            decoder_hidden_states=decoder_outputs.hidden_states,
            decoder_attentions=decoder_outputs.attentions,
            cross_attentions=decoder_outputs.cross_attentions,
            encoder_last_hidden_state=encoder_outputs.last_hidden_state,
            encoder_hidden_states=encoder_outputs.hidden_states,
            encoder_attentions=encoder_outputs.attentions,
        )

    @torch.no_grad()
    def generate(
        self,
        input_features,
        attention_mask=None,
        composer="composer1",
        generation_config=None,
        **kwargs,
    ):
        """
        Generates token ids for midi outputs.

        <Tip warning={true}>

        Most generation-controlling parameters are set in `generation_config` which, if not passed, will be set to the
        model's default generation configuration. You can override any `generation_config` by passing the
        corresponding parameters to generate(), e.g. `.generate(inputs, num_beams=4, do_sample=True)`. For an overview
        of generation strategies and code examples, check out the [following guide](./generation_strategies).

        </Tip>

        Parameters:
            input_features (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
                This is the featurized version of audio generated by `Pop2PianoFeatureExtractor`.
            attention_mask:
                For batched generation `input_features` are padded to have the same shape across all examples.
                `attention_mask` helps to determine which areas were padded and which were not.
                - 1 for tokens that are **not padded**,
                - 0 for tokens that are **padded**.
            composer (`str`, *optional*, defaults to `"composer1"`):
                This value is passed to `Pop2PianoConcatEmbeddingToMel` to generate different embeddings for each
                `"composer"`. Please make sure that the composer value is present in `composer_to_feature_token` in
                `generation_config`. For an example please see
                https://huggingface.co/sweetcocoa/pop2piano/blob/main/generation_config.json .
            generation_config (`~generation.GenerationConfig`, *optional*):
                The generation configuration to be used as base parametrization for the generation call. `**kwargs`
                passed to generate matching the attributes of `generation_config` will override them. If
                `generation_config` is not provided, the default will be used, which has the following loading
                priority: 1) from the `generation_config.json` model file, if it exists; 2) from the model
                configuration. Please note that unspecified parameters will inherit [`~generation.GenerationConfig`]'s
                default values, whose documentation should be checked to parameterize generation.
            kwargs:
                Ad hoc parametrization of `generate_config` and/or additional model-specific kwargs that will be
                forwarded to the `forward` function of the model. If the model is an encoder-decoder model, encoder
                specific kwargs should not be prefixed and decoder specific kwargs should be prefixed with *decoder_*.
        Return:
            [`~utils.ModelOutput`] or `torch.LongTensor`: A [`~utils.ModelOutput`] (if `return_dict_in_generate=True`
            or when `config.return_dict_in_generate=True`) or a `torch.FloatTensor`.
                Since Pop2Piano is an encoder-decoder model (`model.config.is_encoder_decoder=True`), the possible
                [`~utils.ModelOutput`] types are:
                    - [`~generation.GenerateEncoderDecoderOutput`],
                    - [`~generation.GenerateBeamEncoderDecoderOutput`]
        """
        if generation_config is None:
            generation_config = self.generation_config
        generation_config.update(**kwargs)

        # check for composer_to_feature_token
        if not hasattr(generation_config, "composer_to_feature_token"):
            raise ValueError(
                "`composer_to_feature_token` was not found! Please refer to "
                "https://huggingface.co/sweetcocoa/pop2piano/blob/main/generation_config.json "
                "and parse a dict like that."
            )

        if len(generation_config.composer_to_feature_token) != self.config.composer_vocab_size:
            raise ValueError(
                "config.composer_vocab_size must be same as the number of keys in "
                "generation_config.composer_to_feature_token! "
                f"Found {self.config.composer_vocab_size} vs {len(generation_config.composer_to_feature_token)}."
            )

        # to control the variation of generated MIDI tokens we concatenate mel-conditioner tokens (which depend on
        # composer) at the front of input_features
        input_features, attention_mask = self.get_mel_conditioner_outputs(
            input_features=input_features,
            attention_mask=attention_mask,
            composer=composer,
            generation_config=generation_config,
        )

        return super().generate(
            inputs_embeds=input_features,
            attention_mask=attention_mask,
            generation_config=generation_config,
            **kwargs,
        )

    def prepare_decoder_input_ids_from_labels(self, labels: torch.Tensor):
        return self._shift_right(labels)


__all__ = ["Pop2PianoForConditionalGeneration", "Pop2PianoPreTrainedModel"]
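

# --- Illustrative usage sketch (not part of the original module) -----------------------------------------------
# A minimal, hedged example of driving `Pop2PianoForConditionalGeneration.generate`. It assumes the public
# "sweetcocoa/pop2piano" checkpoint referenced in the docstrings above, whose generation_config.json provides
# `composer_to_feature_token`. The random tensor below merely stands in for the featurized audio that
# `Pop2PianoFeatureExtractor` would normally produce; the shape `(batch, time, config.d_model)` and the
# "composer1" key follow the defaults documented in `generate` and `get_mel_conditioner_outputs`.
if __name__ == "__main__":
    model = Pop2PianoForConditionalGeneration.from_pretrained("sweetcocoa/pop2piano")
    model.eval()

    # Stand-in for featurized audio: (batch_size, sequence_length, hidden_size == config.d_model).
    fake_features = torch.randn(1, 64, model.config.d_model)

    token_ids = model.generate(input_features=fake_features, composer="composer1", max_length=32)
    print(token_ids.shape)  # (batch_size, generated_length) of MIDI token ids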