
"""PyTorch PLBART model."""

import math
from typing import Optional, Union

import torch
import torch.utils.checkpoint
from torch import nn
from torch.nn import CrossEntropyLoss

from ...cache_utils import Cache
from ...generation import GenerationMixin
from ...modeling_attn_mask_utils import (
    AttentionMaskConverter,
    _prepare_4d_attention_mask,
    _prepare_4d_attention_mask_for_sdpa,
)
from ...modeling_outputs import BaseModelOutput, Seq2SeqLMOutput, Seq2SeqModelOutput
from ...modeling_utils import PreTrainedModel
from ...utils import auto_docstring, is_torch_flex_attn_available
from ..bart.modeling_bart import (
    BartClassificationHead,
    BartDecoder,
    BartEncoder,
    BartForCausalLM,
    BartScaledWordEmbedding,
)
from ..bigbird_pegasus.modeling_bigbird_pegasus import BigBirdPegasusForSequenceClassification
from ..mbart.modeling_mbart import shift_tokens_right
from .configuration_plbart import PLBartConfig


if is_torch_flex_attn_available():
    from ...integrations.flex_attention import BlockMask, make_flex_block_causal_mask


class PLBartScaledWordEmbedding(BartScaledWordEmbedding):
    pass


@auto_docstring
class PLBartPreTrainedModel(PreTrainedModel):
    config: PLBartConfig
    base_model_prefix = "model"
    supports_gradient_checkpointing = True
    _no_split_modules = ["PLBartDecoderLayer", "PLBartEncoderLayer"]
    _supports_flash_attn = True
    _supports_sdpa = True
    _supports_flex_attn = True

    def _update_full_mask(
        self,
        attention_mask: Union[torch.Tensor, None],
        inputs_embeds: torch.Tensor,
    ) -> Union[torch.Tensor, None]:
        # Expand the 2D padding mask into the shape expected by the configured attention backend.
        if attention_mask is not None:
            if self.config._attn_implementation == "flash_attention_2":
                attention_mask = attention_mask if 0 in attention_mask else None
            elif self.config._attn_implementation == "sdpa":
                # [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len]
                attention_mask = _prepare_4d_attention_mask_for_sdpa(attention_mask, inputs_embeds.dtype)
            elif self.config._attn_implementation == "flex_attention":
                if isinstance(attention_mask, torch.Tensor):
                    attention_mask = make_flex_block_causal_mask(attention_mask, is_causal=False)
            else:
                # [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len]
                attention_mask = _prepare_4d_attention_mask(attention_mask, inputs_embeds.dtype)

        return attention_mask

    def _update_causal_mask(
        self,
        attention_mask: Optional[Union[torch.Tensor, "BlockMask"]],
        input_tensor: torch.Tensor,
        cache_position: torch.Tensor,
        past_key_values: Cache,
    ):
        if self.config._attn_implementation == "flex_attention":
            if isinstance(attention_mask, torch.Tensor):
                attention_mask = make_flex_block_causal_mask(attention_mask)
            elif attention_mask is None:
                attention_mask = make_flex_block_causal_mask(
                    torch.ones(size=(input_tensor.shape[0], input_tensor.shape[1]), device=input_tensor.device)
                )
            return attention_mask

        if self.config._attn_implementation == "flash_attention_2":
            if attention_mask is not None and (attention_mask == 0.0).any():
                return attention_mask
            return None

        past_seen_tokens = past_key_values.get_seq_length() if past_key_values is not None else 0
        using_compilable_cache = past_key_values.is_compileable if past_key_values is not None else False

        # When possible, rely on SDPA's `is_causal` argument instead of materializing a 4D mask.
        if self.config._attn_implementation == "sdpa" and not using_compilable_cache:
            if AttentionMaskConverter._ignore_causal_mask_sdpa(
                attention_mask,
                inputs_embeds=input_tensor,
                past_key_values_length=past_seen_tokens,
                is_training=self.training,
            ):
                return None

        dtype = input_tensor.dtype
        sequence_length = input_tensor.shape[1]
        if using_compilable_cache:
            target_length = past_key_values.get_max_cache_shape()
        else:
            target_length = (
                attention_mask.shape[-1]
                if isinstance(attention_mask, torch.Tensor)
                else past_seen_tokens + sequence_length + 1
            )

        # If a 2D attention mask (or none) was provided, build the full 4D causal mask here.
        causal_mask = self._prepare_4d_causal_attention_mask_with_cache_position(
            attention_mask,
            sequence_length=sequence_length,
            target_length=target_length,
            dtype=dtype,
            cache_position=cache_position,
            batch_size=input_tensor.shape[0],
        )

        if (
            self.config._attn_implementation == "sdpa"
            and attention_mask is not None
            and attention_mask.device.type in ["cuda", "xpu", "npu"]
        ):
            # Attend to all tokens in fully masked rows, as required by the SDPA memory-efficient attention path.
            min_dtype = torch.finfo(dtype).min
            causal_mask = AttentionMaskConverter._unmask_unattended(causal_mask, min_dtype)

        return causal_mask

    @staticmethod
    def _prepare_4d_causal_attention_mask_with_cache_position(
        attention_mask: torch.Tensor,
        sequence_length: int,
        target_length: int,
        dtype: torch.dtype,
        cache_position: torch.Tensor,
        batch_size: int,
        **kwargs,
    ):
        """
Creates a causal 4D mask of shape `(batch_size, 1, query_length, key_value_length)` from a 2D mask of shape
`(batch_size, key_value_length)`, or if the input `attention_mask` is already 4D, do nothing.

Args:
    attention_mask (`torch.Tensor`):
        A 2D attention mask of shape `(batch_size, key_value_length)` or a 4D attention mask of shape
        `(batch_size, 1, query_length, key_value_length)`.
    sequence_length (`int`):
        The sequence length being processed.
    target_length (`int`):
        The target length: when generating with static cache, the mask should be as long as the static cache,
        to account for the 0 padding, the part of the cache that is not filled yet.
    dtype (`torch.dtype`):
        The dtype to use for the 4D attention mask.
    cache_position (`torch.Tensor`):
        Indices depicting the position of the input sequence tokens in the sequence.
    batch_size (`torch.Tensor`):
        Batch size.
        """
        if attention_mask is not None and attention_mask.dim() == 4:
            # The mask already comes in an inverted 4D form, so nothing has to be done.
            causal_mask = attention_mask
        else:
            min_dtype = torch.finfo(dtype).min
            causal_mask = torch.full(
                (sequence_length, target_length), fill_value=min_dtype, dtype=dtype, device=cache_position.device
            )
            if sequence_length != 1:
                causal_mask = torch.triu(causal_mask, diagonal=1)
            causal_mask *= torch.arange(target_length, device=cache_position.device) > cache_position.reshape(-1, 1)
            causal_mask = causal_mask[None, None, :, :].expand(batch_size, 1, -1, -1)
            if attention_mask is not None:
                causal_mask = causal_mask.clone()  # copy to contiguous memory for in-place edit
                mask_length = attention_mask.shape[-1]
                padding_mask = causal_mask[:, :, :, :mask_length] + attention_mask[:, None, None, :].to(
                    causal_mask.device
                )
                padding_mask = padding_mask == 0
                causal_mask[:, :, :, :mask_length] = causal_mask[:, :, :, :mask_length].masked_fill(
                    padding_mask, min_dtype
                )

        return causal_mask

    def _update_cross_attn_mask(
        self,
        encoder_hidden_states: Union[torch.Tensor, None],
        encoder_attention_mask: Union[torch.Tensor, None],
        input_shape: torch.Size,
        inputs_embeds: torch.Tensor,
    ) -> Union[torch.Tensor, None]:
        # Expand the encoder padding mask for cross-attention, matching the configured attention backend.
        if encoder_hidden_states is not None and encoder_attention_mask is not None:
            if self.config._attn_implementation == "flash_attention_2":
                encoder_attention_mask = encoder_attention_mask if 0 in encoder_attention_mask else None
            elif self.config._attn_implementation == "sdpa":
                # [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len]
                encoder_attention_mask = _prepare_4d_attention_mask_for_sdpa(
                    encoder_attention_mask,
                    inputs_embeds.dtype,
                    tgt_len=input_shape[-1],
                )
            elif self.config._attn_implementation == "flex_attention":
                if isinstance(encoder_attention_mask, torch.Tensor):
                    encoder_attention_mask = make_flex_block_causal_mask(
                        encoder_attention_mask,
                        query_length=input_shape[-1],
                        is_causal=False,
                    )
            else:
                # [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len]
                encoder_attention_mask = _prepare_4d_attention_mask(
                    encoder_attention_mask, inputs_embeds.dtype, tgt_len=input_shape[-1]
                )

        return encoder_attention_mask


class PLBartEncoder(BartEncoder):
    pass


class PLBartDecoder(BartDecoder):
    pass


@auto_docstring
class PLBartModel(PLBartPreTrainedModel):
    _tied_weights_keys = ["encoder.embed_tokens.weight", "decoder.embed_tokens.weight"]

    def __init__(self, config: PLBartConfig):
        super().__init__(config)

        padding_idx, vocab_size = config.pad_token_id, config.vocab_size
        embed_scale = math.sqrt(config.d_model) if config.scale_embedding else 1.0
        self.shared = PLBartScaledWordEmbedding(vocab_size, config.d_model, padding_idx, embed_scale=embed_scale)

        self.encoder = PLBartEncoder(config, self.shared)
        self.decoder = PLBartDecoder(config, self.shared)

        self.init_weights()

    def get_input_embeddings(self):
        return self.shared

    def set_input_embeddings(self, value):
        self.shared = value
        self.encoder.embed_tokens = self.shared
        self.decoder.embed_tokens = self.shared

    def _tie_weights(self):
        if self.config.tie_word_embeddings:
            self._tie_or_clone_weights(self.encoder.embed_tokens, self.shared)
            self._tie_or_clone_weights(self.decoder.embed_tokens, self.shared)

    def get_encoder(self):
        return self.encoder

    def get_decoder(self):
        return self.decoder

    @auto_docstring
    def forward(
        self,
        input_ids: Optional[torch.LongTensor] = None,
        attention_mask: Optional[torch.LongTensor] = None,
        decoder_input_ids: Optional[torch.LongTensor] = None,
        decoder_attention_mask: Optional[torch.LongTensor] = None,
        head_mask: Optional[torch.Tensor] = None,
        decoder_head_mask: Optional[torch.LongTensor] = None,
        cross_attn_head_mask: Optional[torch.Tensor] = None,
        encoder_outputs: Optional[list[torch.FloatTensor]] = None,
        past_key_values: Optional[Cache] = None,
        inputs_embeds: Optional[torch.FloatTensor] = None,
        decoder_inputs_embeds: Optional[torch.FloatTensor] = None,
        use_cache: Optional[bool] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
        cache_position: Optional[torch.LongTensor] = None,
    ) -> Union[tuple[torch.Tensor], Seq2SeqModelOutput]:
        r"""
decoder_input_ids (`torch.LongTensor` of shape `(batch_size, target_sequence_length)`, *optional*):
    Indices of decoder input sequence tokens in the vocabulary.

    Indices can be obtained using [`AutoTokenizer`] or [`PLBartMultiTokenizer`] depending on the checkpoint.
    See [`PreTrainedTokenizer.encode`] and [`PreTrainedTokenizer.__call__`] for details.

    [What are decoder input IDs?](../glossary#decoder-input-ids)

    PLBart uses a specific language id token as the starting token for `decoder_input_ids` generation that
    varies according to source and target language, *e.g.* 50003 for *en_XX*, and 50001 for *java*. If
    `past_key_values` is used, optionally only the last `decoder_input_ids` have to be input (see
    `past_key_values`).

    For translation and summarization training, `decoder_input_ids` should be provided. If no
    `decoder_input_ids` is provided, the model will create this tensor by shifting the `input_ids` to the right
    for denoising pre-training following the paper.
decoder_attention_mask (`torch.LongTensor` of shape `(batch_size, target_sequence_length)`, *optional*):
    Default behavior: generate a tensor that ignores pad tokens in `decoder_input_ids`. Causal mask will also
    be used by default.
cross_attn_head_mask (`torch.Tensor` of shape `(decoder_layers, decoder_attention_heads)`, *optional*):
    Mask to nullify selected heads of the cross-attention modules in the decoder. Mask values selected in `[0, 1]`:

    - 1 indicates the head is **not masked**,
    - 0 indicates the head is **masked**.
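
Example (an illustrative sketch of a plain forward pass; the `uclanlp/plbart-base` checkpoint is used here
only for illustration):

```python
>>> from transformers import AutoTokenizer, PLBartModel

>>> tokenizer = AutoTokenizer.from_pretrained("uclanlp/plbart-base")
>>> model = PLBartModel.from_pretrained("uclanlp/plbart-base")

>>> inputs = tokenizer("def maximum(a, b): return max(a, b)", return_tensors="pt")
>>> outputs = model(**inputs)  # decoder_input_ids are created internally by shifting input_ids to the right

>>> last_hidden_state = outputs.last_hidden_state  # (batch_size, sequence_length, hidden_size)
```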
        """
        output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
        output_hidden_states = (
            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
        )
        use_cache = use_cache if use_cache is not None else self.config.use_cache
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        # Different from other models: PLBart automatically creates decoder_input_ids from
        # input_ids if no decoder_input_ids are provided.
        if decoder_input_ids is None and decoder_inputs_embeds is None:
            decoder_input_ids = shift_tokens_right(input_ids, self.config.pad_token_id)

        if encoder_outputs is None:
            encoder_outputs = self.encoder(
                input_ids=input_ids,
                attention_mask=attention_mask,
                head_mask=head_mask,
                inputs_embeds=inputs_embeds,
                output_attentions=output_attentions,
                output_hidden_states=output_hidden_states,
                return_dict=return_dict,
            )
        # If the user passed a tuple for encoder_outputs, wrap it in a BaseModelOutput when return_dict=True
        elif return_dict and not isinstance(encoder_outputs, BaseModelOutput):
            encoder_outputs = BaseModelOutput(
                last_hidden_state=encoder_outputs[0],
                hidden_states=encoder_outputs[1] if len(encoder_outputs) > 1 else None,
                attentions=encoder_outputs[2] if len(encoder_outputs) > 2 else None,
            )

        # decoder outputs consists of (dec_features, past_key_values, dec_hidden, dec_attn)
        decoder_outputs = self.decoder(
            input_ids=decoder_input_ids,
            attention_mask=decoder_attention_mask,
            encoder_hidden_states=encoder_outputs[0],
            encoder_attention_mask=attention_mask,
            head_mask=decoder_head_mask,
            cross_attn_head_mask=cross_attn_head_mask,
            past_key_values=past_key_values,
            inputs_embeds=decoder_inputs_embeds,
            use_cache=use_cache,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
            cache_position=cache_position,
        )

        if not return_dict:
            return decoder_outputs + encoder_outputs

        return Seq2SeqModelOutput(
            last_hidden_state=decoder_outputs.last_hidden_state,
            past_key_values=decoder_outputs.past_key_values,
            decoder_hidden_states=decoder_outputs.hidden_states,
            decoder_attentions=decoder_outputs.attentions,
            cross_attentions=decoder_outputs.cross_attentions,
            encoder_last_hidden_state=encoder_outputs.last_hidden_state,
            encoder_hidden_states=encoder_outputs.hidden_states,
            encoder_attentions=encoder_outputs.attentions,
        )


@auto_docstring(
    custom_intro="""
    The PLBART Model with a language modeling head. Can be used for code-to-text, text-to-code and code-to-code.
    """
)
class PLBartForConditionalGeneration(PLBartPreTrainedModel, GenerationMixin):
    base_model_prefix = "model"
    _keys_to_ignore_on_load_missing = ["final_logits_bias"]
    _tied_weights_keys = ["encoder.embed_tokens.weight", "decoder.embed_tokens.weight", "lm_head.weight"]

    def __init__(self, config: PLBartConfig):
        super().__init__(config)
        self.model = PLBartModel(config)
        self.register_buffer("final_logits_bias", torch.zeros((1, self.model.shared.num_embeddings)))
        self.lm_head = nn.Linear(config.d_model, self.model.shared.num_embeddings, bias=False)

        self.init_weights()

    def get_encoder(self):
        return self.model.get_encoder()

    def get_decoder(self):
        return self.model.get_decoder()

    def resize_token_embeddings(
        self, new_num_tokens: int, pad_to_multiple_of: Optional[int] = None, mean_resizing: bool = True
    ) -> nn.Embedding:
        new_embeddings = super().resize_token_embeddings(new_num_tokens, pad_to_multiple_of, mean_resizing)
        self._resize_final_logits_bias(new_embeddings.weight.shape[0])
        return new_embeddings

    def _resize_final_logits_bias(self, new_num_tokens: int) -> None:
        old_num_tokens = self.final_logits_bias.shape[-1]
        if new_num_tokens <= old_num_tokens:
            new_bias = self.final_logits_bias[:, :new_num_tokens]
        else:
            extra_bias = torch.zeros((1, new_num_tokens - old_num_tokens), device=self.final_logits_bias.device)
            new_bias = torch.cat([self.final_logits_bias, extra_bias], dim=1)
        self.register_buffer("final_logits_bias", new_bias)

    @auto_docstring
    def forward(
        self,
        input_ids: Optional[torch.LongTensor] = None,
        attention_mask: Optional[torch.LongTensor] = None,
        decoder_input_ids: Optional[torch.LongTensor] = None,
        decoder_attention_mask: Optional[torch.Tensor] = None,
        head_mask: Optional[torch.Tensor] = None,
        decoder_head_mask: Optional[torch.LongTensor] = None,
        cross_attn_head_mask: Optional[torch.Tensor] = None,
        encoder_outputs: Optional[list[torch.FloatTensor]] = None,
        past_key_values: Optional[Cache] = None,
        inputs_embeds: Optional[torch.FloatTensor] = None,
        decoder_inputs_embeds: Optional[torch.FloatTensor] = None,
        labels: Optional[torch.Tensor] = None,
        use_cache: Optional[bool] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
        cache_position: Optional[torch.LongTensor] = None,
    ) -> Union[tuple[torch.Tensor], Seq2SeqLMOutput]:
        r"""
  
decoder_input_ids (`torch.LongTensor` of shape `(batch_size, target_sequence_length)`, *optional*):
    Indices of decoder input sequence tokens in the vocabulary.

    Indices can be obtained using [`AutoTokenizer`] or [`PLBartMultiTokenizer`] depending on the checkpoint.
    See [`PreTrainedTokenizer.encode`] and [`PreTrainedTokenizer.__call__`] for details.

    [What are decoder input IDs?](../glossary#decoder-input-ids)

    PLBart uses a specific language id token as the starting token for `decoder_input_ids` generation that
    varies according to source and target language, *e.g.* 50003 for *en_XX*, and 50001 for *java*. If
    `past_key_values` is used, optionally only the last `decoder_input_ids` have to be input (see
    `past_key_values`).

    For translation and summarization training, `decoder_input_ids` should be provided. If no
    `decoder_input_ids` is provided, the model will create this tensor by shifting the `input_ids` to the right
    for denoising pre-training following the paper.
decoder_attention_mask (`torch.LongTensor` of shape `(batch_size, target_sequence_length)`, *optional*):
    Default behavior: generate a tensor that ignores pad tokens in `decoder_input_ids`. Causal mask will also
    be used by default.
cross_attn_head_mask (`torch.Tensor` of shape `(decoder_layers, decoder_attention_heads)`, *optional*):
    Mask to nullify selected heads of the cross-attention modules in the decoder. Mask values selected in `[0, 1]`:

    - 1 indicates the head is **not masked**,
    - 0 indicates the head is **masked**.
labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
    Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
    config.vocab_size]` or -100 (see `input_ids` docstring). Tokens with indices set to `-100` are ignored
    (masked), the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`.

Example Mask-filling:

```python
>>> from transformers import AutoTokenizer, PLBartForConditionalGeneration

>>> model = PLBartForConditionalGeneration.from_pretrained("uclanlp/plbart-base")
>>> tokenizer = AutoTokenizer.from_pretrained("uclanlp/plbart-base")

>>> # en_XX is the language symbol id <LID> for English
>>> TXT = "<s> Is 0 the <mask> Fibonacci number ? </s> en_XX"
>>> input_ids = tokenizer([TXT], add_special_tokens=False, return_tensors="pt").input_ids

>>> logits = model(input_ids).logits
>>> masked_index = (input_ids[0] == tokenizer.mask_token_id).nonzero().item()
>>> probs = logits[0, masked_index].softmax(dim=0)
>>> values, predictions = probs.topk(5)

>>> tokenizer.decode(predictions).split()
['first', 'same', 'highest', 'result', 'number']
```
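
Example of code-to-text generation (an illustrative sketch, not an official reference: the
`uclanlp/plbart-python-en_XX` checkpoint, the language codes and the `lang_code_to_id` lookup below are
assumptions and may need to be adapted to the checkpoint actually used):

```python
>>> from transformers import AutoTokenizer, PLBartForConditionalGeneration

>>> tokenizer = AutoTokenizer.from_pretrained("uclanlp/plbart-python-en_XX", src_lang="python", tgt_lang="en_XX")
>>> model = PLBartForConditionalGeneration.from_pretrained("uclanlp/plbart-python-en_XX")

>>> code = "def maximum(a, b): return max(a, b)"
>>> inputs = tokenizer(code, return_tensors="pt")

>>> # start decoding with the target-language id token (en_XX)
>>> generated_ids = model.generate(**inputs, decoder_start_token_id=tokenizer.lang_code_to_id["en_XX"], max_length=32)
>>> tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
```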
        """
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        if labels is not None:
            if decoder_input_ids is None and decoder_inputs_embeds is None:
                decoder_input_ids = shift_tokens_right(labels, self.config.pad_token_id)

        outputs = self.model(
            input_ids,
            attention_mask=attention_mask,
            decoder_input_ids=decoder_input_ids,
            encoder_outputs=encoder_outputs,
            decoder_attention_mask=decoder_attention_mask,
            head_mask=head_mask,
            decoder_head_mask=decoder_head_mask,
            cross_attn_head_mask=cross_attn_head_mask,
            past_key_values=past_key_values,
            inputs_embeds=inputs_embeds,
            decoder_inputs_embeds=decoder_inputs_embeds,
            use_cache=use_cache,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
            cache_position=cache_position,
        )
        lm_logits = self.lm_head(outputs[0])
        lm_logits = lm_logits + self.final_logits_bias.to(lm_logits.device)

        masked_lm_loss = None
        if labels is not None:
            loss_fct = CrossEntropyLoss()
            masked_lm_loss = loss_fct(lm_logits.view(-1, self.config.vocab_size), labels.view(-1))

        if not return_dict:
            output = (lm_logits,) + outputs[1:]
            return ((masked_lm_loss,) + output) if masked_lm_loss is not None else output

        return Seq2SeqLMOutput(
            loss=masked_lm_loss,
            logits=lm_logits,
            past_key_values=outputs.past_key_values,
            decoder_hidden_states=outputs.decoder_hidden_states,
            decoder_attentions=outputs.decoder_attentions,
            cross_attentions=outputs.cross_attentions,
            encoder_last_hidden_state=outputs.encoder_last_hidden_state,
            encoder_hidden_states=outputs.encoder_hidden_states,
            encoder_attentions=outputs.encoder_attentions,
        )

    def prepare_decoder_input_ids_from_labels(self, labels: torch.Tensor):
        return shift_tokens_right(labels, self.config.pad_token_id)


class PLBartClassificationHead(BartClassificationHead):
    pass


class PLBartForSequenceClassification(BigBirdPegasusForSequenceClassification):
    def forward(**super_kwargs):
        r"""
decoder_input_ids (`torch.LongTensor` of shape `(batch_size, target_sequence_length)`, *optional*):
    Indices of decoder input sequence tokens in the vocabulary.

    Indices can be obtained using [`AutoTokenizer`] or [`PLBartMultiTokenizer`] depending on the checkpoint.
    See [`PreTrainedTokenizer.encode`] and [`PreTrainedTokenizer.__call__`] for details.

    [What are decoder input IDs?](../glossary#decoder-input-ids)

    PLBart uses a specific language id token as the starting token for `decoder_input_ids` generation that
    varies according to source and target language, *e.g.* 50003 for *en_XX*, and 50001 for *java*. If
    `past_key_values` is used, optionally only the last `decoder_input_ids` have to be input (see
    `past_key_values`).

    For translation and summarization training, `decoder_input_ids` should be provided. If no
    `decoder_input_ids` is provided, the model will create this tensor by shifting the `input_ids` to the right
    for denoising pre-training following the paper.
decoder_attention_mask (`torch.LongTensor` of shape `(batch_size, target_sequence_length)`, *optional*):
    Default behavior: generate a tensor that ignores pad tokens in `decoder_input_ids`. Causal mask will also
    be used by default.
cross_attn_head_mask (`torch.Tensor` of shape `(decoder_layers, decoder_attention_heads)`, *optional*):
    Mask to nullify selected heads of the cross-attention modules in the decoder. Mask values selected in `[0, 1]`:

    - 1 indicates the head is **not masked**,
    - 0 indicates the head is **masked**.
labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
    Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
    config.num_labels - 1]`. If `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
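
Example (an illustrative sketch; `uclanlp/plbart-base` ships no classification head, so the head below is
randomly initialized and only meant to show the call pattern):

```python
>>> from transformers import AutoTokenizer, PLBartForSequenceClassification

>>> tokenizer = AutoTokenizer.from_pretrained("uclanlp/plbart-base")
>>> model = PLBartForSequenceClassification.from_pretrained("uclanlp/plbart-base", num_labels=2)

>>> inputs = tokenizer("def add(a, b): return a + b", return_tensors="pt")
>>> logits = model(**inputs).logits
>>> predicted_class_id = int(logits.argmax(dim=-1))
```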
        """
        super().forward(**super_kwargs)


class PLBartForCausalLM(BartForCausalLM):
    @auto_docstring
    def forward(**super_kwargs):
        r"""
cross_attn_head_mask (`torch.Tensor` of shape `(decoder_layers, decoder_attention_heads)`, *optional*):
    Mask to nullify selected heads of the cross-attention modules. Mask values selected in `[0, 1]`:

    - 1 indicates the head is **not masked**,
    - 0 indicates the head is **masked**.
labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
    Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
    config.vocab_size]` or -100 (see `input_ids` docstring). Tokens with indices set to `-100` are ignored
    (masked), the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`.

Example:

```python
>>> from transformers import AutoTokenizer, PLBartForCausalLM

>>> tokenizer = AutoTokenizer.from_pretrained("uclanlp/plbart-base")
>>> model = PLBartForCausalLM.from_pretrained("uclanlp/plbart-base", add_cross_attention=False)
>>> assert model.config.is_decoder, f"{model.__class__} has to be configured as a decoder."
>>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
>>> outputs = model(**inputs)

>>> logits = outputs.logits
>>> expected_shape = [1, inputs.input_ids.shape[-1], model.config.vocab_size]
>>> list(logits.shape) == expected_shape
True
```
        """
        super().forward(**super_kwargs)


__all__ = [
    "PLBartForCausalLM",
    "PLBartForConditionalGeneration",
    "PLBartForSequenceClassification",
    "PLBartModel",
    "PLBartPreTrainedModel",
]