ó
    <±h­!  ã                   óš   • S r SSKJr  SSKJr  SSKJr  SSKJr  SSK	J
r
  \
R                  " \5      r " S S	\5      r " S
 S\5      rS	/rg)zPLBART model configurationé    ©ÚOrderedDict)ÚMappingé   )ÚPretrainedConfig)ÚOnnxConfigWithPast)Úloggingc                   óv   ^ • \ rS rSrSrSrS/rSSSS.r                        SU 4S	 jjrS
r	U =r
$ )ÚPLBartConfigé   a  
This is the configuration class to store the configuration of a [`PLBartModel`]. It is used to instantiate an
PLBART model according to the specified arguments, defining the model architecture. Instantiating a configuration
with the defaults will yield a similar configuration to that of the PLBART
[uclanlp/plbart-base](https://huggingface.co/uclanlp/plbart-base) architecture.

Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
documentation from [`PretrainedConfig`] for more information.


Args:
    vocab_size (`int`, *optional*, defaults to 50005):
        Vocabulary size of the PLBART model. Defines the number of different tokens that can be represented by the
        `inputs_ids` passed when calling [`PLBartModel`].
    d_model (`int`, *optional*, defaults to 768):
        Dimensionality of the layers and the pooler layer.
    encoder_layers (`int`, *optional*, defaults to 6):
        Number of encoder layers.
    decoder_layers (`int`, *optional*, defaults to 6):
        Number of decoder layers.
    encoder_attention_heads (`int`, *optional*, defaults to 12):
        Number of attention heads for each attention layer in the Transformer encoder.
    decoder_attention_heads (`int`, *optional*, defaults to 12):
        Number of attention heads for each attention layer in the Transformer decoder.
    decoder_ffn_dim (`int`, *optional*, defaults to 3072):
        Dimensionality of the "intermediate" (often named feed-forward) layer in decoder.
    encoder_ffn_dim (`int`, *optional*, defaults to 3072):
        Dimensionality of the "intermediate" (often named feed-forward) layer in decoder.
    activation_function (`str` or `function`, *optional*, defaults to `"gelu"`):
        The non-linear activation function (function or string) in the encoder and pooler. If string, `"gelu"`,
        `"relu"`, `"silu"` and `"gelu_new"` are supported.
    dropout (`float`, *optional*, defaults to 0.1):
        The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
    attention_dropout (`float`, *optional*, defaults to 0.1):
        The dropout ratio for the attention probabilities.
    activation_dropout (`float`, *optional*, defaults to 0.0):
        The dropout ratio for activations inside the fully connected layer.
    classifier_dropout (`float`, *optional*, defaults to 0.0):
        The dropout ratio for classifier.
    max_position_embeddings (`int`, *optional*, defaults to 1024):
        The maximum sequence length that this model might ever be used with. Typically set this to something large
        just in case (e.g., 512 or 1024 or 2048).
    init_std (`float`, *optional*, defaults to 0.02):
        The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
    encoder_layerdrop (`float`, *optional*, defaults to 0.0):
        The LayerDrop probability for the encoder. See the [LayerDrop paper](see https://huggingface.co/papers/1909.11556)
        for more details.
    decoder_layerdrop (`float`, *optional*, defaults to 0.0):
        The LayerDrop probability for the decoder. See the [LayerDrop paper](see https://huggingface.co/papers/1909.11556)
        for more details.
    scale_embedding (`bool`, *optional*, defaults to `True`):
        Scale embeddings by diving by sqrt(d_model).
    use_cache (`bool`, *optional*, defaults to `True`):
        Whether or not the model should return the last key/values attentions (not used by all models)
    forced_eos_token_id (`int`, *optional*, defaults to 2):
        The id of the token to force as the last generated token when `max_length` is reached. Usually set to
        `eos_token_id`.

Example:

```python
>>> from transformers import PLBartConfig, PLBartModel

>>> # Initializing a PLBART uclanlp/plbart-base style configuration
>>> configuration = PLBartConfig()

>>> # Initializing a model (with random weights) from the uclanlp/plbart-base style configuration
>>> model = PLBartModel(configuration)

>>> # Accessing the model configuration
>>> configuration = model.config
```ÚplbartÚpast_key_valuesÚencoder_attention_headsÚd_modelÚinit_std)Únum_attention_headsÚhidden_sizeÚinitializer_rangec           	      ó,  >• Xl         X l        Xàl        X@l        X0l        XPl        Xpl        X`l        X€l        Xðl	        UU l
        UU l        XÐl        UU l        Xl        X l        UU l        X°l        X0l        UU l        [(        TU ]T  " SUUUUUS.UD6  g )N)Úpad_token_idÚbos_token_idÚeos_token_idÚis_encoder_decoderÚforced_eos_token_id© )Ú
vocab_sizeÚmax_position_embeddingsr   Úencoder_ffn_dimÚencoder_layersr   Údecoder_ffn_dimÚdecoder_layersÚdecoder_attention_headsÚdropoutÚattention_dropoutÚactivation_dropoutÚactivation_functionr   Úencoder_layerdropÚdecoder_layerdropÚclassifier_dropoutÚ	use_cacheÚnum_hidden_layersÚscale_embeddingÚsuperÚ__init__)Úselfr   r   r   r   r   r!   r    r"   r'   r(   r*   r   r&   r   r#   r$   r%   r   r)   r,   r   r   r   r   ÚkwargsÚ	__class__s                             €Úg/var/www/html/shao/venv/lib/python3.13/site-packages/transformers/models/plbart/configuration_plbart.pyr.   ÚPLBartConfig.__init__n   s­   ø€ ð8 %ŒØ'>Ô$ØŒØ.ÔØ,ÔØ'>Ô$Ø.ÔØ,ÔØ'>Ô$ØŒØ!2ˆÔØ"4ˆÔØ#6Ô Ø ˆŒØ!2ÔØ!2ÔØ"4ˆÔØ"ŒØ!/ÔØ.ˆÔÜ‰Òð 	
Ø%Ø%Ø%Ø1Ø 3ñ	
ð ó	
ó    )r%   r&   r$   r)   r   r"   r    r(   r!   r#   r   r   r'   r   r   r   r+   r,   r*   r   )iUÃ  i   é   é   é   r5   r6   r7   ç        r8   TTÚgelui   çš™™™™™¹?r:   r8   g{®Gáz”?r8   Té   r   é   r<   )Ú__name__Ú
__module__Ú__qualname__Ú__firstlineno__Ú__doc__Ú
model_typeÚkeys_to_ignore_at_inferenceÚattribute_mapr.   Ú__static_attributes__Ú__classcell__)r1   s   @r2   r   r      s~   ø† ñGðR €JØ#4Ð"5Ðà8Ø Ø'ñ€Mð Ø $ØØØ "ØØØ "ØØØØØ"ØØØØØØØØØØØ÷37
õ 7
r4   r   c                   ól   • \ rS rSr\S\\\\\4   4   4S j5       r\S\\\\\4   4   4S j5       r	Sr
g)ÚPLBartOnnxConfigé¨   Úreturnc                 ó0   • [        SSSS.4SSSS.4/5      $ )NÚ	input_idsÚbatchÚsequence©r   r;   Úattention_maskr   ©r/   s    r2   ÚinputsÚPLBartOnnxConfig.inputs©   s.   € äà '¨jÑ9Ð:Ø! w°:Ñ#>Ð?ðó
ð 	
r4   c                 óŒ   • U R                   (       a  [        SSSS.4SSSS.4SSSS.4/5      $ [        SSSS.4SSSS.4/5      $ )NÚlast_hidden_staterM   rN   rO   Ú	past_keys)r   r<   Úencoder_last_hidden_state)Úuse_pastr   rQ   s    r2   ÚoutputsÚPLBartOnnxConfig.outputs²   sp   € à==Üà(¨g¸*Ñ*EÐFØ  g°*Ñ"=Ð>Ø0°gÀ*Ñ2MÐNðóð ô à(¨g¸*Ñ*EÐFØ0°gÀ*Ñ2MÐNðóð r4   r   N)r=   r>   r?   r@   Úpropertyr   ÚstrÚintrR   rY   rE   r   r4   r2   rH   rH   ¨   s\   † Øð
˜  W¨S°#¨XÑ%6Ð 6Ñ7ó 
ó ð
ð ð˜  g¨c°3¨hÑ&7Ð!7Ñ8ó ó ór4   rH   N)rA   Úcollectionsr   Úcollections.abcr   Úconfiguration_utilsr   Úonnxr   Úutilsr	   Ú
get_loggerr=   Úloggerr   rH   Ú__all__r   r4   r2   Ú<module>rf      sR   ðñ !å #Ý #å 3Ý &Ý ð 
×	Ò	˜HÓ	%€ôI
Ð#ô I
ôXÐ)ô ð: Ð
r4   