
"""Evolla model configuration"""

from ...configuration_utils import PretrainedConfig
from ...modeling_rope_utils import rope_config_validation
from ...utils import logging


logger = logging.get_logger(__name__)


class SaProtConfig(PretrainedConfig):
    r"""
This is the configuration class to store the configuration of a [`EvollaSaProtProteinEncoder`]. It is used to instantiate a
SaProt model according to the specified arguments, defining the model architecture.

Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
documentation from [`PretrainedConfig`] for more information.

Args:
    vocab_size (`int`, *optional*, defaults to 446):
        Vocabulary size of the protein sequence model. Defines the number of different tokens that can be represented
        by the `input_ids` passed when calling [`EvollaModel`].
    mask_token_id (`int`, *optional*, defaults to 4):
        The id of the *mask* token in the protein sequence model.
    pad_token_id (`int`, *optional*, defaults to 1):
        The id of the *padding* token in the protein sequence model.
    hidden_size (`int`, *optional*, defaults to 1280):
        Dimensionality of the protein sequence model layers and the pooler layer.
    num_hidden_layers (`int`, *optional*, defaults to 33):
        Number of hidden layers in the protein sequence model.
    num_attention_heads (`int`, *optional*, defaults to 20):
        Number of attention heads for each attention layer in the protein sequence model.
    intermediate_size (`int`, *optional*, defaults to 5120):
        Dimensionality of the intermediate layers in the protein sequence model.
    hidden_dropout_prob (`float`, *optional*, defaults to 0.1):
        The dropout ratio for the hidden layers in the protein sequence model.
    attention_probs_dropout_prob (`float`, *optional*, defaults to 0.1):
        The dropout ratio for the attention probabilities in the protein sequence model.
    max_position_embeddings (`int`, *optional*, defaults to 1026):
        The maximum sequence length that the protein sequence model might ever be used with. Typically set this to
        something large just in case (e.g., 512 or 1024 or 2048).
    layer_norm_eps (`float`, *optional*, defaults to 1e-05):
        The epsilon value for the layer normalization layer in the protein sequence model.
    position_embedding_type (`str`, *optional*, defaults to `"rotary"`):
        The type of position embedding to use in the protein sequence model. Currently only `"rotary"` is supported.
    emb_layer_norm_before (`bool`, *optional*, defaults to `False`):
        Whether to apply layer normalization before the position embedding in the protein sequence model.
    token_dropout (`bool`, *optional*, defaults to `True`):
        Whether to apply dropout to the tokens in the protein sequence model.
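
Example (an illustrative sketch; the overridden values below are hypothetical and do not correspond to a released checkpoint):

```python
>>> from transformers.models.evolla.configuration_evolla import SaProtConfig

>>> # Protein-encoder configuration with the default values documented above
>>> protein_encoder_config = SaProtConfig()

>>> # A smaller, hypothetical encoder obtained by overriding a few defaults
>>> small_encoder_config = SaProtConfig(hidden_size=640, num_hidden_layers=12, num_attention_heads=10)
```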
    """

    def __init__(
        self,
        vocab_size=446,
        mask_token_id=4,
        pad_token_id=1,
        hidden_size=1280,
        num_hidden_layers=33,
        num_attention_heads=20,
        intermediate_size=5120,
        hidden_dropout_prob=0.1,
        attention_probs_dropout_prob=0.1,
        max_position_embeddings=1026,
        initializer_range=0.02,
        layer_norm_eps=1e-05,
        position_embedding_type="rotary",
        use_cache=True,
        emb_layer_norm_before=False,
        token_dropout=True,
        **kwargs,
    ):
        super().__init__(pad_token_id=pad_token_id, mask_token_id=mask_token_id, **kwargs)
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads
        self.intermediate_size = intermediate_size
        self.hidden_dropout_prob = hidden_dropout_prob
        self.attention_probs_dropout_prob = attention_probs_dropout_prob
        self.max_position_embeddings = max_position_embeddings
        self.initializer_range = initializer_range
        self.layer_norm_eps = layer_norm_eps
        self.position_embedding_type = position_embedding_type
        self.use_cache = use_cache
        self.emb_layer_norm_before = emb_layer_norm_before
        self.token_dropout = token_dropout


class EvollaConfig(PretrainedConfig):
    r"""
This is the configuration class to store the configuration of a [`EvollaModel`]. It is used to instantiate an
Evolla model according to the specified arguments, defining the model architecture. Instantiating a configuration
with the defaults will yield a similar configuration to that of the Evolla-10B.

e.g. [westlake-repl/Evolla-10B-hf](https://huggingface.co/westlake-repl/Evolla-10B-hf)

Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
documentation from [`PretrainedConfig`] for more information.

Args:
    protein_encoder_config (`dict`, *optional*):
        Dictionary of configuration options used to initialize [`SaProtConfig`].
    vocab_size (`int`, *optional*, defaults to 128256):
        Vocabulary size of the Evolla llama model. Defines the number of different tokens that can be represented by the
        `input_ids` passed when calling [`EvollaModel`].
    hidden_size (`int`, *optional*, defaults to 4096):
        Dimensionality of the llama layers and the pooler layer.
    intermediate_size (`int`, *optional*, defaults to 14336):
        Dimensionality of the intermediate layers in the llama model.
    num_hidden_layers (`int`, *optional*, defaults to 32):
        Number of hidden layers in the llama model.
    num_attention_heads (`int`, *optional*, defaults to 32):
        Number of attention heads for each attention layer in the llama model.
    num_key_value_heads (`int`, *optional*, defaults to 8):
        Number of key-value pairs for each attention layer in the llama model.
    hidden_act (`str` or `function`, *optional*, defaults to `"silu"`):
        The non-linear activation function (function or string) in the llama model. If string, `"gelu"`, `"relu"`,
        `"selu"` and `"silu"` are supported.
    max_position_embeddings (`int`, *optional*, defaults to 8192):
        The maximum sequence length that this model might ever be used with. Typically set this to something large
        just in case (e.g., 512 or 1024 or 2048).
    rms_norm_eps (`float`, *optional*, defaults to 1e-05):
        The epsilon value for the RMS-norm layer in the llama model.
    rope_theta (`float`, *optional*, defaults to 500000.0):
        The base period of the RoPE (rotary position embedding) frequencies in the llama model.
    rope_scaling (`dict`, *optional*):
        Dictionary containing the scaling configuration for the RoPE embeddings (e.g. a `rope_type` key and its
        associated parameters). If `None`, no RoPE scaling is applied.
    attention_bias (`bool`, *optional*, defaults to `False`):
        Whether to use bias in the attention layer.
    attention_dropout (`float`, *optional*, defaults to 0.0):
        The dropout ratio for the attention layer.
    mlp_bias (`bool`, *optional*, defaults to `False`):
        Whether to use bias in the MLP layer.
    aligner_ffn_mult (`int`, *optional*, defaults to 4):
        The FFN multiplier for the aligner layer.
    aligner_enable_bias (`bool`, *optional*, defaults to `True`):
        Whether to use bias in the aligner layer.
    aligner_attention_probs_dropout_prob (`float`, *optional*, defaults to 0.1):
        The dropout ratio for the attention probabilities in the aligner layer.
    aligner_num_add_layers (`int`, *optional*, defaults to 8):
        The number of additional layers for the aligner layer.
    resampler_depth (`int`, *optional*, defaults to 6):
        The depth of the resampler layer in the llama model.
    resampler_dim_head (`int`, *optional*, defaults to 64):
        The dimension of the heads in the resampler layer in the llama model.
    resampler_heads (`int`, *optional*, defaults to 8):
        The number of heads in the resampler layer in the llama model.
    resampler_num_latents (`int`, *optional*, defaults to 64):
        The number of latents in the resampler layer in the llama model.
    resampler_ff_mult (`int`, *optional*, defaults to 4):
        The FFN multiplier for the resampler layer.
    initializer_range (`float`, *optional*, defaults to 0.02):
        The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
    pad_token_id (`int`, *optional*):
        The id of the *padding* token.
    bos_token_id (`int`, *optional*, defaults to 128000):
        The id of the *beginning-of-sequence* token.
    eos_token_id (`int`, *optional*, defaults to 128009):
        The id of the *end-of-sequence* token.
    use_cache (`bool`, *optional*, defaults to `False`):
        Whether or not the model should return the last key/values attentions (not used by all models).
    tie_word_embeddings (`bool`, *optional*, defaults to `False`):
        Whether or not to tie the input and output word embeddings.

Example:

```python
>>> from transformers import EvollaModel, EvollaConfig

>>> # Initializing an Evolla evolla-10b style configuration
>>> configuration = EvollaConfig()

>>> # Initializing a model from the evolla-10b style configuration
>>> model = EvollaModel(configuration)

>>> # Accessing the model configuration
>>> configuration = model.config
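
>>> # A customized configuration (illustrative values, not the released Evolla-10B checkpoint):
>>> # the protein encoder is configured through a plain dict that is used to build a `SaProtConfig`.
>>> custom_configuration = EvollaConfig(
...     protein_encoder_config={"num_hidden_layers": 12, "hidden_size": 640, "num_attention_heads": 10},
...     resampler_num_latents=32,
...     use_cache=True,
... )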
```
    """

    model_type = "EvollaModel"
    sub_configs = {"protein_encoder_config": SaProtConfig}

    def __init__(
        self,
        protein_encoder_config=None,
        vocab_size=128256,
        hidden_size=4096,
        intermediate_size=14336,
        num_hidden_layers=32,
        num_attention_heads=32,
        num_key_value_heads=8,
        hidden_act="silu",
        max_position_embeddings=8192,
        rms_norm_eps=1e-05,
        rope_theta=500000.0,
        rope_scaling=None,
        attention_bias=False,
        attention_dropout=0.0,
        mlp_bias=False,
        aligner_ffn_mult=4,
        aligner_enable_bias=True,
        aligner_attention_probs_dropout_prob=0.1,
        aligner_num_add_layers=8,
        resampler_depth=6,
        resampler_dim_head=64,
        resampler_heads=8,
        resampler_num_latents=64,
        resampler_ff_mult=4,
        initializer_range=0.02,
        pad_token_id=None,
        bos_token_id=128000,
        eos_token_id=128009,
        use_cache=False,
        tie_word_embeddings=False,
        **kwargs,
    ):
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.intermediate_size = intermediate_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads
        self.num_key_value_heads = num_key_value_heads
        self.hidden_act = hidden_act
        self.max_position_embeddings = max_position_embeddings
        self.rms_norm_eps = rms_norm_eps
        self.use_cache = use_cache
        self.attention_bias = attention_bias
        self.attention_dropout = attention_dropout
        self.mlp_bias = mlp_bias

        self.aligner_ffn_mult = aligner_ffn_mult
        self.aligner_enable_bias = aligner_enable_bias
        self.aligner_attention_probs_dropout_prob = aligner_attention_probs_dropout_prob
        self.aligner_num_add_layers = aligner_num_add_layers
        self.initializer_range = initializer_range

        self.resampler_depth = resampler_depth
        self.resampler_dim_head = resampler_dim_head
        self.resampler_heads = resampler_heads
        self.resampler_num_latents = resampler_num_latents
        self.resampler_ff_mult = resampler_ff_mult

        self.rope_theta = rope_theta
        self.rope_scaling = rope_scaling
        # Backward compatibility: copy the legacy `type` key of `rope_scaling` to `rope_type`
        if self.rope_scaling is not None and "type" in self.rope_scaling:
            self.rope_scaling["rope_type"] = self.rope_scaling["type"]
        rope_config_validation(self)

        if protein_encoder_config is None:
            protein_encoder_config = {}
            logger.info("`protein_encoder_config` is `None`. Initializing the `SaProtConfig` with default values.")
        self.protein_encoder_config = SaProtConfig(**protein_encoder_config)

        super().__init__(
            pad_token_id=pad_token_id,
            bos_token_id=bos_token_id,
            eos_token_id=eos_token_id,
            tie_word_embeddings=tie_word_embeddings,
            **kwargs,
        )


__all__ = ["EvollaConfig"]