
"""Evolla model configuration"""

from ...configuration_utils import PretrainedConfig
from ...modeling_rope_utils import rope_config_validation
from ...utils import logging


logger = logging.get_logger(__name__)


class SaProtConfig(PretrainedConfig):
    r"""
This is the configuration class to store the configuration of a [`EvollaSaProtProteinEncoder`]. It is used to instantiate a
SaProt model according to the specified arguments, defining the model architecture.

Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
documentation from [`PretrainedConfig`] for more information.

Args:
    vocab_size (`int`, *optional*, defaults to 446):
        Vocabulary size of the protein sequence model. Defines the number of different tokens that can be represented
        by the `input_ids` passed when calling [`EvollaModel`].
    mask_token_id (`int`, *optional*, defaults to 4):
        The id of the *mask* token in the protein sequence model.
    pad_token_id (`int`, *optional*, defaults to 1):
        The id of the *padding* token in the protein sequence model.
    hidden_size (`int`, *optional*, defaults to 1280):
        Dimensionality of the protein sequence model layers and the pooler layer.
    num_hidden_layers (`int`, *optional*, defaults to 33):
        Number of hidden layers in the protein sequence model.
    num_attention_heads (`int`, *optional*, defaults to 20):
        Number of attention heads for each attention layer in the protein sequence model.
    intermediate_size (`int`, *optional*, defaults to 5120):
        Dimensionality of the intermediate layers in the protein sequence model.
    hidden_dropout_prob (`float`, *optional*, defaults to 0.1):
        The dropout ratio for the hidden layers in the protein sequence model.
    attention_probs_dropout_prob (`float`, *optional*, defaults to 0.1):
        The dropout ratio for the attention probabilities in the protein sequence model.
    max_position_embeddings (`int`, *optional*, defaults to 1026):
        The maximum sequence length that the protein sequence model might ever be used with. Typically set this to
        something large just in case (e.g., 512 or 1024 or 2048).
    layer_norm_eps (`float`, *optional*, defaults to 1e-05):
        The epsilon value for the layer normalization layer in the protein sequence model.
    position_embedding_type (`str`, *optional*, defaults to `"rotary"`):
        The type of position embedding to use in the protein sequence model. Currently only `"rotary"` is supported.
    emb_layer_norm_before (`bool`, *optional*, defaults to `False`):
        Whether to apply layer normalization before the position embedding in the protein sequence model.
    token_dropout (`bool`, *optional*, defaults to `True`):
        Whether to apply dropout to the tokens in the protein sequence model.
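
Example (an illustrative sketch; the overridden values below are hypothetical and do not correspond to a released checkpoint):

```python
>>> from transformers.models.evolla.configuration_evolla import SaProtConfig

>>> # Protein-encoder configuration with the default values documented above
>>> protein_encoder_config = SaProtConfig()

>>> # A smaller, hypothetical encoder obtained by overriding a few defaults
>>> small_encoder_config = SaProtConfig(hidden_size=640, num_hidden_layers=12, num_attention_heads=10)
```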
    """

    def __init__(
        self,
        vocab_size=446,
        mask_token_id=4,
        pad_token_id=1,
        hidden_size=1280,
        num_hidden_layers=33,
        num_attention_heads=20,
        intermediate_size=5120,
        hidden_dropout_prob=0.1,
        attention_probs_dropout_prob=0.1,
        max_position_embeddings=1026,
        initializer_range=0.02,
        layer_norm_eps=1e-05,
        position_embedding_type="rotary",
        use_cache=True,
        emb_layer_norm_before=False,
        token_dropout=True,
        **kwargs,
    ):
        super().__init__(pad_token_id=pad_token_id, mask_token_id=mask_token_id, **kwargs)
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads
        self.intermediate_size = intermediate_size
        self.hidden_dropout_prob = hidden_dropout_prob
        self.attention_probs_dropout_prob = attention_probs_dropout_prob
        self.max_position_embeddings = max_position_embeddings
        self.initializer_range = initializer_range
        self.layer_norm_eps = layer_norm_eps
        self.position_embedding_type = position_embedding_type
        self.use_cache = use_cache
        self.emb_layer_norm_before = emb_layer_norm_before
        self.token_dropout = token_dropout


class EvollaConfig(PretrainedConfig):
    r"""
This is the configuration class to store the configuration of a [`EvollaModel`]. It is used to instantiate an
Evolla model according to the specified arguments, defining the model architecture. Instantiating a configuration
with the defaults will yield a similar configuration to that of the Evolla-10B.

e.g. [westlake-repl/Evolla-10B-hf](https://huggingface.co/westlake-repl/Evolla-10B-hf)

Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
documentation from [`PretrainedConfig`] for more information.

Args:
    protein_encoder_config (`dict`, *optional*):
        Dictionary of configuration options used to initialize [`SaProtConfig`].
    vocab_size (`int`, *optional*, defaults to 128256):
        Vocabulary size of the Evolla llama model. Defines the number of different tokens that can be represented by the
        `input_ids` passed when calling [`EvollaModel`].
    hidden_size (`int`, *optional*, defaults to 4096):
        Dimensionality of the llama layers and the pooler layer.
    intermediate_size (`int`, *optional*, defaults to 14336):
        Dimensionality of the intermediate layers in the llama model.
    num_hidden_layers (`int`, *optional*, defaults to 32):
        Number of hidden layers in the llama model.
    num_attention_heads (`int`, *optional*, defaults to 32):
        Number of attention heads for each attention layer in the llama model.
    num_key_value_heads (`int`, *optional*, defaults to 8):
        Number of key-value pairs for each attention layer in the llama model.
    hidden_act (`str` or `function`, *optional*, defaults to `"silu"`):
        The non-linear activation function (function or string) in the llama model. If string, `"gelu"`, `"relu"`,
        `"selu"` and `"silu"` are supported.
    max_position_embeddings (`int`, *optional*, defaults to 8192):
        The maximum sequence length that this model might ever be used with. Typically set this to something large
        just in case (e.g., 512 or 1024 or 2048).
    rms_norm_eps (`float`, *optional*, defaults to 1e-05):
        The epsilon value for the RMS-norm layer in the llama model.
    rope_theta (`float`, *optional*, defaults to 500000.0):
        The base period of the RoPE (rotary position embedding) frequencies in the llama model.
    rope_scaling (`dict`, *optional*):
        Dictionary containing the scaling configuration for the RoPE embeddings (e.g. a `rope_type` key and its
        associated parameters). If `None`, no RoPE scaling is applied.
    attention_bias (`bool`, *optional*, defaults to `False`):
        Whether to use bias in the attention layer.
    attention_dropout (`float`, *optional*, defaults to 0.0):
        The dropout ratio for the attention layer.
    mlp_bias (`bool`, *optional*, defaults to `False`):
        Whether to use bias in the MLP layer.
    aligner_ffn_mult (`int`, *optional*, defaults to 4):
        The FFN multiplier for the aligner layer.
    aligner_enable_bias (`bool`, *optional*, defaults to `True`):
        Whether to use bias in the aligner layer.
    aligner_attention_probs_dropout_prob (`float`, *optional*, defaults to 0.1):
        The dropout ratio for the attention probabilities in the aligner layer.
    aligner_num_add_layers (`int`, *optional*, defaults to 8):
        The number of additional layers for the aligner layer.
    resampler_depth (`int`, *optional*, defaults to 6):
        The depth of the resampler layer in the llama model.
    resampler_dim_head (`int`, *optional*, defaults to 64):
        The dimension of the heads in the resampler layer in the llama model.
    resampler_heads (`int`, *optional*, defaults to 8):
        The number of heads in the resampler layer in the llama model.
    resampler_num_latents (`int`, *optional*, defaults to 64):
        The number of latents in the resampler layer in the llama model.
    resampler_ff_mult (`int`, *optional*, defaults to 4):
        The FFN multiplier for the resampler layer.
    initializer_range (`float`, *optional*, defaults to 0.02):
        The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
    pad_token_id (`int`, *optional*):
        The id of the *padding* token.
    bos_token_id (`int`, *optional*, defaults to 128000):
        The id of the *beginning-of-sequence* token.
    eos_token_id (`int`, *optional*, defaults to 128009):
        The id of the *end-of-sequence* token.
    use_cache (`bool`, *optional*, defaults to `False`):
        Whether or not the model should return the last key/values attentions (not used by all models).
    tie_word_embeddings (`bool`, *optional*, defaults to `False`):
        Whether or not to tie the input and output word embeddings.

Example:

```python
>>> from transformers import EvollaModel, EvollaConfig

>>> # Initializing an Evolla evolla-10b style configuration
>>> configuration = EvollaConfig()

>>> # Initializing a model from the evolla-10b style configuration
>>> model = EvollaModel(configuration)

>>> # Accessing the model configuration
>>> configuration = model.config
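
>>> # A customized configuration (illustrative values, not the released Evolla-10B checkpoint):
>>> # the protein encoder is configured through a plain dict that is used to build a `SaProtConfig`.
>>> custom_configuration = EvollaConfig(
...     protein_encoder_config={"num_hidden_layers": 12, "hidden_size": 640, "num_attention_heads": 10},
...     resampler_num_latents=32,
...     use_cache=True,
... )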
```
    """

    model_type = "EvollaModel"
    sub_configs = {"protein_encoder_config": SaProtConfig}

    def __init__(
        self,
        protein_encoder_config=None,
        vocab_size=128256,
        hidden_size=4096,
        intermediate_size=14336,
        num_hidden_layers=32,
        num_attention_heads=32,
        num_key_value_heads=8,
        hidden_act="silu",
        max_position_embeddings=8192,
        rms_norm_eps=1e-05,
        rope_theta=500000.0,
        rope_scaling=None,
        attention_bias=False,
        attention_dropout=0.0,
        mlp_bias=False,
        aligner_ffn_mult=4,
        aligner_enable_bias=True,
        aligner_attention_probs_dropout_prob=0.1,
        aligner_num_add_layers=8,
        resampler_depth=6,
        resampler_dim_head=64,
        resampler_heads=8,
        resampler_num_latents=64,
        resampler_ff_mult=4,
        initializer_range=0.02,
        pad_token_id=None,
        bos_token_id=128000,
        eos_token_id=128009,
        use_cache=False,
        tie_word_embeddings=False,
        **kwargs,
    ):
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.intermediate_size = intermediate_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads
        self.num_key_value_heads = num_key_value_heads
        self.hidden_act = hidden_act
        self.max_position_embeddings = max_position_embeddings
        self.rms_norm_eps = rms_norm_eps
        self.use_cache = use_cache
        self.attention_bias = attention_bias
        self.attention_dropout = attention_dropout
        self.mlp_bias = mlp_bias

        self.aligner_ffn_mult = aligner_ffn_mult
        self.aligner_enable_bias = aligner_enable_bias
        self.aligner_attention_probs_dropout_prob = aligner_attention_probs_dropout_prob
        self.aligner_num_add_layers = aligner_num_add_layers
        self.initializer_range = initializer_range

        self.resampler_depth = resampler_depth
        self.resampler_dim_head = resampler_dim_head
        self.resampler_heads = resampler_heads
        self.resampler_num_latents = resampler_num_latents
        self.resampler_ff_mult = resampler_ff_mult

        self.rope_theta = rope_theta
        self.rope_scaling = rope_scaling
        # Backward compatibility: copy the legacy `type` key of `rope_scaling` to `rope_type`
        if self.rope_scaling is not None and "type" in self.rope_scaling:
            self.rope_scaling["rope_type"] = self.rope_scaling["type"]
        rope_config_validation(self)

        if protein_encoder_config is None:
            protein_encoder_config = {}
            logger.info("`protein_encoder_config` is `None`. Initializing the `SaProtConfig` with default values.")
        self.protein_encoder_config = SaProtConfig(**protein_encoder_config)

        super().__init__(
            pad_token_id=pad_token_id,
            bos_token_id=bos_token_id,
            eos_token_id=eos_token_id,
            tie_word_embeddings=tie_word_embeddings,
            **kwargs,
        )


__all__ = ["EvollaConfig"]