
"""VJEPA 2 model configuration"""

from ...configuration_utils import PretrainedConfig


class VJEPA2Config(PretrainedConfig):
    r"""
This is the configuration class to store the configuration of a [`VJEPA2Model`]. It is used to instantiate an
VJEPA2 model according to the specified arguments, defining the model architecture. Instantiating a configuration
with the defaults will yield a similar configuration to that of the VJEPA2
[facebook/vjepa2-vitl-fpc64-256](https://huggingface.co/facebook/vjepa2-vitl-fpc64-256) architecture.

Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
documentation from [`PretrainedConfig`] for more information.

Args:
    patch_size (`int`, *optional*, defaults to 16):
        The size (resolution) of each patch.
    crop_size (`int`, *optional*, defaults to 256):
        Input resolution of the model.
    frames_per_clip (`int`, *optional*, defaults to 64):
        The number of frames the model has been pretrained with. Does not impact inference.
    tubelet_size (`int`, *optional*, defaults to 2):
        The number of temporal frames used for a single raster (tubelet); check the paper for more information.
        A sketch of how this interacts with `patch_size`, `crop_size` and `frames_per_clip` follows the argument list.
    hidden_size (`int`, *optional*, defaults to 1024):
        Dimensionality of the encoder layers.
    in_chans (`int`, *optional*, defaults to 3):
        The number of input channels.
    num_attention_heads (`int`, *optional*, defaults to 16):
        Number of attention heads for each attention layer in the Encoder.
    num_hidden_layers (`int`, *optional*, defaults to 24):
        The number of hidden layers in the Encoder.
    drop_path_rate (`float`, *optional*, defaults to 0.0):
        Stochastic depth rate per sample (when applied in the main path of residual layers).
    mlp_ratio (`float`, *optional*, defaults to 4.0):
        Ratio of the hidden size of the MLPs used in the Encoder relative to `hidden_size`.
    layer_norm_eps (`float`, *optional*, defaults to 1e-06):
        The epsilon used by the layer normalization layers.
    qkv_bias (`bool`, *optional*, defaults to `True`):
        Whether to add a bias to the queries, keys and values.
    attention_probs_dropout_prob (`float`, *optional*, defaults to 0.0):
        The dropout probability for the attention probabilities.
    hidden_act (`str`, *optional*, defaults to `"gelu"`):
        The non-linear activation function (function or string) in the encoder and pooler. If string, `"gelu"`,
        `"relu"`, `"selu"` and `"gelu_new"` are supported.
    initializer_range (`float`, *optional*, defaults to 0.02):
        The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
    attention_dropout (`float`, *optional*, defaults to 0.0):
        The dropout probability for attentions.
    num_pooler_layers (`int`, *optional*, defaults to 3):
        The number of self-attention layers in the pooler.
    pred_hidden_size (`int`, *optional*, defaults to 384):
        Dimensionality of the predictor layers.
    pred_num_attention_heads (`int`, *optional*, defaults to 12):
        Number of attention heads for each attention layer in the Predictor.
    pred_num_hidden_layers (`int`, *optional*, defaults to 12):
        Number of hidden layers in the Predictor.
    pred_num_mask_tokens (`int`, *optional*, defaults to 10):
        The number of mask tokens to use in the Predictor.
    pred_zero_init_mask_tokens (`bool`, *optional*, defaults to `True`):
        Whether to initialize the mask tokens in the Predictor to zero.
    pred_mlp_ratio (`float`, *optional*, defaults to 4.0):
        Ratio of the hidden size of the MLPs used in Predictor relative to the `pred_hidden_size`.
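
For intuition, and assuming the standard tubelet patchification implied by the arguments above (each token covers a
`patch_size` x `patch_size` spatial patch spanning `tubelet_size` consecutive frames), the number of tokens the
encoder processes per clip can be sketched as below. This is an illustrative calculation only, not an attribute of
the configuration:

```python
>>> # Illustrative only: token count per clip under standard tubelet patchification,
>>> # assuming a square crop and frames_per_clip divisible by tubelet_size.
>>> crop_size, patch_size = 256, 16
>>> frames_per_clip, tubelet_size = 64, 2
>>> (crop_size // patch_size) ** 2 * (frames_per_clip // tubelet_size)
8192
```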

Example:

```python
>>> from transformers import VJEPA2Config, VJEPA2Model

>>> # Initializing a VJEPA2 vjepa2-vitl-fpc64-256 style configuration
>>> configuration = VJEPA2Config()

>>> # Initializing a model (with random weights) from the vjepa2-vitl-fpc64-256 style configuration
>>> model = VJEPA2Model(configuration)

>>> # Accessing the model configuration
>>> configuration = model.config
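
>>> # Illustrative only: defaults can be overridden to build a custom, hypothetical smaller
>>> # variant (the values below are placeholders, not a released checkpoint)
>>> small_configuration = VJEPA2Config(hidden_size=384, num_attention_heads=6, num_hidden_layers=12)

>>> # As with any `PretrainedConfig`, the configuration can be saved and reloaded
>>> # (the local path is a placeholder)
>>> configuration.save_pretrained("./my-vjepa2-config")  # doctest: +SKIP
>>> configuration = VJEPA2Config.from_pretrained("./my-vjepa2-config")  # doctest: +SKIP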
```
    """

    model_type = "vjepa2"

    def __init__(
        self,
        patch_size=16,
        crop_size=256,
        frames_per_clip=64,
        tubelet_size=2,
        hidden_size=1024,
        in_chans=3,
        num_attention_heads=16,
        num_hidden_layers=24,
        drop_path_rate=0.0,
        mlp_ratio=4.0,
        layer_norm_eps=1e-6,
        qkv_bias=True,
        attention_probs_dropout_prob=0.0,
        hidden_act="gelu",
        initializer_range=0.02,
        attention_dropout=0.0,
        num_pooler_layers=3,
        pred_hidden_size=384,
        pred_num_attention_heads=12,
        pred_num_hidden_layers=12,
        pred_num_mask_tokens=10,
        pred_zero_init_mask_tokens=True,
        pred_mlp_ratio=4.0,
        **kwargs,
    ):
        super().__init__(**kwargs)

        self.crop_size = crop_size
        self.frames_per_clip = frames_per_clip
        self.patch_size = patch_size
        self.tubelet_size = tubelet_size
        self.hidden_size = hidden_size
        self.in_chans = in_chans
        self.num_attention_heads = num_attention_heads
        self.num_hidden_layers = num_hidden_layers
        self.drop_path_rate = drop_path_rate
        self.mlp_ratio = mlp_ratio
        self.layer_norm_eps = layer_norm_eps
        self.qkv_bias = qkv_bias
        self.attention_probs_dropout_prob = attention_probs_dropout_prob
        self.hidden_act = hidden_act
        self.initializer_range = initializer_range
        self.image_size = crop_size
        self.attention_dropout = attention_dropout
        self.num_pooler_layers = num_pooler_layers
        self.pred_hidden_size = pred_hidden_size
        self.pred_num_attention_heads = pred_num_attention_heads
        self.pred_num_hidden_layers = pred_num_hidden_layers
        self.pred_num_mask_tokens = pred_num_mask_tokens
        self.pred_zero_init_mask_tokens = pred_zero_init_mask_tokens
        self.pred_mlp_ratio = pred_mlp_ratio


__all__ = ["VJEPA2Config"]