
from ...configuration_utils import PretrainedConfig
from ..auto import CONFIG_MAPPING, AutoConfig


class EdgeTamVisionConfig(PretrainedConfig):
    r"""
    This is the configuration class to store the configuration of an [`EdgeTamVisionModel`]. It is used to instantiate an
    EdgeTAM vision encoder according to the specified arguments, defining the model architecture. Instantiating a
    configuration with the defaults will yield a similar configuration to that of the EdgeTAM
    [facebook/EdgeTAM](https://huggingface.co/facebook/EdgeTAM) architecture.

    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
    documentation from [`PretrainedConfig`] for more information.

    Args:
        backbone_config (`Union[dict, "PretrainedConfig"]`, *optional*):
            Configuration for the vision backbone. This is used to instantiate the backbone using
            `AutoModel.from_config`.
        backbone_channel_list (`List[int]`, *optional*, defaults to `[384, 192, 96, 48]`):
            The list of channel dimensions for the backbone.
        backbone_feature_sizes (`List[List[int]]`, *optional*, defaults to `[[256, 256], [128, 128], [64, 64]]`):
            The spatial sizes of the feature maps from the backbone.
        fpn_hidden_size (`int`, *optional*, defaults to 256):
            The hidden dimension of the FPN.
        fpn_kernel_size (`int`, *optional*, defaults to 1):
            The kernel size for the convolutions in the neck.
        fpn_stride (`int`, *optional*, defaults to 1):
            The stride for the convolutions in the neck.
        fpn_padding (`int`, *optional*, defaults to 0):
            The padding for the convolutions in the neck.
        fpn_top_down_levels (`List[int]`, *optional*, defaults to `[2, 3]`):
            The levels for the top-down FPN connections.
        num_feature_levels (`int`, *optional*, defaults to 3):
            The number of feature levels from the FPN to use.
        hidden_act (`str`, *optional*, defaults to `"gelu"`):
            The non-linear activation function in the neck.
        layer_norm_eps (`float`, *optional*, defaults to 1e-06):
            The epsilon for the layer normalization.
        initializer_range (`float`, *optional*, defaults to 0.02):
            The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
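
    Example (an illustrative sketch; it assumes `EdgeTamVisionConfig` and `EdgeTamVisionModel` are exported from
    `transformers` as referenced above):

    ```python
    >>> from transformers import EdgeTamVisionConfig, EdgeTamVisionModel

    >>> # Initializing a vision configuration with the default RepViT backbone
    >>> configuration = EdgeTamVisionConfig()

    >>> # Initializing a vision model (with random weights) from that configuration
    >>> model = EdgeTamVisionModel(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```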

    """

    base_config_key = "vision_config"
    model_type = "edgetam_vision_model"
    sub_configs = {"backbone_config": AutoConfig}

    def __init__(
        self,
        backbone_config=None,
        backbone_channel_list=None,
        backbone_feature_sizes=None,
        fpn_hidden_size=256,
        fpn_kernel_size=1,
        fpn_stride=1,
        fpn_padding=0,
        fpn_top_down_levels=None,
        num_feature_levels=3,
        hidden_act="gelu",
        layer_norm_eps=1e-06,
        initializer_range=0.02,
        **kwargs,
    ):
        super().__init__(**kwargs)

        backbone_channel_list = [384, 192, 96, 48] if backbone_channel_list is None else backbone_channel_list
        backbone_feature_sizes = (
            [[256, 256], [128, 128], [64, 64]] if backbone_feature_sizes is None else backbone_feature_sizes
        )
        fpn_top_down_levels = [2, 3] if fpn_top_down_levels is None else fpn_top_down_levels

        if isinstance(backbone_config, dict):
            backbone_config["model_type"] = backbone_config.get("model_type", "timm_wrapper")
            backbone_config = CONFIG_MAPPING[backbone_config["model_type"]](**backbone_config)
        elif backbone_config is None:
            # Default backbone: the RepViT-M1 timm checkpoint used by EdgeTAM; the `model_args`
            # feature-extraction settings below are a reconstructed/assumed default.
            backbone_config = AutoConfig.from_pretrained(
                "timm/repvit_m1.dist_in1k",
                model_args={"in_chans": 3, "features_only": True, "out_indices": (0, 1, 2, 3)},
            )

        self.backbone_config = backbone_config
        self.backbone_channel_list = backbone_channel_list
        self.backbone_feature_sizes = backbone_feature_sizes
        self.fpn_hidden_size = fpn_hidden_size
        self.fpn_kernel_size = fpn_kernel_size
        self.fpn_stride = fpn_stride
        self.fpn_padding = fpn_padding
        self.fpn_top_down_levels = fpn_top_down_levels
        self.num_feature_levels = num_feature_levels
        self.hidden_act = hidden_act
        self.layer_norm_eps = layer_norm_eps
        self.initializer_range = initializer_range


class EdgeTamPromptEncoderConfig(PretrainedConfig):
    r"""
    This is the configuration class to store the configuration of an [`EdgeTamPromptEncoder`]. The [`EdgeTamPromptEncoder`]
    module is used to encode the input 2D points and bounding boxes.

    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
    documentation from [`PretrainedConfig`] for more information.

    Args:
        hidden_size (`int`, *optional*, defaults to 256):
            Dimensionality of the hidden states.
        image_size (`int`, *optional*, defaults to 1024):
            The expected output resolution of the image.
        patch_size (`int`, *optional*, defaults to 16):
            The size (resolution) of each patch.
        mask_input_channels (`int`, *optional*, defaults to 16):
            The number of channels to be fed to the `MaskDecoder` module.
        num_point_embeddings (`int`, *optional*, defaults to 4):
            The number of point embeddings to be used.
        hidden_act (`str`, *optional*, defaults to `"gelu"`):
            The non-linear activation function in the encoder and pooler.
        layer_norm_eps (`float`, *optional*, defaults to 1e-06):
            The epsilon used by the layer normalization layers.
        scale (`float`, *optional*, defaults to 1):
            The scale factor for the prompt encoder.
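
    Example (an illustrative sketch of how this sub-config is typically passed to the composite [`EdgeTamConfig`]):

    ```python
    >>> from transformers import EdgeTamPromptEncoderConfig, EdgeTamConfig

    >>> # Initializing a prompt encoder configuration with the default values
    >>> prompt_encoder_config = EdgeTamPromptEncoderConfig()

    >>> # Using it inside the composite EDGETAM configuration
    >>> config = EdgeTamConfig(prompt_encoder_config=prompt_encoder_config)
    ```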
    """

    base_config_key = "prompt_encoder_config"

    def __init__(
        self,
        hidden_size=256,
        image_size=1024,
        patch_size=16,
        mask_input_channels=16,
        num_point_embeddings=4,
        hidden_act="gelu",
        layer_norm_eps=1e-06,
        scale=1,
        **kwargs,
    ):
        super().__init__(**kwargs)
        self.hidden_size = hidden_size
        self.image_size = image_size
        self.patch_size = patch_size
        self.mask_input_channels = mask_input_channels
        self.num_point_embeddings = num_point_embeddings
        self.hidden_act = hidden_act
        self.layer_norm_eps = layer_norm_eps
        self.scale = scale


class EdgeTamMaskDecoderConfig(PretrainedConfig):
    r"""
    This is the configuration class to store the configuration of an [`EdgeTamMaskDecoder`]. It is used to instantiate an EDGETAM
    mask decoder according to the specified arguments, defining the model architecture.

    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
    documentation from [`PretrainedConfig`] for more information.

    Args:
        hidden_size (`int`, *optional*, defaults to 256):
            Dimensionality of the hidden states.
        hidden_act (`str`, *optional*, defaults to `"gelu"`):
            The non-linear activation function in the EDGETAM mask decoder.
        mlp_dim (`int`, *optional*, defaults to 2048):
            The dimension of the MLP in the two-way transformer.
        num_hidden_layers (`int`, *optional*, defaults to 2):
            The number of hidden layers in the two-way transformer.
        num_attention_heads (`int`, *optional*, defaults to 8):
            The number of attention heads in the two-way transformer.
        attention_downsample_rate (`int`, *optional*, defaults to 2):
            The downsample rate for the attention layers.
        num_multimask_outputs (`int`, *optional*, defaults to 3):
            The number of multimask outputs.
        iou_head_depth (`int`, *optional*, defaults to 3):
            The depth of the IoU head.
        iou_head_hidden_dim (`int`, *optional*, defaults to 256):
            The hidden dimension of the IoU head.
        dynamic_multimask_via_stability (`bool`, *optional*, defaults to `True`):
            Whether to use dynamic multimask via stability.
        dynamic_multimask_stability_delta (`float`, *optional*, defaults to 0.05):
            The stability delta for the dynamic multimask.
        dynamic_multimask_stability_thresh (`float`, *optional*, defaults to 0.98):
            The stability threshold for the dynamic multimask.
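
    Example (an illustrative sketch of how this sub-config is typically passed to the composite [`EdgeTamConfig`]):

    ```python
    >>> from transformers import EdgeTamMaskDecoderConfig, EdgeTamConfig

    >>> # Initializing a mask decoder configuration with the default values
    >>> mask_decoder_config = EdgeTamMaskDecoderConfig()

    >>> # Using it inside the composite EDGETAM configuration
    >>> config = EdgeTamConfig(mask_decoder_config=mask_decoder_config)
    ```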
    """

    base_config_key = "mask_decoder_config"

    def __init__(
        self,
        hidden_size=256,
        hidden_act="gelu",
        mlp_dim=2048,
        num_hidden_layers=2,
        num_attention_heads=8,
        attention_downsample_rate=2,
        num_multimask_outputs=3,
        iou_head_depth=3,
        iou_head_hidden_dim=256,
        dynamic_multimask_via_stability=True,
        dynamic_multimask_stability_delta=0.05,
        dynamic_multimask_stability_thresh=0.98,
        **kwargs,
    ):
        super().__init__(**kwargs)
        self.hidden_size = hidden_size
        self.hidden_act = hidden_act
        self.mlp_dim = mlp_dim
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads
        self.attention_downsample_rate = attention_downsample_rate
        self.num_multimask_outputs = num_multimask_outputs
        self.iou_head_depth = iou_head_depth
        self.iou_head_hidden_dim = iou_head_hidden_dim
        self.dynamic_multimask_via_stability = dynamic_multimask_via_stability
        self.dynamic_multimask_stability_delta = dynamic_multimask_stability_delta
        self.dynamic_multimask_stability_thresh = dynamic_multimask_stability_thresh


class EdgeTamConfig(PretrainedConfig):
    r"""
    [`EdgeTamConfig`] is the configuration class to store the configuration of an [`EdgeTamModel`]. It is used to instantiate an
    EDGETAM model according to the specified arguments, defining the vision encoder, prompt encoder, and mask decoder
    configs. Instantiating a configuration with the defaults will yield a similar configuration to that of the EdgeTAM
    [facebook/EdgeTAM](https://huggingface.co/facebook/EdgeTAM) architecture.

    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
    documentation from [`PretrainedConfig`] for more information.

    Args:
        vision_config (Union[`dict`, `EdgeTamVisionConfig`], *optional*):
            Dictionary of configuration options used to initialize [`EdgeTamVisionConfig`].
        prompt_encoder_config (Union[`dict`, `EdgeTamPromptEncoderConfig`], *optional*):
            Dictionary of configuration options used to initialize [`EdgeTamPromptEncoderConfig`].
        mask_decoder_config (Union[`dict`, `EdgeTamMaskDecoderConfig`], *optional*):
            Dictionary of configuration options used to initialize [`EdgeTamMaskDecoderConfig`].
        initializer_range (`float`, *optional*, defaults to 0.02):
            Standard deviation for parameter initialization.

    Example:

    ```python
    >>> from transformers import (
    ...     EdgeTamVisionConfig,
    ...     EdgeTamPromptEncoderConfig,
    ...     EdgeTamMaskDecoderConfig,
    ...     EdgeTamModel,
    ... )

    >>> # Initializing an EdgeTamConfig with `"facebook/EdgeTAM"` style configuration
    >>> configuration = EdgeTamConfig()

    >>> # Initializing an EdgeTamModel (with random weights) from the `"facebook/EdgeTAM"` style configuration
    >>> model = EdgeTamModel(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config

    >>> # We can also initialize an EdgeTamConfig from an EdgeTamVisionConfig, EdgeTamPromptEncoderConfig, and EdgeTamMaskDecoderConfig

    >>> # Initializing EDGETAM vision encoder, prompt encoder, and mask decoder configurations
    >>> vision_config = EdgeTamVisionConfig()
    >>> prompt_encoder_config = EdgeTamPromptEncoderConfig()
    >>> mask_decoder_config = EdgeTamMaskDecoderConfig()

    >>> config = EdgeTamConfig(vision_config, prompt_encoder_config, mask_decoder_config)
    ```"""

    model_type = "edgetam"
    sub_configs = {
        "vision_config": EdgeTamVisionConfig,
        "prompt_encoder_config": EdgeTamPromptEncoderConfig,
        "mask_decoder_config": EdgeTamMaskDecoderConfig,
    }

    def __init__(
        self,
        vision_config=None,
        prompt_encoder_config=None,
        mask_decoder_config=None,
        initializer_range=0.02,
        **kwargs,
    ):
        super().__init__(**kwargs)
        vision_config = vision_config if vision_config is not None else {}
        prompt_encoder_config = prompt_encoder_config if prompt_encoder_config is not None else {}
        mask_decoder_config = mask_decoder_config if mask_decoder_config is not None else {}

        if isinstance(vision_config, dict):
            vision_config["model_type"] = vision_config.get("model_type", "edgetam_vision_model")
            vision_config = CONFIG_MAPPING[vision_config["model_type"]](**vision_config)
        if isinstance(prompt_encoder_config, EdgeTamPromptEncoderConfig):
            prompt_encoder_config = prompt_encoder_config.to_dict()
        if isinstance(mask_decoder_config, EdgeTamMaskDecoderConfig):
            mask_decoder_config = mask_decoder_config.to_dict()

        self.vision_config = vision_config
        self.prompt_encoder_config = EdgeTamPromptEncoderConfig(**prompt_encoder_config)
        self.mask_decoder_config = EdgeTamMaskDecoderConfig(**mask_decoder_config)
        self.initializer_range = initializer_range


__all__ = ["EdgeTamConfig", "EdgeTamVisionConfig", "EdgeTamPromptEncoderConfig", "EdgeTamMaskDecoderConfig"]