
"""Blt model configuration"""

from ...configuration_utils import PretrainedConfig
from ...utils import logging


logger = logging.get_logger(__name__)


class BltLocalEncoderConfig(PretrainedConfig):
    r"""
    Configuration class for the Blt Local Encoder component.
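
    Example (illustrative usage; the override values below simply restate this class's
    defaults and are not checkpoint-specific):

    ```python
    >>> from transformers import BltLocalEncoderConfig

    >>> # A shallow byte-level encoder that maps bytes into patch representations
    >>> encoder_config = BltLocalEncoderConfig(hidden_size=1024, num_hidden_layers=1)
    ```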
    """

    model_type = "blt_local_encoder"

    def __init__(
        self,
        vocab_size=260,
        cross_attn_all_layers=False,
        cross_attn_k=2,
        hidden_size_global=2048,
        hidden_size=1024,
        num_attention_heads=16,
        num_key_value_heads=None,
        intermediate_size=None,
        num_hidden_layers=1,
        rms_norm_eps=1e-05,
        dropout=0.0,
        max_position_embeddings=24576,
        rope_theta=500000.0,
        rope_scaling=None,
        hidden_act="silu",
        initializer_range=0.02,
        **kwargs,
    ):
        self.vocab_size = vocab_size
        self.cross_attn_all_layers = cross_attn_all_layers
        self.cross_attn_k = cross_attn_k
        self.hidden_size_global = hidden_size_global
        self.hidden_size = hidden_size
        self.num_attention_heads = num_attention_heads
        self.num_key_value_heads = num_key_value_heads or num_attention_heads
        self.head_dim = hidden_size // num_attention_heads
        # Fall back to the usual SwiGLU sizing when no explicit FFN width is given
        self.intermediate_size = intermediate_size or int(8 * hidden_size / 3)
        self.num_hidden_layers = num_hidden_layers
        self.rms_norm_eps = rms_norm_eps
        self.dropout = dropout
        self.max_position_embeddings = max_position_embeddings
        self.rope_theta = rope_theta
        self.rope_scaling = rope_scaling
        self.hidden_act = hidden_act
        self.initializer_range = initializer_range
        kwargs.pop("tie_word_embeddings", None)
        super().__init__(**kwargs, tie_word_embeddings=False)


class BltLocalDecoderConfig(PretrainedConfig):
    r"""
    Configuration class for the Blt Local Decoder component.
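
    Example (illustrative usage; the values below restate this class's defaults):

    ```python
    >>> from transformers import BltLocalDecoderConfig

    >>> # The local decoder mirrors the encoder but is deeper and cross-attends in all layers
    >>> decoder_config = BltLocalDecoderConfig(num_hidden_layers=9, cross_attn_all_layers=True)
    ```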
    """

    model_type = "blt_local_decoder"

    def __init__(
        self,
        vocab_size=260,
        cross_attn_all_layers=True,
        cross_attn_k=2,
        hidden_size_global=2048,
        hidden_size=1024,
        num_attention_heads=16,
        num_key_value_heads=None,
        intermediate_size=None,
        num_hidden_layers=9,
        rms_norm_eps=1e-05,
        dropout=0.0,
        max_position_embeddings=24576,
        rope_theta=500000.0,
        rope_scaling=None,
        hidden_act="silu",
        initializer_range=0.02,
        **kwargs,
    ):
        self.vocab_size = vocab_size
        self.cross_attn_all_layers = cross_attn_all_layers
        self.cross_attn_k = cross_attn_k
        self.hidden_size_global = hidden_size_global
        self.hidden_size = hidden_size
        self.num_attention_heads = num_attention_heads
        self.num_key_value_heads = num_key_value_heads or num_attention_heads
        self.head_dim = hidden_size // num_attention_heads
        # Fall back to the usual SwiGLU sizing when no explicit FFN width is given
        self.intermediate_size = intermediate_size or int(8 * hidden_size / 3)
        self.num_hidden_layers = num_hidden_layers
        self.rms_norm_eps = rms_norm_eps
        self.dropout = dropout
        self.max_position_embeddings = max_position_embeddings
        self.rope_theta = rope_theta
        self.rope_scaling = rope_scaling
        self.hidden_act = hidden_act
        self.initializer_range = initializer_range
        kwargs.pop("tie_word_embeddings", None)
        super().__init__(**kwargs, tie_word_embeddings=False)


class BltGlobalTransformerConfig(PretrainedConfig):
    r"""
    Configuration class for the Blt Global Transformer component.
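
    Example (illustrative usage; the values below restate this class's defaults):

    ```python
    >>> from transformers import BltGlobalTransformerConfig

    >>> # The global transformer operates on patch embeddings at a larger hidden size
    >>> global_config = BltGlobalTransformerConfig(hidden_size=2048, num_hidden_layers=25)
    ```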
    """

    model_type = "blt_global_transformer"

    def __init__(
        self,
        hidden_size=2048,
        num_attention_heads=16,
        num_key_value_heads=None,
        num_hidden_layers=25,
        rms_norm_eps=1e-05,
        dropout=0.0,
        max_position_embeddings=4096,
        rope_theta=500000.0,
        rope_scaling=None,
        hidden_act="silu",
        intermediate_size=5632,
        initializer_range=0.02,
        **kwargs,
    ):
        self.hidden_size = hidden_size
        self.num_attention_heads = num_attention_heads
        self.num_key_value_heads = num_key_value_heads or num_attention_heads
        self.head_dim = hidden_size // num_attention_heads
        # Fall back to the usual SwiGLU sizing when no explicit FFN width is given
        self.intermediate_size = intermediate_size or int(8 * hidden_size / 3)
        self.num_hidden_layers = num_hidden_layers
        self.rms_norm_eps = rms_norm_eps
        self.dropout = dropout
        self.max_position_embeddings = max_position_embeddings
        self.rope_theta = rope_theta
        self.rope_scaling = rope_scaling
        self.hidden_act = hidden_act
        self.initializer_range = initializer_range
        kwargs.pop("tie_word_embeddings", None)
        super().__init__(**kwargs, tie_word_embeddings=False)


class BltPatcherConfig(PretrainedConfig):
    r"""
    Configuration class for the Blt Patcher/Entropy model component.

    Args:
            vocab_size (`int`, *optional*, defaults to 260):
                Vocabulary size of the Blt patcher model. Defines the number of different tokens that can be represented by the
                `input_ids` passed when calling the patcher model.
            hidden_size (`int`, *optional*, defaults to 768):
                Dimension of the hidden representations.
            num_hidden_layers (`int`, *optional*, defaults to 14):
                Number of hidden layers in the Transformer decoder.
            num_attention_heads (`int`, *optional*, defaults to 12):
                Number of attention heads for each attention layer in the Transformer decoder.
            num_key_value_heads (`int`, *optional*):
                This is the number of key_value heads that should be used to implement Grouped Query Attention. If
                `num_key_value_heads=num_attention_heads`, the model will use Multi Head Attention (MHA), if
                `num_key_value_heads=1` the model will use Multi Query Attention (MQA) otherwise GQA is used. When
                converting a multi-head checkpoint to a GQA checkpoint, each group key and value head should be constructed
                by meanpooling all the original heads within that group. For more details, check out [this
                paper](https://huggingface.co/papers/2305.13245). If it is not specified, will default to
                `num_attention_heads`.
            max_position_embeddings (`int`, *optional*, defaults to 8192):
                The maximum sequence length that this model might ever be used with.
            rms_norm_eps (`float`, *optional*, defaults to 1e-05):
                The epsilon used by the rms normalization layers.
            dropout (`float`, *optional*, defaults to 0.0):
                The dropout ratio for the attention probabilities.
            rope_theta (`float`, *optional*, defaults to 10000.0):
                The base period of the RoPE embeddings.
            intermediate_size (`int`, *optional*, defaults to 2048):
                Dimension of the MLP representations.
            rope_scaling (`dict`, *optional*):
                Dictionary containing the RoPE scaling configuration.
            initializer_range (`float`, *optional*, defaults to 0.02):
                The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
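
    Example (illustrative usage; the values below restate this class's defaults):

    ```python
    >>> from transformers import BltPatcherConfig

    >>> # The patcher is a small byte-level LM whose next-byte entropy drives patch boundaries
    >>> patcher_config = BltPatcherConfig(hidden_size=768, num_hidden_layers=14)
    ```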
    """

    model_type = "blt_patcher"

    def __init__(
        self,
        vocab_size=260,
        hidden_size=768,
        num_hidden_layers=14,
        num_attention_heads=12,
        num_key_value_heads=None,
        max_position_embeddings=8192,
        rms_norm_eps=1e-05,
        dropout=0.0,
        rope_theta=10000.0,
        intermediate_size=2048,
        rope_scaling=None,
        initializer_range=0.02,
        **kwargs,
    ):
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads
        self.head_dim = hidden_size // num_attention_heads
        self.num_key_value_heads = num_key_value_heads if num_key_value_heads is not None else num_attention_heads
        self.max_position_embeddings = max_position_embeddings
        self.rms_norm_eps = rms_norm_eps
        self.dropout = dropout
        self.rope_theta = rope_theta
        self.hidden_act = "silu"
        # Fall back to the usual SwiGLU sizing when no explicit FFN width is given
        self.intermediate_size = intermediate_size or int(8 * self.hidden_size / 3)
        self.rope_scaling = rope_scaling
        self.initializer_range = initializer_range
        kwargs.pop("tie_word_embeddings", None)
        super().__init__(**kwargs, tie_word_embeddings=False)


class BltConfig(PretrainedConfig):
    r"""
    This is the configuration class to store the configuration of a [`BltModel`]. It is used to instantiate a
    Blt model according to the specified arguments, defining the model architecture.

    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
    documentation from [`PretrainedConfig`] for more information.

    Args:
            vocab_size (`int`, *optional*, defaults to 260):
                Vocabulary size of the Blt model. Defines the number of different tokens that can be represented by the
                `input_ids` passed when calling [`BltModel`].
            max_position_embeddings (`int`, *optional*, defaults to 4096):
                The maximum sequence length that this model might ever be used with.
            patch_in_forward (`bool`, *optional*, defaults to `True`):
                Whether to perform patching during the forward pass.
            patch_size (`int`, *optional*, defaults to 4):
                Size of the patches used in the patching mechanism.
            patching_mode (`str`, *optional*, defaults to `"entropy"`):
                The mode used for patching, such as entropy-based patching.
            patching_threshold (`float`, *optional*, defaults to 1.34):
                Threshold value used for determining when to apply patches.
            patching_batch_size (`int`, *optional*, defaults to 1):
                Batch size used during the patching process.
            max_patch_length (`int`, *optional*):
                Maximum length of patches that can be generated.
            cross_attn_k (`int`, *optional*, defaults to 2):
                Multiplier for the cross-attention dimension; the encoder's cross-attention output width is
                `decoder_config.hidden_size * cross_attn_k`.
            encoder_hash_byte_group_size (`list`, *optional*):
                List of byte group sizes used in the encoder hash function.
            encoder_hash_byte_group_vocab (`int`, *optional*, defaults to 500002):
                Vocabulary size for the encoder hash byte groups.
            encoder_hash_byte_group_nb_functions (`int`, *optional*, defaults to 1):
                Number of hash functions used in the encoder byte grouping.
            patcher_config (`BltPatcherConfig`, *optional*):
                Configuration for the patcher component of the model.
            encoder_config (`BltLocalEncoderConfig`, *optional*):
                Configuration for the local encoder component of the model.
            decoder_config (`BltLocalDecoderConfig`, *optional*):
                Configuration for the local decoder component of the model.
            global_config (`BltGlobalTransformerConfig`, *optional*):
                Configuration for the global transformer component of the model.
            tie_word_embeddings (`bool`, *optional*, defaults to `False`):
                Whether to tie weight embeddings.
            initializer_range (`float`, *optional*, defaults to 0.02):
                The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
            rope_theta (`float`, *optional*, defaults to 500000.0):
                The base period of the RoPE embeddings.
            rope_scaling (`dict`, *optional*):
                Dictionary containing the RoPE scaling configuration.

    ```python
    >>> from transformers import BltModel, BltConfig

    >>> # Initializing a Blt configuration
    >>> configuration = BltConfig()

    >>> # Initializing a model from the configuration
    >>> model = BltModel(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```
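
    Sub-configurations can also be passed explicitly (illustrative sketch; the override shown
    is arbitrary, not a checkpoint value):

    ```python
    >>> from transformers import BltConfig, BltPatcherConfig

    >>> # Customize only the entropy patcher; the other components keep their defaults
    >>> configuration = BltConfig(patcher_config=BltPatcherConfig(num_hidden_layers=2))
    ```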

    Checkpoint: [facebook/blt](https://huggingface.co/facebook/blt)
    """

    model_type = "blt"
    keys_to_ignore_at_inference = ["past_key_values"]
    sub_configs = {
        "patcher_config": BltPatcherConfig,
        "encoder_config": BltLocalEncoderConfig,
        "decoder_config": BltLocalDecoderConfig,
        "global_config": BltGlobalTransformerConfig,
    }

    def __init__(
        self,
        vocab_size=260,
        max_position_embeddings=4096,
        patch_in_forward=True,
        patch_size=4,
        patching_mode="entropy",
        patching_threshold=1.34,
        patching_batch_size=1,
        max_patch_length=None,
        cross_attn_k=2,
        encoder_hash_byte_group_size=None,
        encoder_hash_byte_group_vocab=500002,
        encoder_hash_byte_group_nb_functions=1,
        patcher_config=None,
        encoder_config=None,
        decoder_config=None,
        global_config=None,
        tie_word_embeddings=False,
        initializer_range=0.02,
        rope_theta=500000.0,
        rope_scaling=None,
        **kwargs,
    ):
        self.vocab_size = vocab_size
        self.max_position_embeddings = max_position_embeddings
        self.initializer_range = initializer_range
        self.rope_theta = rope_theta
        self.rope_scaling = rope_scaling

        # Patching parameters
        self.patch_in_forward = patch_in_forward
        self.patch_size = patch_size
        self.patching_mode = patching_mode
        self.patching_threshold = patching_threshold
        self.patching_batch_size = patching_batch_size
        self.max_patch_length = max_patch_length
        self.patching_device = kwargs.get("patching_device", "cuda")
        self.realtime_patching = kwargs.get("realtime_patching", True)
        self.patching_threshold_add = kwargs.get("patching_threshold_add")
        self.monotonicity = kwargs.get("monotonicity", False)

        # Cross-attention and hash-embedding parameters
        self.cross_attn_k = cross_attn_k
        self.encoder_hash_byte_group_size = encoder_hash_byte_group_size or [3, 4, 5, 6, 7, 8]
        self.encoder_hash_byte_group_vocab = encoder_hash_byte_group_vocab
        self.encoder_hash_byte_group_nb_functions = encoder_hash_byte_group_nb_functions

        # Each sub-config may arrive as None (use defaults), a plain dict, or a config instance
        if patcher_config is None:
            self.patcher_config = BltPatcherConfig(initializer_range=initializer_range)
            logger.info("patcher_config is None, using default Blt patcher config")
        elif isinstance(patcher_config, dict):
            patcher_config.setdefault("initializer_range", initializer_range)
            self.patcher_config = BltPatcherConfig(**patcher_config)
        elif isinstance(patcher_config, BltPatcherConfig):
            self.patcher_config = patcher_config

        if encoder_config is None:
            self.encoder_config = BltLocalEncoderConfig(initializer_range=initializer_range)
            logger.info("encoder_config is None, using default Blt encoder config")
        elif isinstance(encoder_config, dict):
            encoder_config.setdefault("initializer_range", initializer_range)
            self.encoder_config = BltLocalEncoderConfig(**encoder_config)
        elif isinstance(encoder_config, BltLocalEncoderConfig):
            self.encoder_config = encoder_config

        if decoder_config is None:
            self.decoder_config = BltLocalDecoderConfig(initializer_range=initializer_range)
            logger.info("decoder_config is None, using default Blt decoder config")
        elif isinstance(decoder_config, dict):
            decoder_config.setdefault("initializer_range", initializer_range)
            self.decoder_config = BltLocalDecoderConfig(**decoder_config)
        elif isinstance(decoder_config, BltLocalDecoderConfig):
            self.decoder_config = decoder_config

        if global_config is None:
            self.global_config = BltGlobalTransformerConfig(initializer_range=initializer_range)
            logger.info("global_config is None, using default Blt global config")
        elif isinstance(global_config, dict):
            global_config.setdefault("initializer_range", initializer_range)
            self.global_config = BltGlobalTransformerConfig(**global_config)
        elif isinstance(global_config, BltGlobalTransformerConfig):
            self.global_config = global_config

        # Width of the encoder's cross-attention output fed to the global transformer;
        # None signals that it already matches the global hidden size.
        encoder_cross_output_size = self.decoder_config.hidden_size * self.cross_attn_k
        self.global_config.encoder_cross_output_size = (
            encoder_cross_output_size if encoder_cross_output_size != self.global_config.hidden_size else None
        )

        kwargs.pop("tie_word_embeddings", None)
        super().__init__(tie_word_embeddings=tie_word_embeddings, **kwargs)


__all__ = [
    "BltConfig",
    "BltPatcherConfig",
    "BltLocalEncoderConfig",
    "BltLocalDecoderConfig",
    "BltGlobalTransformerConfig",
]