ó
    <±h/2  ã                   ó  • S r SSKJr  SSKJr  SSKJrJr  \" 5       (       a  SSKJ	r	J
r
JrJrJrJrJrJr  SrO1SSKJr  \S	   r	\S
   r
\S   r\S   r\S   r\S   rS\S\S\4S jrSr\R(                  " \5      r " S S\5      rS/rg)zxLSTM configuration.é    )ÚOptionalé   )ÚPretrainedConfig)Úis_xlstm_availableÚlogging)ÚBackendModeTypeÚChunkwiseKernelTypeÚ	DtypeTypeÚSequenceKernelTypeÚStepKernelTypeÚWeightModeTypeÚround_up_to_next_multiple_ofÚxLSTMLargeConfigT)ÚLiteral)ÚtrainÚtrain_with_paddingÚ	inference)úchunkwise--native_autogradzparallel--native_autograd)Úfloat32Úbfloat16Úfloat16Únative_sequence__nativeÚnative)ÚsingleÚfusedÚxÚmultiple_ofÚreturnc                 ó.   • [        X-   S-
  U-  U-  5      $ )z0Rounds up x to the next multiple of multiple_of.é   )Úint)r   r   s     Úe/var/www/html/shao/venv/lib/python3.13/site-packages/transformers/models/xlstm/configuration_xlstm.pyr   r   2   s   € äQ‘_ qÑ(¨[Ñ8¸KÑGÓHÐHó    Fc            A       óP  ^ • \ rS rSrSrSr                                S+S\S\S\\   S\\   S\\   S	\S
\S\S\S\S\	S\	S\	S\
S\S\S\S\S\S\S\	S\S\	S\S\	S\	S\S\S \S!\S"\S#\4@U 4S$ jjjr\S% 5       r\S& 5       r\S' 5       r\S( 5       rS) rS*rU =r$ ),ÚxLSTMConfigé<   aÚ  
This is the configuration class to store the configuration of a [`xLSTM`]. It is used to instantiate a xLSTM
model according to the specified arguments, defining the model architecture. Instantiating a configuration with the
defaults will yield a similar configuration to that of the xLSTM-7b [NX-AI/xLSTM-7b](https://huggingface.co/NX-AI/xLSTM-7b) model.

Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
documentation from [`PretrainedConfig`] for more information.


Args:
    vocab_size (int, optional, *optional*, defaults to 50304):
        Vocabulary size of the xLSTM model. Defines the number of different tokens that can be represented by the
        `inputs_ids` passed when calling [`xLSTMModel`]. Defaults to the GPT2-NeoX tokenizer size.
    hidden_size (int, optional, *optional*, defaults to 4096):
        Dimensionality of the embeddings or hidden states.
    embedding_dim (int, optional, *optional*, defaults to 4096):
        Dimensionality of the embeddings or hidden states, use hidde_size if None.
    num_hidden_layers (int, optional, *optional*, defaults to 32):
        Number of blocks of the xLSTM model.
    num_blocks (int, optional, *optional*, defaults to 32):
        Number of blocks of the xLSTM model, use num_hidden_layers if None.
    num_heads (int, optional, *optional*, defaults to 8):
        Number of heads for the xLSTM Layer/Cell.
    use_bias (bool, optional, *optional*, defaults to `False`):
        Whether to use biases in the xLSTM model.
    norm_reduction_force_float32 (bool, optional, *optional*, defaults to `True`):
        Whether to force the float32 norm reduction op to be done in fp32 precision.
    tie_word_embeddings (bool, optional, *optional*, defaults to `False`):
        Whether to tie word embeddings to the lm head weights.
    add_out_norm (bool, optional, *optional*, defaults to `True`):
        Whether to add an output norm after the blocks before the LMHead.
    norm_eps (float, optional, *optional*, defaults to 1e-06):
        Norm eps for RMSNorm and Layer Norm.
    qk_dim_factor (float, optional, *optional*, defaults to 0.5):
        Scale factor for the query and key dimension.
    v_dim_factor (float, optional, *optional*, defaults to 1.0):
        Scale factor for the value dimension.
    chunkwise_kernel (ChunkwiseKernelType, optional, *optional*, defaults to `"chunkwise--native_autograd"`):
        Kernel type for chunkwise processing mode.
    sequence_kernel (SequenceKernelType, optional, *optional*, defaults to `"native_sequence__native"`):
        Kernel type for sequence processing mode.
    step_kernel (StepKernelType, optional, *optional*, defaults to `"native"`):
        Kernel type for step processing mode.
    mode (BackendModeType, optional, *optional*, defaults to `"inference"`):
        Operation mode (inference is needed for generation).
    chunk_size (int, optional, *optional*, defaults to 64):
        Internal chunk size.
    return_last_states (bool, optional, *optional*, defaults to `True`):
        If to return the last states / cache internally. Needed as True for generation.
    autocast_kernel_dtype (DtypeType, optional, *optional*, defaults to `"bfloat16"`):
        Kernel dtype for the states.
    eps (float, optional, *optional*, defaults to 1e-06):
        Epsilon for the mLSTM cell post norm.
    inference_state_dtype (DtypeType, optional, *optional*, defaults to `"float32"`):
        Kernel dtype for states in inference.
    ffn_proj_factor (float, optional, *optional*, defaults to 2.667):
        Size factor of the post-up projection gated Feed Forward network.
    ffn_round_up_to_multiple_of (int, optional, *optional*, defaults to 64):
        Size factor round value of the post-up projection gated Feed Forward network.
    gate_soft_cap (float, optional, *optional*, defaults to 15.0):
        Gate soft cap scale.
    output_logit_soft_cap (float, optional, *optional*, defaults to 30.0):
        Output logit soft cap scale.
    weight_mode (`Literal`, *optional*, defaults to `"single"`):
        Whether parallel linear layers are separated or fused (single).
    use_cache (bool, optional, *optional*, defaults to `True`):
        Whether to use the cache (xLSTMCache).
    pad_token_id (int, optional, *optional*, defaults to 1):
        Pad token id needed for generation.
    bos_token_id (int, optional, *optional*, defaults to 0):
        BOS token id needed for generation.
    eos_token_id (int, optional, *optional*, defaults to 2):
        EOS token id needed for generation.
    max_inference_chunksize (int, optional, *optional*, defaults to 16384):
        Limit the chunk size for inference to save memory.

Example:

```python
>>> from transformers import xLSTMConfig, xLSTMModel

>>> # Initializing a xLSTM configuration
>>> configuration = xLSTMConfig()

>>> # Initializing a model (with random weights) from the configuration
>>> model = xLSTMModel(configuration)

>>> # Accessing the model configuration
>>> configuration = model.config
```ÚxlstmÚ
vocab_sizeÚhidden_sizeÚembedding_dimÚnum_hidden_layersÚ
num_blocksÚ	num_headsÚuse_biasÚnorm_reduction_force_float32Útie_word_embeddingsÚadd_out_normÚnorm_epsÚqk_dim_factorÚv_dim_factorÚchunkwise_kernelÚsequence_kernelÚstep_kernelÚmodeÚ
chunk_sizeÚreturn_last_statesÚautocast_kernel_dtypeÚepsÚinference_state_dtypeÚffn_proj_factorÚffn_round_up_to_multiple_ofÚgate_soft_capÚoutput_logit_soft_capÚweight_modeÚ	use_cacheÚpad_token_idÚbos_token_idÚeos_token_idÚmax_inference_chunksizec!                 ó  >• Xl         Ub  UOUU l        Ub  UOUU l        Ub  UOUU l        Ub  UOUU l        X`l        Xpl        Xl        X l        X°l	        X€l
        XÀl        XÐl        Xàl        Xðl        UU l        UU l        UU l        UU l        UU l        UU l        UU l        UU l        UU l        UU l        UU l        UU l        UU l        UU l        UU l        UU l        U U l        [@        T"U ]„  " SUUUU	S.U!D6  g )N)rE   rF   rD   r0   © )"r(   r)   r*   r+   r,   r-   r.   r0   r1   r2   r/   r3   r4   r5   r6   r7   r8   r9   r:   r;   r<   r=   r>   r?   r@   rA   rB   rC   rD   rE   rF   rG   ÚsuperÚ__init__)#Úselfr(   r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   r5   r6   r7   r8   r9   r:   r;   r<   r=   r>   r?   r@   rA   rB   rC   rD   rE   rF   rG   ÚkwargsÚ	__class__s#                                     €r"   rK   ÚxLSTMConfig.__init__š   s.  ø€ ðX %ŒØ*5Ñ*A™;À}ˆÔØ.;Ñ.G™]È[ˆÔØ6GÑ6SÑ!2ÐYcˆÔØ(2Ñ(>™*ÐDUˆŒØ"ŒØ ŒØ#6Ô Ø(ÔØ ŒØ,HÔ)à*ÔØ(Ôà 0ÔØ.ÔØ&ˆÔØˆŒ	Ø$ˆŒØ"4ˆÔØ%:ˆÔ"ØˆŒØ%:ˆÔ"à.ˆÔØ+FˆÔ(à*ˆÔØ%:ˆÔ"Ø&ˆÔà"ˆŒØ(ˆÔØ(ˆÔØ(ˆÔØ'>ˆÔ$ä‰Òð 	
Ø%Ø%Ø%Ø 3ñ		
ð
 ó	
r#   c                 óD   • [        U R                  U R                  -  SS9$ ©Né@   )r   )r   r)   r3   ©rL   s    r"   Úqk_dimÚxLSTMConfig.qk_dimô   s&   € ä+Ø×Ñ˜t×1Ñ1Ñ1Øñ
ð 	
r#   c                 óD   • [        U R                  U R                  -  SS9$ rQ   )r   r)   r4   rS   s    r"   Úv_dimÚxLSTMConfig.v_dimû   s&   € ä+Ø×Ñ˜t×0Ñ0Ñ0Øñ
ð 	
r#   c                 ó4   • U R                   U R                  -  $ ©N)rT   r-   rS   s    r"   Úqk_head_dimÚxLSTMConfig.qk_head_dim  s   € à{‰{˜dŸn™nÑ,Ð,r#   c                 ó4   • U R                   U R                  -  $ rZ   )rW   r-   rS   s    r"   Ú
v_head_dimÚxLSTMConfig.v_head_dim  s   € àz‰z˜TŸ^™^Ñ+Ð+r#   c                 ó   • [         (       GaA  [        S0 SU R                  _SU R                  _SU R                  _SU R
                  _SU R                  _SU R                  _SU R                  _SU R                  _S	U R                  _S
U R                  _SU R                  _SU R                  _SU R                  _SU R                  _SU R                   _SU R"                  _SU R$                  _SU R&                  _SU R(                  _SU R*                  _SU R,                  _SU R.                  _SU R0                  _SU R2                  _6$ U $ )Nr(   r*   r,   r-   r.   r1   r2   r/   r3   r4   r5   r6   r7   r8   r9   r:   r;   r<   r=   r>   r?   r@   rA   rB   rI   )Úexternal_xlstmr   r(   r)   r+   r-   r.   r1   r2   r/   r3   r4   r5   r6   r7   r8   r9   r:   r;   r<   r=   r>   r?   r@   rA   rB   rS   s    r"   Úto_xlstm_block_configÚ!xLSTMConfig.to_xlstm_block_config
  s‰  € ß‹>Ü#ò ØŸ?š?ðà"×.Ò.ðð  ×1Ò1ðð Ÿ.š.ð	ð
 Ÿšðð "×.Ò.ðð Ÿšðð .2×-NÒ-Nðð #×0Ò0ðð "×.Ò.ðð "&×!6Ò!6ðð !%× 4Ò 4ðð !×,Ò,ðð  —Y’Yð!ð"  Ÿ?š?ð#ð$ $(×#:Ò#:ð%ð& '+×&@Ò&@ð'ð( —H’Hð)ð* '+×&@Ò&@ð+ð. !%× 4Ò 4ð/ð0 -1×,LÒ,Lð1ð4 #×0Ò0ð5ð6 '+×&@Ò&@ð7ð8 !×,Ò,ð9ð ð> ˆKr#   ) r1   r;   rE   r9   r5   r*   rF   r<   r>   r?   r@   r)   r=   rG   r8   r2   r/   r,   r-   r+   rA   rD   r3   r:   r6   r7   r0   r.   rC   r4   r(   rB   ) i€Ä  i   Né    Né   FTFTçíµ ÷Æ°>g      à?g      ð?r   r   r   r   rR   Tr   rf   r   g¼t“V@rR   g      .@g      >@r   Tr    r   é   i @  )Ú__name__Ú
__module__Ú__qualname__Ú__firstlineno__Ú__doc__Ú
model_typer!   r   ÚboolÚfloatr	   r   r   r   r
   r   rK   ÚpropertyrT   rW   r[   r^   rb   Ú__static_attributes__Ú__classcell__)rN   s   @r"   r%   r%   <   sU  ø† ñYðv €Jð  ØØ'+Ø+-Ø$(ØØØ-1Ø$)Ø!Øà"Ø!à0LØ.GØ&.à +Øà#'Ø+5ØØ+4à!&Ø+-à#Ø'+à&.àØØØØ',ñSX
àðX
ð ðX
ð   ‘}ð	X
ð
 $ C™=ðX
ð ˜S‘MðX
ð ðX
ð ðX
ð '+ðX
ð "ðX
ð ðX
ð ðX
ð ðX
ð ðX
ð" .ð#X
ð$ ,ð%X
ð& $ð'X
ð* ð+X
ð, ð-X
ð0 !ð1X
ð2  )ð3X
ð4 ð5X
ð6  )ð7X
ð: ð;X
ð< &)ð=X
ð@ ðAX
ðB  %ðCX
ðF $ðGX
ðJ ðKX
ðL ðMX
ðN ðOX
ðP ðQX
ðR "%÷SX
ð X
ðt ñ
ó ð
ð ñ
ó ð
ð ñ-ó ð-ð ñ,ó ð,÷!ð !r#   r%   N)rl   Útypingr   Úconfiguration_utilsr   Úutilsr   r   Úxlstm.xlstm_large.modelr   r	   r
   r   r   r   r   r   ra   r   r!   Ú
get_loggerrh   Úloggerr%   Ú__all__rI   r#   r"   Ú<module>rz      sÐ   ðñ  å å 3ß 0ñ ×Ñ÷	÷ 	ó 	ð NåàÐHÑI€OØ!ð	%ñÐð Ð8Ñ9€IØ Ð!:Ñ;ÐØ˜XÑ&€NØÐ.Ñ/€NðI¨ð I¸#ð IÀ#ô Ið €Nð 
×	Ò	˜HÓ	%€ôoÐ"ô oðd ˆ/r#   