
    <h/2                        S r SSKJr  SSKJr  SSKJrJr  \" 5       (       a  SSKJ	r	J
r
JrJrJrJrJrJr  SrO1SSKJr  \S	   r	\S
   r
\S   r\S   r\S   r\S   rS\S\S\4S jrSr\R(                  " \5      r " S S\5      rS/rg)zxLSTM configuration.    )Optional   )PretrainedConfig)is_xlstm_availablelogging)BackendModeTypeChunkwiseKernelType	DtypeTypeSequenceKernelTypeStepKernelTypeWeightModeTyperound_up_to_next_multiple_ofxLSTMLargeConfigT)Literal)traintrain_with_padding	inference)chunkwise--native_autogradzparallel--native_autograd)float32bfloat16float16native_sequence__nativenative)singlefusedxmultiple_ofreturnc                 .    [        X-   S-
  U-  U-  5      $ )z0Rounds up x to the next multiple of multiple_of.   )int)r   r   s     e/var/www/html/shao/venv/lib/python3.13/site-packages/transformers/models/xlstm/configuration_xlstm.pyr   r   2   s    Q_q([8KGHH    Fc            A       P  ^  \ rS rSrSrSr                                S+S\S\S\\   S\\   S\\   S	\S
\S\S\S\S\	S\	S\	S\
S\S\S\S\S\S\S\	S\S\	S\S\	S\	S\S\S \S!\S"\S#\4@U 4S$ jjjr\S% 5       r\S& 5       r\S' 5       r\S( 5       rS) rS*rU =r$ ),xLSTMConfig<   a  
This is the configuration class to store the configuration of a [`xLSTM`]. It is used to instantiate a xLSTM
model according to the specified arguments, defining the model architecture. Instantiating a configuration with the
defaults will yield a similar configuration to that of the xLSTM-7b [NX-AI/xLSTM-7b](https://huggingface.co/NX-AI/xLSTM-7b) model.

Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
documentation from [`PretrainedConfig`] for more information.


Args:
    vocab_size (int, optional, *optional*, defaults to 50304):
        Vocabulary size of the xLSTM model. Defines the number of different tokens that can be represented by the
        `inputs_ids` passed when calling [`xLSTMModel`]. Defaults to the GPT2-NeoX tokenizer size.
    hidden_size (int, optional, *optional*, defaults to 4096):
        Dimensionality of the embeddings or hidden states.
    embedding_dim (int, optional, *optional*, defaults to 4096):
        Dimensionality of the embeddings or hidden states, use hidde_size if None.
    num_hidden_layers (int, optional, *optional*, defaults to 32):
        Number of blocks of the xLSTM model.
    num_blocks (int, optional, *optional*, defaults to 32):
        Number of blocks of the xLSTM model, use num_hidden_layers if None.
    num_heads (int, optional, *optional*, defaults to 8):
        Number of heads for the xLSTM Layer/Cell.
    use_bias (bool, optional, *optional*, defaults to `False`):
        Whether to use biases in the xLSTM model.
    norm_reduction_force_float32 (bool, optional, *optional*, defaults to `True`):
        Whether to force the float32 norm reduction op to be done in fp32 precision.
    tie_word_embeddings (bool, optional, *optional*, defaults to `False`):
        Whether to tie word embeddings to the lm head weights.
    add_out_norm (bool, optional, *optional*, defaults to `True`):
        Whether to add an output norm after the blocks before the LMHead.
    norm_eps (float, optional, *optional*, defaults to 1e-06):
        Norm eps for RMSNorm and Layer Norm.
    qk_dim_factor (float, optional, *optional*, defaults to 0.5):
        Scale factor for the query and key dimension.
    v_dim_factor (float, optional, *optional*, defaults to 1.0):
        Scale factor for the value dimension.
    chunkwise_kernel (ChunkwiseKernelType, optional, *optional*, defaults to `"chunkwise--native_autograd"`):
        Kernel type for chunkwise processing mode.
    sequence_kernel (SequenceKernelType, optional, *optional*, defaults to `"native_sequence__native"`):
        Kernel type for sequence processing mode.
    step_kernel (StepKernelType, optional, *optional*, defaults to `"native"`):
        Kernel type for step processing mode.
    mode (BackendModeType, optional, *optional*, defaults to `"inference"`):
        Operation mode (inference is needed for generation).
    chunk_size (int, optional, *optional*, defaults to 64):
        Internal chunk size.
    return_last_states (bool, optional, *optional*, defaults to `True`):
        If to return the last states / cache internally. Needed as True for generation.
    autocast_kernel_dtype (DtypeType, optional, *optional*, defaults to `"bfloat16"`):
        Kernel dtype for the states.
    eps (float, optional, *optional*, defaults to 1e-06):
        Epsilon for the mLSTM cell post norm.
    inference_state_dtype (DtypeType, optional, *optional*, defaults to `"float32"`):
        Kernel dtype for states in inference.
    ffn_proj_factor (float, optional, *optional*, defaults to 2.667):
        Size factor of the post-up projection gated Feed Forward network.
    ffn_round_up_to_multiple_of (int, optional, *optional*, defaults to 64):
        Size factor round value of the post-up projection gated Feed Forward network.
    gate_soft_cap (float, optional, *optional*, defaults to 15.0):
        Gate soft cap scale.
    output_logit_soft_cap (float, optional, *optional*, defaults to 30.0):
        Output logit soft cap scale.
    weight_mode (`Literal`, *optional*, defaults to `"single"`):
        Whether parallel linear layers are separated or fused (single).
    use_cache (bool, optional, *optional*, defaults to `True`):
        Whether to use the cache (xLSTMCache).
    pad_token_id (int, optional, *optional*, defaults to 1):
        Pad token id needed for generation.
    bos_token_id (int, optional, *optional*, defaults to 0):
        BOS token id needed for generation.
    eos_token_id (int, optional, *optional*, defaults to 2):
        EOS token id needed for generation.
    max_inference_chunksize (int, optional, *optional*, defaults to 16384):
        Limit the chunk size for inference to save memory.

Example:

```python
>>> from transformers import xLSTMConfig, xLSTMModel

>>> # Initializing a xLSTM configuration
>>> configuration = xLSTMConfig()

>>> # Initializing a model (with random weights) from the configuration
>>> model = xLSTMModel(configuration)

>>> # Accessing the model configuration
>>> configuration = model.config
```xlstm
vocab_sizehidden_sizeembedding_dimnum_hidden_layers
num_blocks	num_headsuse_biasnorm_reduction_force_float32tie_word_embeddingsadd_out_normnorm_epsqk_dim_factorv_dim_factorchunkwise_kernelsequence_kernelstep_kernelmode
chunk_sizereturn_last_statesautocast_kernel_dtypeepsinference_state_dtypeffn_proj_factorffn_round_up_to_multiple_ofgate_soft_capoutput_logit_soft_capweight_mode	use_cachepad_token_idbos_token_ideos_token_idmax_inference_chunksizec!                   > Xl         Ub  UOUU l        Ub  UOUU l        Ub  UOUU l        Ub  UOUU l        X`l        Xpl        Xl        Xl        Xl	        Xl
        Xl        Xl        Xl        Xl        UU l        UU l        UU l        UU l        UU l        UU l        UU l        UU l        UU l        UU l        UU l        UU l        UU l        UU l        UU l        UU l        U U l        [@        T"U ]  " SUUUU	S.U!D6  g )N)rE   rF   rD   r0    )"r(   r)   r*   r+   r,   r-   r.   r0   r1   r2   r/   r3   r4   r5   r6   r7   r8   r9   r:   r;   r<   r=   r>   r?   r@   rA   rB   rC   rD   rE   rF   rG   super__init__)#selfr(   r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   r5   r6   r7   r8   r9   r:   r;   r<   r=   r>   r?   r@   rA   rB   rC   rD   rE   rF   rG   kwargs	__class__s#                                     r"   rK   xLSTMConfig.__init__   s.   X %*5*A;}.;.G][6G6S!2Yc(2(>*DU" #6 ( ,H)*( 0.&	$"4%:"%:".+F(*%:"&"((('>$ 	
%%% 3		

 	
r#   c                 D    [        U R                  U R                  -  SS9$ N@   )r   )r   r)   r3   rL   s    r"   qk_dimxLSTMConfig.qk_dim   s&    +t111
 	
r#   c                 D    [        U R                  U R                  -  SS9$ rQ   )r   r)   r4   rS   s    r"   v_dimxLSTMConfig.v_dim   s&    +t000
 	
r#   c                 4    U R                   U R                  -  $ N)rT   r-   rS   s    r"   qk_head_dimxLSTMConfig.qk_head_dim  s    {{dnn,,r#   c                 4    U R                   U R                  -  $ rZ   )rW   r-   rS   s    r"   
v_head_dimxLSTMConfig.v_head_dim  s    zzT^^++r#   c                    [         (       GaA  [        S0 SU R                  _SU R                  _SU R                  _SU R
                  _SU R                  _SU R                  _SU R                  _SU R                  _S	U R                  _S
U R                  _SU R                  _SU R                  _SU R                  _SU R                  _SU R                   _SU R"                  _SU R$                  _SU R&                  _SU R(                  _SU R*                  _SU R,                  _SU R.                  _SU R0                  _SU R2                  _6$ U $ )Nr(   r*   r,   r-   r.   r1   r2   r/   r3   r4   r5   r6   r7   r8   r9   r:   r;   r<   r=   r>   r?   r@   rA   rB   rI   )external_xlstmr   r(   r)   r+   r-   r.   r1   r2   r/   r3   r4   r5   r6   r7   r8   r9   r:   r;   r<   r=   r>   r?   r@   rA   rB   rS   s    r"   to_xlstm_block_config!xLSTMConfig.to_xlstm_block_config
  s   ># ??"..  11 ..	
  "..  .2-N-N #00 ".. "&!6!6 !% 4 4 !,,  YY!"  ??#$ $(#:#:%& '+&@&@'( HH)* '+&@&@+. !% 4 4/0 -1,L,L14 #0056 '+&@&@78 !,,9 > Kr#   ) r1   r;   rE   r9   r5   r*   rF   r<   r>   r?   r@   r)   r=   rG   r8   r2   r/   r,   r-   r+   rA   rD   r3   r:   r6   r7   r0   r.   rC   r4   r(   rB   ) i  i   N    N   FTFTư>g      ?g      ?r   r   r   r   rR   Tr   rf   r   gtV@rR   g      .@g      >@r   Tr    r      i @  )__name__
__module____qualname____firstlineno____doc__
model_typer!   r   boolfloatr	   r   r   r   r
   r   rK   propertyrT   rW   r[   r^   rb   __static_attributes____classcell__)rN   s   @r"   r%   r%   <   sU   Yv J  '++-$(-1$)!"!0L.G&. +#'+5+4!&+-#'+&.',SX
X
 X
  }	X

 $C=X
 SMX
 X
 X
 '+X
 "X
 X
 X
 X
 X
" .#X
$ ,%X
& $'X
* +X
, -X
0 !1X
2  )3X
4 5X
6  )7X
: ;X
< &)=X
@ AX
B  %CX
F $GX
J KX
L MX
N OX
P QX
R "%SX
 X
t 
 
 
 
 - - , ,! !r#   r%   N)rl   typingr   configuration_utilsr   utilsr   r   xlstm.xlstm_large.modelr   r	   r
   r   r   r   r   r   ra   r   r!   
get_loggerrh   loggerr%   __all__rI   r#   r"   <module>rz      s       3 0 	 	 	 NHIO!	% 89I !:;X&N./NI I# I# I N 
		H	%o" od /r#   