
    <h"                         S r SSKJr  SSKJr  SSKJrJr  SSKJ	r	J
r
Jr  SSKJr  SSKJrJr  SS	KJr  \R&                  " \5      r " S
 S\5      r " S S\5      rSS/rg)zGPT-J model configuration    )OrderedDict)Mapping)AnyOptional   )PreTrainedTokenizer
TensorTypeis_torch_available)PretrainedConfig)OnnxConfigWithPastPatchingSpec)loggingc                   d   ^  \ rS rSrSrSrSSSSS.r                 SU 4S	 jjrS
rU =r	$ )
GPTJConfig   a  
This is the configuration class to store the configuration of a [`GPTJModel`]. It is used to instantiate a GPT-J
model according to the specified arguments, defining the model architecture. Instantiating a configuration with the
defaults will yield a similar configuration to that of the GPT-J
[EleutherAI/gpt-j-6B](https://huggingface.co/EleutherAI/gpt-j-6B) architecture. Configuration objects inherit from
[`PretrainedConfig`] and can be used to control the model outputs. Read the documentation from [`PretrainedConfig`]
for more information.

Args:
    vocab_size (`int`, *optional*, defaults to 50400):
        Vocabulary size of the GPT-J model. Defines the number of different tokens that can be represented by the
        `inputs_ids` passed when calling [`GPTJModel`].
    n_positions (`int`, *optional*, defaults to 2048):
        The maximum sequence length that this model might ever be used with. Typically set this to something large
        just in case (e.g., 512 or 1024 or 2048).
    n_embd (`int`, *optional*, defaults to 4096):
        Dimensionality of the embeddings and hidden states.
    n_layer (`int`, *optional*, defaults to 28):
        Number of hidden layers in the Transformer encoder.
    n_head (`int`, *optional*, defaults to 16):
        Number of attention heads for each attention layer in the Transformer encoder.
    rotary_dim (`int`, *optional*, defaults to 64):
        Number of dimensions in the embedding that Rotary Position Embedding is applied to.
    n_inner (`int`, *optional*, defaults to None):
        Dimensionality of the inner feed-forward layers. `None` will set it to 4 times n_embd
    activation_function (`str`, *optional*, defaults to `"gelu_new"`):
        Activation function, to be selected in the list `["relu", "silu", "gelu", "tanh", "gelu_new"]`.
    resid_pdrop (`float`, *optional*, defaults to 0.1):
        The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
    embd_pdrop (`int`, *optional*, defaults to 0.1):
        The dropout ratio for the embeddings.
    attn_pdrop (`float`, *optional*, defaults to 0.1):
        The dropout ratio for the attention.
    layer_norm_epsilon (`float`, *optional*, defaults to 1e-5):
        The epsilon to use in the layer normalization layers.
    initializer_range (`float`, *optional*, defaults to 0.02):
        The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
    use_cache (`bool`, *optional*, defaults to `True`):
        Whether or not the model should return the last key/values attentions (not used by all models).

Example:

```python
>>> from transformers import GPTJModel, GPTJConfig

>>> # Initializing a GPT-J 6B configuration
>>> configuration = GPTJConfig()

>>> # Initializing a model from the configuration
>>> model = GPTJModel(configuration)

>>> # Accessing the model configuration
>>> configuration = model.config
```gptjn_positionsn_embdn_headn_layer)max_position_embeddingshidden_sizenum_attention_headsnum_hidden_layersc                    > Xl         X l        X0l        X@l        XPl        Xpl        X`l        Xl        Xl        Xl	        Xl
        Xl        Xl        Xl        Xl        UU l        [         TU ]D  " SUUUS.UD6  g )N)bos_token_ideos_token_idtie_word_embeddings )
vocab_sizer   r   r   r   n_inner
rotary_dimactivation_functionresid_pdrop
embd_pdrop
attn_pdroplayer_norm_epsiloninitializer_range	use_cacher   r   super__init__)selfr    r   r   r   r   r"   r!   r#   r$   r%   r&   r'   r(   r)   r   r   r   kwargs	__class__s                      c/var/www/html/shao/venv/lib/python3.13/site-packages/transformers/models/gptj/configuration_gptj.pyr+   GPTJConfig.__init__^   s    * %&$#6 &$$"4!2"(( 	
%LVi	
ms	
    )r#   r&   r   r%   r   r(   r'   r   r   r!   r   r   r$   r"   r)   r    )i  i   i         @   Ngelu_new        r6   r6   gh㈵>g{Gz?TP  r7   F)
__name__
__module____qualname____firstlineno____doc__
model_typeattribute_mapr+   __static_attributes____classcell__r.   s   @r/   r   r      sb    5n J#0'&	M &!%)
 )
r1   r   c                   
  ^  \ rS rSr   SS\S\S\\\      S\	4U 4S jjjr
\S\\\\\4   4   4S j5       r\S\4S	 j5       r\S\4S
 j5       r    SS\S\S\S\	S\\   S\\\4   4U 4S jjjr\S\4S j5       rSrU =r$ )GPTJOnnxConfig   configtaskpatching_specsuse_pastc                 ~   > [         TU ]  XX4S9  [        U R                  SS 5      (       d  SU R                  l        g g )N)rF   rG   rH   pad_token_idr   )r*   r+   getattr_configrJ   )r,   rE   rF   rG   rH   r.   s        r/   r+   GPTJOnnxConfig.__init__   s;     	>]t||^T::()DLL% ;r1   returnc                     [        SSSS.05      nU R                  (       a  U R                  USS9  SSS.US'   U$ SSS.US'   U$ )	N	input_idsbatchsequence)r      inputs)	directionzpast_sequence + sequenceattention_mask)r   rH   fill_with_past_key_values_)r,   common_inputss     r/   rT   GPTJOnnxConfig.inputs   sa    #[g*2M$NO==++MX+N29>X.YM*+  3:j.IM*+r1   c                 .    U R                   R                  $ N)rL   r   r,   s    r/   
num_layersGPTJOnnxConfig.num_layers   s    ||###r1   c                 .    U R                   R                  $ r[   )rL   r   r\   s    r/   r   "GPTJOnnxConfig.num_attention_heads   s    ||"""r1   	tokenizer
batch_size
seq_lengthis_pair	frameworkc           	      p  > [         [        U ]  XX4US9n[        SUS   05      nU R                  (       a  [        5       (       d  [        S5      eSS KnUS   R                  u  pU
S-   nU	U R                  UU R                  R                  U R                  -  4n[        U R                  5       Vs/ sH$  oR                  U5      UR                  U5      4PM&     snUS'   US   US'   U R                  (       a6  US   R                  nWR!                  US   UR#                  W	WUS9/S	S
9US'   U$ s  snf )N)rb   rc   rd   re   rP   zACannot generate dummy past_keys inputs without PyTorch installed.r      past_key_valuesrV   )dtyperS   )dim)r*   r   generate_dummy_inputsr   rH   r
   
ValueErrortorchshaper   rL   r   ranger]   zerosri   catones)r,   ra   rb   rc   rd   re   rX   ordered_inputsrm   rQ   seqlenpast_key_values_length
past_shape_
mask_dtyper.   s                  r/   rk   $GPTJOnnxConfig.generate_dummy_inputs   s^    0$M`i N 

 %k=3M%NO ==%'' !dee -k : @ @)/!&,,*LL,,0H0HH	
 QVVZVeVePf5Pf1[[,ekk*.EFPf501 ,99I+J'(=='(89??J/4yy 015::eE[cm:3nouv 09 0N+, 5s   2*D3c                     g)N   r   r\   s    r/   default_onnx_opset!GPTJOnnxConfig.default_onnx_opset   s    r1   r   )defaultNF)r   FN)r8   r9   r:   r;   r   strr   listr   boolr+   propertyr   intrT   r]   r   r   r	   r   rk   r|   r?   r@   rA   s   @r/   rC   rC      s(    7;
* 
* 
* !l!34	
*
 
* 
* WS#X%6 67   $C $ $ #S # # *.*&* * 	*
 * J'* 
c	* *X C  r1   rC   N)r<   collectionsr   collections.abcr   typingr   r    r   r	   r
   configuration_utilsr   onnxr   r   utilsr   
get_loggerr8   loggerr   rC   __all__r   r1   r/   <module>r      sa      # #   C C 3 4  
		H	%i
! i
ZN' Nb )
*r1   