ó
    <±hœ"  ã                   óÄ   • S r SSKJr  SSKJr  SSKJrJr  SSKJ	r	J
r
Jr  SSKJr  SSKJrJr  SS	KJr  \R&                  " \5      r " S
 S\5      r " S S\5      rSS/rg)zGPT-J model configurationé    )ÚOrderedDict)ÚMapping)ÚAnyÚOptionalé   )ÚPreTrainedTokenizerÚ
TensorTypeÚis_torch_available)ÚPretrainedConfig)ÚOnnxConfigWithPastÚPatchingSpec)Úloggingc                   ód   ^ • \ rS rSrSrSrSSSSS.r                 SU 4S	 jjrS
rU =r	$ )Ú
GPTJConfigé   a  
This is the configuration class to store the configuration of a [`GPTJModel`]. It is used to instantiate a GPT-J
model according to the specified arguments, defining the model architecture. Instantiating a configuration with the
defaults will yield a similar configuration to that of the GPT-J
[EleutherAI/gpt-j-6B](https://huggingface.co/EleutherAI/gpt-j-6B) architecture. Configuration objects inherit from
[`PretrainedConfig`] and can be used to control the model outputs. Read the documentation from [`PretrainedConfig`]
for more information.

Args:
    vocab_size (`int`, *optional*, defaults to 50400):
        Vocabulary size of the GPT-J model. Defines the number of different tokens that can be represented by the
        `inputs_ids` passed when calling [`GPTJModel`].
    n_positions (`int`, *optional*, defaults to 2048):
        The maximum sequence length that this model might ever be used with. Typically set this to something large
        just in case (e.g., 512 or 1024 or 2048).
    n_embd (`int`, *optional*, defaults to 4096):
        Dimensionality of the embeddings and hidden states.
    n_layer (`int`, *optional*, defaults to 28):
        Number of hidden layers in the Transformer encoder.
    n_head (`int`, *optional*, defaults to 16):
        Number of attention heads for each attention layer in the Transformer encoder.
    rotary_dim (`int`, *optional*, defaults to 64):
        Number of dimensions in the embedding that Rotary Position Embedding is applied to.
    n_inner (`int`, *optional*, defaults to None):
        Dimensionality of the inner feed-forward layers. `None` will set it to 4 times n_embd
    activation_function (`str`, *optional*, defaults to `"gelu_new"`):
        Activation function, to be selected in the list `["relu", "silu", "gelu", "tanh", "gelu_new"]`.
    resid_pdrop (`float`, *optional*, defaults to 0.1):
        The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
    embd_pdrop (`int`, *optional*, defaults to 0.1):
        The dropout ratio for the embeddings.
    attn_pdrop (`float`, *optional*, defaults to 0.1):
        The dropout ratio for the attention.
    layer_norm_epsilon (`float`, *optional*, defaults to 1e-5):
        The epsilon to use in the layer normalization layers.
    initializer_range (`float`, *optional*, defaults to 0.02):
        The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
    use_cache (`bool`, *optional*, defaults to `True`):
        Whether or not the model should return the last key/values attentions (not used by all models).

Example:

```python
>>> from transformers import GPTJModel, GPTJConfig

>>> # Initializing a GPT-J 6B configuration
>>> configuration = GPTJConfig()

>>> # Initializing a model from the configuration
>>> model = GPTJModel(configuration)

>>> # Accessing the model configuration
>>> configuration = model.config
```ÚgptjÚn_positionsÚn_embdÚn_headÚn_layer)Úmax_position_embeddingsÚhidden_sizeÚnum_attention_headsÚnum_hidden_layersc                 óð   >• Xl         X l        X0l        X@l        XPl        Xpl        X`l        X€l        Xl        X l	        X°l
        XÀl        XÐl        Xàl        Xðl        UU l        [         TU ]D  " SUUUS.UD6  g )N)Úbos_token_idÚeos_token_idÚtie_word_embeddings© )Ú
vocab_sizer   r   r   r   Ún_innerÚ
rotary_dimÚactivation_functionÚresid_pdropÚ
embd_pdropÚ
attn_pdropÚlayer_norm_epsilonÚinitializer_rangeÚ	use_cacher   r   ÚsuperÚ__init__)Úselfr    r   r   r   r   r"   r!   r#   r$   r%   r&   r'   r(   r)   r   r   r   ÚkwargsÚ	__class__s                      €Úc/var/www/html/shao/venv/lib/python3.13/site-packages/transformers/models/gptj/configuration_gptj.pyr+   ÚGPTJConfig.__init__^   s€   ø€ ð* %ŒØ&ÔØŒØŒØŒØŒØ$ŒØ#6Ô Ø&ÔØ$ŒØ$ŒØ"4ÔØ!2ÔØ"Œà(ÔØ(ˆÔä‰Òð 	
Ø%°LÐViñ	
Ømsó	
ó    )r#   r&   r   r%   r   r(   r'   r   r   r!   r   r   r$   r"   r)   r    )iàÄ  i   i   é   é   é@   NÚgelu_newç        r6   r6   gñhãˆµøä>g{®Gáz”?TéPÄ  r7   F)
Ú__name__Ú
__module__Ú__qualname__Ú__firstlineno__Ú__doc__Ú
model_typeÚattribute_mapr+   Ú__static_attributes__Ú__classcell__©r.   s   @r/   r   r      sb   ø† ñ5ðn €Jà#0ØØ'Ø&ñ	€Mð ØØØØØØØ&ØØØØØØØØØ!÷%)
õ )
r1   r   c                   ó
  ^ • \ rS rSr   SS\S\S\\\      S\	4U 4S jjjr
\S\\\\\4   4   4S j5       r\S\4S	 j5       r\S\4S
 j5       r    SS\S\S\S\	S\\   S\\\4   4U 4S jjjr\S\4S j5       rSrU =r$ )ÚGPTJOnnxConfigé‹   ÚconfigÚtaskÚpatching_specsÚuse_pastc                 ó~   >• [         TU ]  XX4S9  [        U R                  SS 5      (       d  SU R                  l        g g )N)rF   rG   rH   Úpad_token_idr   )r*   r+   ÚgetattrÚ_configrJ   )r,   rE   rF   rG   rH   r.   s        €r/   r+   ÚGPTJOnnxConfig.__init__Œ   s;   ø€ ô 	‰Ñ˜¸>ÐÑ]Üt—|‘| ^°T×:Ñ:à()ˆDL‰LÕ%ð ;r1   Úreturnc                 óŒ   • [        SSSS.05      nU R                  (       a  U R                  USS9  SSS.US'   U$ SSS.US'   U$ )	NÚ	input_idsÚbatchÚsequence)r   é   Úinputs)Ú	directionzpast_sequence + sequenceÚattention_mask)r   rH   Úfill_with_past_key_values_)r,   Úcommon_inputss     r/   rT   ÚGPTJOnnxConfig.inputs˜   sa   € ä# [°gÀ*Ñ2MÐ$NÓOˆØ==Ø×+Ñ+¨MÀXÐ+ÑNØ29Ð>XÑ.YˆMÐ*Ñ+ð Ðð 3:¸jÑ.IˆMÐ*Ñ+àÐr1   c                 ó.   • U R                   R                  $ ©N)rL   r   ©r,   s    r/   Ú
num_layersÚGPTJOnnxConfig.num_layers£   s   € à|‰|×#Ñ#Ð#r1   c                 ó.   • U R                   R                  $ r[   )rL   r   r\   s    r/   r   Ú"GPTJOnnxConfig.num_attention_heads§   s   € à|‰|×"Ñ"Ð"r1   Ú	tokenizerÚ
batch_sizeÚ
seq_lengthÚis_pairÚ	frameworkc           	      óp  >• [         [        U ]  XX4US9n[        SUS   05      nU R                  (       a¬  [        5       (       d  [        S5      eSS KnUS   R                  u  pšU
S-   nU	U R                  UU R                  R                  U R                  -  4n[        U R                  5       Vs/ sH$  oØR                  U5      UR                  U5      4PM&     snUS'   US   US'   U R                  (       a6  US   R                  nWR!                  US   UR#                  W	WUS9/S	S
9US'   U$ s  snf )N)rb   rc   rd   re   rP   zACannot generate dummy past_keys inputs without PyTorch installed.r   é   Úpast_key_valuesrV   )ÚdtyperS   )Údim)r*   r   Úgenerate_dummy_inputsr   rH   r
   Ú
ValueErrorÚtorchÚshaper   rL   r   Úranger]   Úzerosri   ÚcatÚones)r,   ra   rb   rc   rd   re   rX   Úordered_inputsrm   rQ   ÚseqlenÚpast_key_values_lengthÚ
past_shapeÚ_Ú
mask_dtyper.   s                  €r/   rk   Ú$GPTJOnnxConfig.generate_dummy_inputs«   s^  ø€ ô Ô0°$ÑMØ¸Ð`ið Nð 
ˆô
 % k°=ÀÑ3MÐ%NÓOˆð ==Ü%×'Ñ'Ü Ð!dÓeÐeãà -¨kÑ :× @Ñ @‘à)/°!©Ð&àØ×,Ñ,Ø*Ø—L‘L×,Ñ,°×0HÑ0HÑHð	
ô QVÐVZ×VeÑVeÔPfó5ÙPfÈ1—[‘[ Ó,¨e¯k©k¸*Ó.EÓFÑPfñ5Ð0Ñ1ð ,9Ð9IÑ+JˆÐ'Ñ(Ø==Ø'Ð(8Ñ9×?Ñ?ˆJØ/4¯y©yØÐ 0Ñ1°5·:±:¸eÐE[Ðcm°:Ð3nÐoÐuvð 09ð 0ˆNÐ+Ñ,ð Ðùò5s   Â2*D3c                 ó   • g)Né   r   r\   s    r/   Údefault_onnx_opsetÚ!GPTJOnnxConfig.default_onnx_opset×   s   € àr1   r   )ÚdefaultNF)éÿÿÿÿr   FN)r8   r9   r:   r;   r   Ústrr   Úlistr   Úboolr+   Úpropertyr   ÚintrT   r]   r   r   r	   r   rk   r|   r?   r@   rA   s   @r/   rC   rC   ‹   s(  ø† ð Ø7;Øñ
*à ð
*ð ð
*ð !  lÑ!3Ñ4ð	
*ð
 ÷
*ð 
*ð ð˜  W¨S°#¨XÑ%6Ð 6Ñ7ó ó ðð ð$˜Có $ó ð$ð ð# Só #ó ð#ð ØØØ*.ñ*à&ð*ð ð*ð ð	*ð
 ð*ð ˜JÑ'ð*ð 
cÑ	÷*ð *ðX ð Có ó ör1   rC   N)r<   Úcollectionsr   Úcollections.abcr   Útypingr   r   Ú r   r	   r
   Úconfiguration_utilsr   Úonnxr   r   Úutilsr   Ú
get_loggerr8   Úloggerr   rC   Ú__all__r   r1   r/   Ú<module>r      sa   ðñ  å #Ý #ß  ç CÑ CÝ 3ß 4Ý ð 
×	Ò	˜HÓ	%€ôi
Ð!ô i
ôZNÐ'ô Nðb Ð)Ð
*r1   