ó
    <±h<:  ã                   óš   • S SK Jr  S SKJr  SSKJrJr  \R                  " \5      r	 " S S\5      r
 " S S\5      r " S	 S
\5      r/ SQrg)é   )ÚPretrainedConfig)Úloggingé   )ÚCONFIG_MAPPINGÚ
AutoConfigc                   ó\   ^ • \ rS rSrSrSrSr                  SU 4S jjrSrU =r	$ )ÚJanusVisionConfigé   a^
  
This is the configuration class to store the configuration of a [`JanusVisionModel`]. It is used to instantiate a
`JanusVisionModel` according to the specified arguments, defining the model architecture.

Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
documentation from [`PretrainedConfig`] for more information.
Args:
    hidden_size (`int`, *optional*, defaults to 1024):
        Dimensionality of the encoder layers and the pooler layer.
    num_hidden_layers (`int`, *optional*, defaults to 24):
        Number of hidden layers in the Transformer encoder.
    num_attention_heads (`int`, *optional*, defaults to 16):
        Number of attention heads for each attention layer in the Transformer encoder.
    num_channels (`int`, *optional*, defaults to 3):
        The number of input channels.
    patch_size (`int`, *optional*, defaults to 16):
        The size (resolution) of each patch.
    image_size (`int`, *optional*, defaults to 384):
        The size (resolution) of each image.
    attention_dropout (`float`, *optional*, defaults to 0.0):
        Dropout probability for attention weights.
    layer_norm_eps (`float`, *optional*, defaults to 1e-06):
        The epsilon used by the layer normalization layers.
    hidden_act (`str` or `function`, *optional*, defaults to `"gelu"`):
        The non-linear activation function (function or string) in the encoder and pooler. If string, `"gelu"`,
        `"relu"`, `"selu"`, and `"gelu_new"` are supported.
    mlp_ratio (`float`, *optional*, defaults to 4.0):
        Ratio of MLP hidden dimensionality to embedding dimensionality.
    attention_bias (`bool`, *optional*, defaults to `True`):
        Whether to add a bias to the queries, keys, and values in the attention layers.
    hidden_dropout_rate (`float`, *optional*, defaults to 0.0):
        The dropout probability for fully connected layers in the encoder.
    projection_dim (`int`, *optional*, defaults to 2048):
        Dimensionality of the MLP projection head.
    projection_dropout (`float`, *optional*, defaults to 0.0):
        Dropout probability for the projection layer.
    use_qk_norm (`bool`, *optional*, defaults to `False`):
        Whether to normalize the query and key matrices.
    initializer_range (`float`, *optional*, defaults to 0.02):
        The standard deviation of the truncated normal initializer for initializing all weight matrices.
    depth (`int`, *optional*, defaults to 2):
        Number of hidden layers in the aligner module.
    num_image_tokens (`int`, *optional*, defaults to 576):
        Number of image tokens.
Újanus_vision_modelÚvision_configc                 ó  >• [         TU ]  " S0 UD6  Xl        X l        X0l        X@l        XPl        X`l        Xpl        X€l	        Xl
        X l        X°l        XÀl        XÐl        Xàl        Xðl        UU l        UU l        UU l        g ©N© )ÚsuperÚ__init__Úhidden_sizeÚnum_hidden_layersÚnum_attention_headsÚnum_channelsÚ
patch_sizeÚ
image_sizeÚattention_dropoutÚlayer_norm_epsÚ
hidden_actÚ	mlp_ratioÚattention_biasÚhidden_dropout_rateÚprojection_dimÚprojection_dropoutÚuse_qk_normÚinitializer_rangeÚdepthÚnum_image_tokens)Úselfr   r   r   r   r   r   r   r   r   r   r   r   r   r   r    r!   r"   r#   ÚkwargsÚ	__class__s                       €Úe/var/www/html/shao/venv/lib/python3.13/site-packages/transformers/models/janus/configuration_janus.pyr   ÚJanusVisionConfig.__init__P   s‚   ø€ ô, 	‰ÒÑ"˜6Ò"à&ÔØ!2ÔØ#6Ô Ø(ÔØ$ŒØ$ŒØ!2ÔØ,ÔØ$Œà"ŒØ,ÔØ#6Ô Ø,ÔØ"4ÔØ&ÔØ!2ˆÔØˆŒ
Ø 0ˆÕó    )r   r   r"   r   r   r   r   r!   r   r   r   r   r   r#   r   r   r   r    )i   é   é   r   r+   i€  ç        gíµ ÷Æ°>Úgelug      @Tr,   é   r,   Fç{®Gáz”?r   i@  )
Ú__name__Ú
__module__Ú__qualname__Ú__firstlineno__Ú__doc__Ú
model_typeÚbase_config_keyr   Ú__static_attributes__Ú__classcell__©r&   s   @r'   r	   r	      sW   ø† ñ,ð\ &€JØ%€Oð ØØØØØØØØØØØØØØØØØ÷'*1õ *1r)   r	   c                   ó’   ^ • \ rS rSrSrSrSrSSSSS	S
S
S/ SQSSSSSSS4S\S\S\S\S\S\S\S\S\	\   S\S\
4U 4S jjjrSrU =r$ )ÚJanusVQVAEConfigé}   a–	  
This is the configuration class to store the configuration of a [`JanusVQVAEModel`]. It is used to instantiate a
`JanusVQVAEModel` according to the specified arguments, defining the model architecture.
Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
documentation from [`PretrainedConfig`] for more information. Instantiating a
configuration with the defaults will yield a similar configuration to the VQModel of the
[deepseek-community/Janus-Pro-1B](https://huggingface.co/deepseek-community/Janus-Pro-1B).

Args:
    embed_dim (`int`, *optional*, defaults to 8):
        Dimensionality of each embedding vector.
    num_embeddings (`int`, *optional*, defaults to 16384):
        Number of codebook embeddings.
    double_latent (`bool`, *optional*, defaults to `False`):
        Whether to use double z channels.
    latent_channels (`int`, *optional*, defaults to 256):
        Number of channels for the latent space.
    num_patches (`int`, *optional*, defaults to 32):
        Num of patches the input images can be divided into.
    in_channels (`int`, *optional*, defaults to 3):
        Number of input channels.
    out_channels (`int`, *optional*, defaults to 3):
        Number of out channels.
    base_channels (`int`, *optional*, defaults to 128):
        Base channel count.
    channel_multiplier (`list[int]`, *optional*, defaults to `[1, 1, 2, 2, 4]`):
        Channel multipliers for each resolution.
    num_res_blocks (`int`, *optional*, defaults to 2):
        Number of residual blocks.
    dropout (`float`, *optional*, defaults to 0.0):
        Dropout rate.
    initializer_range (`float`, *optional*, defaults to 0.02):
        The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
    projection_dim (`int`, *optional*, defaults to 2048):
        Dimensionality of the MLP projection head.
    num_hidden_layers (`int`, *optional*, defaults to 2):
        Number of hidden layers in VAVAE MLP Connecter module.
    hidden_act (`str` or `Callable`, *optional*, defaults to `"gelu"`):
        The non-linear activation function (function or string) in the encoder and pooler. If string, `"gelu"`,
        `"relu"`, `"silu"` and `"gelu_new"` are supported.
    image_token_embed_dim (`int`, *optional*, defaults to 2048):
        Dimension of image embeddings. It should be same as the dimensionality of text embeddings.
Újanus_vqganÚ	vq_configé   i @  Fé   é    r   é€   )é   rC   r   r   é   r   r,   r/   r.   r-   Ú	embed_dimÚnum_embeddingsÚdouble_latentÚlatent_channelsÚnum_patchesÚin_channelsÚout_channelsÚbase_channelsÚchannel_multiplierÚnum_res_blocksÚdropoutc                 óè   >• [         TU ]  " S0 UD6  Xl        X l        X0l        X@l        X`l        X€l        Xl        X l	        X°l
        XÀl        XPl        Xpl        XÐl        Xàl        Xðl        UU l        g r   )r   r   rE   rF   rG   rH   rJ   rL   rM   rN   rO   r!   rI   rK   r   r   r   Úimage_token_embed_dim)r$   rE   rF   rG   rH   rI   rJ   rK   rL   rM   rN   rO   r!   r   r   r   rQ   r%   r&   s                     €r'   r   ÚJanusVQVAEConfig.__init__­   st   ø€ ô( 	‰ÒÑ"˜6Ò"Ø"ŒØ,ÔØ*ÔØ.ÔØ&ÔØ*ÔØ"4ÔØ,ÔØŒØ!2ÔØ&ÔØ(ÔØ,ÔØ!2ÔØ$ŒØ%:ˆÕ"r)   )rL   rM   rG   rO   rE   r   rQ   rJ   r!   rH   rF   r   rI   rN   rK   r   )r0   r1   r2   r3   r4   r5   r6   ÚintÚboolÚlistÚfloatr   r7   r8   r9   s   @r'   r;   r;   }   sÁ   ø† ñ*ðX €JØ!€Oð Ø#Ø#Ø"ØØØØ Ú(7ØØØØØØØ"ñ#$;àð$;ð ð$;ð ð	$;ð
 ð$;ð ð$;ð ð$;ð ð$;ð ð$;ð ! ™Ið$;ð ð$;ð ÷$;ö $;r)   r;   c                   óH   ^ • \ rS rSrSrSr\\\S.r	    SU 4S jjr
SrU =r$ )ÚJanusConfigéÔ   a»  
This is the configuration class to store the configuration of a [`JanusModel`]. It is used to instantiate an
Janus model according to the specified arguments, defining the model architecture. Instantiating a configuration
with the defaults will yield a similar configuration to that of the Janus-1B or Janus-7B models.

e.g. [deepseek-community/Janus-Pro-1B](https://huggingface.co/deepseek-community/Janus-Pro-1B) or
[deepseek-community/Janus-Pro-7B](https://huggingface.co/deepseek-community/Janus-Pro-7B)

Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
documentation from [`PretrainedConfig`] for more information.

Args:
    text_config (`Union[AutoConfig, dict]`, *optional*, defaults to `LlamaConfig`):
        The config object or dictionary of the text backbone.
    vision_config (`Union[AutoConfig, dict]`,  *optional*, defaults to `JanusVisionConfig`):
        The config object or dictionary of the vision backbone.
    vq_config (`Union[AutoConfig, dict]`,  *optional*, defaults to `JanusVQVAEConfig`):
        The config object or dictionary of the VQVAE backbone.
    image_token_id (`int`, *optional*, defaults to 100581):
        Token index of a placeholder image token.

Example:

```python
>>> from transformers import JanusForConditionalGeneration, JanusConfig, JanusVisionConfig, JanusVQVAEConfig, LlamaConfig

>>> # Initializing a Janus vision config
>>> vision_config = JanusVisionConfig()

>>> # Initializing a Llama config
>>> text_config = LlamaConfig()

>>> # Initializing a VQ config
>>> vq_config = JanusVQVAEConfig()

>>> # Initializing a Janus Pro 1B style configuration
>>> configuration = JanusConfig(vision_config=vision_config, text_config=text_config, vq_config=vq_config)

>>> # Initializing a model from the Janus Pro 1B style configuration
>>> model = JanusForConditionalGeneration(configuration)

>>> # Accessing the model configuration
>>> configuration = model.config
```Újanus)Útext_configr   r>   c                 ó&  >• [        U[        5      (       a-  UR                  SS5      US'   [        US      " S	0 UD6U l        O_Uc)  [
        R                  S5        [        S   " 5       U l        O3[        U[        5      (       a  Xl        O[        S[        U5       35      eUc%  [
        R                  S5        [        5       U l        OY[        U[        5      (       a  [        S	0 UD6U l        O3[        U[        5      (       a  X l        O[        S[        U5       35      eUc%  [
        R                  S5        [        5       U l        OY[        U[        5      (       a  [        S	0 UD6U l        O3[        U[        5      (       a  X0l        O[        S[        U5       35      eU R                  R                  U l        U R                  R                  U R                  R                   -  U R                  l        X@l        [&        TU ]P  " S	0 UD6  g )
Nr5   Úllamaz7`text_config` is None. Initializing with default valueszTInvalid type for `text_config`. Must be either `dict` or `LlamaConfig`. Type found: zK`vision_config` is None. Initializing with default JanusVisionConfig valuesz\Invalid type for `vision_config`. Must be either `dict` or `JanusVisionConfig`. Type found: zF`vq_config` is None. Initializing with default JanusVQVAEConfig valueszWInvalid type for `vq_config`. Must be either `dict` or `JanusVQVAEConfig`. Type found: r   )Ú
isinstanceÚdictÚgetr   r[   ÚloggerÚinfor   Ú
ValueErrorÚtyper	   r   r;   r>   r!   r   r   rI   Úimage_token_idr   r   )r$   r[   r   r>   re   r%   r&   s         €r'   r   ÚJanusConfig.__init__	  sÑ  ø€ ô k¤4×(Ñ(Ø(3¯©¸ÀgÓ(NˆK˜Ñ%Ü-¨k¸,Ñ.GÒHÑWÈ;ÑWˆDÕàÑ ÜK‰KÐQÔRÜ-¨gÒ6Ó8ˆDÕÜ˜Ô%5×6Ñ6Ø*Õäð Ü $ [Ó 1Ð2ð4óð ð
 Ñ ÜK‰KÐeÔfÜ!2Ó!4ˆDÕÜ˜¤t×,Ñ,Ü!2Ñ!C°]Ñ!CˆDÕÜ˜Ô'8×9Ñ9Ø!.Õäð Ü $ ]Ó 3Ð4ð6óð ð
 ÑÜK‰KÐ`ÔaÜ-Ó/ˆDNÜ˜	¤4×(Ñ(Ü-Ñ:°	Ñ:ˆDNÜ˜	Ô#3×4Ñ4Ø&Näð Ü $ Y£Ð0ð2óð ð
 "&×!3Ñ!3×!EÑ!EˆÔà%)×%7Ñ%7×%BÑ%BÀd×FXÑFX×FcÑFcÑ%cˆ‰Ô"à,ÔÜ‰ÒÑ"˜6Ó"r)   )re   r!   r[   r   r>   )NNNiåˆ )r0   r1   r2   r3   r4   r5   r   r	   r;   Úsub_configsr   r7   r8   r9   s   @r'   rX   rX   Ô   s8   ø† ñ+ðZ €Jà!Ø*Ø%ñ€Kð ØØØ÷6#õ 6#r)   rX   )r;   r	   rX   N)Úconfiguration_utilsr   Úutilsr   Úautor   r   Ú
get_loggerr0   ra   r	   r;   rX   Ú__all__r   r)   r'   Ú<module>rm      sZ   ðõ, 4Ý ß -ð 
×	Ò	˜HÓ	%€ô\1Ð(ô \1ô~T;Ð'ô T;ônk#Ð"ô k#ò\ Cr)   