
    <h                        S r SSKJr  SSKJrJrJr  SSKrSSKJ	s  J
r  SSKrSSKJ	r	  SSKJr  SSKJrJr  SS	KJr  SS
KJr  SSKJr  SSKJr  SSKJrJr  SSKJrJr  SSK J!r!  SSK"J#r#J$r$J%r%J&r&  SSK'J(r(J)r)  \&RT                  " \+5      r, " S S\	RZ                  5      r. " S S\	RZ                  5      r/ " S S\/5      r0 " S S\/5      r1S r2SLS jr3 " S S\	RZ                  5      r4 " S S \	Rj                  5      r6S!\Rn                  S"\8S#\Rn                  4S$ jr9 SMS%\	RZ                  S&\Rn                  S'\Rn                  S(\Rn                  S)\\Rn                     S*\:S+\:S,\!\#   4S- jjr; " S. S/\	RZ                  5      r< " S0 S1\5      r= " S2 S3\5      r> " S4 S5\	RZ                  5      r? " S6 S7\	RZ                  5      r@ " S8 S9\	RZ                  5      rA " S: S;\	RZ                  5      rB " S< S=\	RZ                  5      rC " S> S?5      rD\$ " S@ SA\5      5       rE\$" SBSC9 " SD SE\E5      5       rF\$ " SF SG\E5      5       rG\$" SHSC9 " SI SJ\E\5      5       rH/ SKQrIg)NzPyTorch Chameleon model.    )cached_property)CallableOptionalUnionN)nn   )ACT2FN)CacheDynamicCache)GenerationMixin)create_causal_mask)FlashAttentionKwargs)GradientCheckpointingLayer)BaseModelOutputWithPastCausalLMOutputWithPast)ALL_ATTENTION_FUNCTIONSPreTrainedModel)Unpack)TransformersKwargsauto_docstringcan_return_tuplelogging   )ChameleonConfigChameleonVQVAEConfigc                   8   ^  \ rS rSrSU 4S jjrS rS rSrU =r$ )ChameleonRMSNorm/   c                    > [         TU ]  5         [        R                  " [        R
                  " U5      5      U l        X l        g)z/
ChameleonRMSNorm is equivalent to T5LayerNorm
N)super__init__r   	Parametertorchonesweightvariance_epsilon)selfhidden_sizeeps	__class__s      h/var/www/html/shao/venv/lib/python3.13/site-packages/transformers/models/chameleon/modeling_chameleon.pyr!   ChameleonRMSNorm.__init__0   s/     	ll5::k#:; #    c                    UR                   nUR                  [        R                  5      nUR	                  S5      R                  SSS9nU[        R                  " X0R                  -   5      -  nU R                  UR                  U5      -  $ )N   T)keepdim)	dtypetor#   float32powmeanrsqrtr&   r%   )r'   hidden_statesinput_dtypevariances       r+   forwardChameleonRMSNorm.forward8   sw    #))%((7 $$Q',,R,>%H?T?T4T(UU{{]--k:::r-   c                 ^    [        U R                  R                  5       SU R                   3$ )Nz, eps=)tupler%   shaper&   r'   s    r+   
extra_reprChameleonRMSNorm.extra_repr?   s*    ))*+6$2G2G1HIIr-   )r&   r%   )ư>)	__name__
__module____qualname____firstlineno__r!   r;   rA   __static_attributes____classcell__r*   s   @r+   r   r   /   s    $;J Jr-   r   c                   Z   ^  \ rS rSrSU 4S jjr\R                  " 5       S 5       rSrU =r	$ )ChameleonRotaryEmbeddingE   c           	      P  > [         TU ]  5         XPl        Xl        X l        X0l        SU R
                  [        R                  " SU R                  S[        R                  S9R                  U[        R                  S9U R                  -  -  -  nU R                  SUSS9  X l        g )	N      ?r   r/   r2   devicer2   inv_freqF
persistent)r    r!   scaling_factordimmax_position_embeddingsbaser#   arangeint64r3   floatregister_buffermax_seq_len_cached)r'   rW   rX   rY   rR   rV   rS   r*   s          r+   r!   !ChameleonRotaryEmbedding.__init__F   s    ,'>$	IIQ!5;;?BB&X]XcXcBdgkgogooq
 	ZeD"9r-   c                    U R                   S S S 2S 4   R                  5       R                  UR                  S   SS5      nUS S 2S S S 24   R                  5       nUR                  R
                  nUS:w  a  UOSn[        R                  " USS9   UR                  5       UR                  5       -  R                  SS5      n[        R                  " Xf4SS	9nUR                  5       nUR                  5       n	S S S 5        WR                  UR                  S
9W	R                  UR                  S
94$ ! , (       d  f       N@= f)Nr   r0   r   mpscpuF)device_typeenabledr/   rW   rP   )rS   r\   expandr?   rR   typer#   autocast	transposecatcossinr3   r2   )
r'   xposition_idsinv_freq_expandedposition_ids_expandedrc   freqsembrk   rl   s
             r+   r;    ChameleonRotaryEmbedding.forwardT   s    !MM$4-8>>@GGHZHZ[\H]_acde ,QaZ 8 > > @ hhmm%0E%9ku^^UC&,,.1F1L1L1NNYYZ[]^_E))UN3C'')C'')C	 D
 vvAGGv$cff177f&;;; DCs   A(D22
E )rY   rW   rX   r^   rV   )i   i'  NrO   )
rD   rE   rF   rG   r!   r#   no_gradr;   rH   rI   rJ   s   @r+   rL   rL   E   s"    : ]]_< <r-   rL   c                   ,   ^  \ rS rSrSrU 4S jrSrU =r$ )%ChameleonLinearScalingRotaryEmbeddinge   z_ChameleonRotaryEmbedding extended with linear scaling. Credits to the Reddit user /u/kaiokendevc                 f   > UR                  5       U R                  -  n[        TU ]  X5      u  p4X44$ N)r\   rV   r    r;   )r'   rm   rn   rk   rl   r*   s        r+   r;   -ChameleonLinearScalingRotaryEmbedding.forwardh   s3    #))+d.A.AA7?13xr-    rD   rE   rF   rG   __doc__r;   rH   rI   rJ   s   @r+   rv   rv   e   s    i r-   rv   c                   ,   ^  \ rS rSrSrU 4S jrSrU =r$ ))ChameleonDynamicNTKScalingRotaryEmbeddingo   zqChameleonRotaryEmbedding extended with Dynamic NTK scaling. Credits to the Reddit users /u/bloc97 and /u/emozillac           	        > [         R                  " U5      S-   nX0R                  :  a  U R                  U R                  U-  U R                  -  U R                  S-
  -
  U R
                  U R
                  S-
  -  -  -  nSU[         R                  " SU R
                  S[         R                  S9R                  UR                  [         R                  S9U R
                  -  -  -  nU R                  SUSS	9  [        TU ]5  X5      u  pgXg4$ )
Nr   r/   rO   r   rP   rQ   rS   FrT   )r#   maxrX   rY   rV   rW   rZ   r[   r3   rR   r\   r]   r    r;   )	r'   rm   rn   seq_lenrY   rS   rk   rl   r*   s	           r+   r;   1ChameleonDynamicNTKScalingRotaryEmbedding.forwardr   s    ))L)A-11199$$w.1M1MMRVReRehiRij((dhhl+ - -D LLDHHau{{CFFahh^c^i^iFjmqmumuuwH   X% H7?13xr-   r{   r|   rJ   s   @r+   r   r   o   s    { r-   r   c                     U SSU R                   S   S-  24   nU SU R                   S   S-  S24   n[        R                  " U* U4SS9$ )z*Rotates half the hidden dims of the input..Nr0   r/   re   )r?   r#   rj   )rm   x1x2s      r+   rotate_halfr      sZ    	
3"!''"+"""	#B	
3q ""	#B99rc2YB''r-   c                     UR                  U5      nUR                  U5      nX-  [        U 5      U-  -   nX-  [        U5      U-  -   nXg4$ )a  Applies Rotary Position Embedding to the query and key tensors.

Args:
    q (`torch.Tensor`): The query tensor.
    k (`torch.Tensor`): The key tensor.
    cos (`torch.Tensor`): The cosine part of the rotary embedding.
    sin (`torch.Tensor`): The sine part of the rotary embedding.
    position_ids (`torch.Tensor`, *optional*):
        Deprecated and unused.
    unsqueeze_dim (`int`, *optional*, defaults to 1):
        The 'unsqueeze_dim' argument specifies the dimension along which to unsqueeze cos[position_ids] and
        sin[position_ids] so that they can be properly broadcasted to the dimensions of q and k. For example, note
        that cos[position_ids] and sin[position_ids] have the shape [batch_size, seq_len, head_dim]. Then, if q and
        k have the shape [batch_size, heads, seq_len, head_dim], then setting unsqueeze_dim=1 makes
        cos[position_ids] and sin[position_ids] broadcastable to the shapes of q and k. Similarly, if q and k have
        the shape [batch_size, seq_len, heads, head_dim], then set unsqueeze_dim=2.
Returns:
    `tuple(torch.Tensor)` comprising of the query and key tensors rotated using the Rotary Position Embedding.
)	unsqueezer   )qkrk   rl   rn   unsqueeze_dimq_embedk_embeds           r+   apply_rotary_pos_embr      sS    ( --
&C
--
&Cw;q>C/0Gw;q>C/0Gr-   c                   .   ^  \ rS rSrU 4S jrS rSrU =r$ )ChameleonMLP   c                   > [         TU ]  5         Xl        UR                  U l        UR                  U l        [
        R                  " U R                  U R                  UR                  S9U l        [
        R                  " U R                  U R                  UR                  S9U l	        [
        R                  " U R                  U R                  UR                  S9U l
        [        UR                     U l        g )Nbias)r    r!   configr(   intermediate_sizer   Linearmlp_bias	gate_projup_proj	down_projr	   
hidden_actact_fnr'   r   r*   s     r+   r!   ChameleonMLP.__init__   s    !--!'!9!94#3#3T5K5KRXRaRabyy!1!143I3IPVP_P_`4#9#94;K;KRXRaRabV../r-   c                     U R                  U R                  U R                  U5      5      U R                  U5      -  5      nU$ ry   )r   r   r   r   )r'   rm   r   s      r+   r;   ChameleonMLP.forward   s6    NN4;;t~~a/@#ADLLQRO#ST	r-   )r   r   r   r   r(   r   r   rD   rE   rF   rG   r!   r;   rH   rI   rJ   s   @r+   r   r      s    0 r-   r   c                   2   ^  \ rS rSrSrU 4S jrS rSrU =r$ )ChameleonLayerNorm   ar  
LayerNorm but computes stats only over the last dim because Chameleon applies gamma and beta
from each shard separately to each head, instead of reducing. We can apply each head's own
gamma/beta by repeat-interleaving weights from each shard, but the stats have to be computed
in the last dimension. This module applies gamma/beta manually to fulfill this requirement.
c                 D   > [         TU ]  " U/UQ70 UD6  US   4U l        g )Nr0   )r    r!   normalized_shape)r'   r(   argskwargsr*   s       r+   r!   ChameleonLayerNorm.__init__   s)    6t6v6!,R 2r-   c                 ~    [         R                  " XR                  S S SS9nXR                  -  U R                  -   nU$ )Ngh㈵>r)   )F
layer_normr   r%   r   r'   r8   s     r+   r;   ChameleonLayerNorm.forward   s9    ]4I4I4QU[_`%3dii?r-   )r   )	rD   rE   rF   rG   r}   r!   r;   rH   rI   rJ   s   @r+   r   r      s    3 r-   r   r8   n_repreturnc                     U R                   u  p#pEUS:X  a  U $ U SS2SS2SSS2SS24   R                  X#XU5      n U R                  X#U-  XE5      $ )z
This is the equivalent of torch.repeat_interleave(x, dim=1, repeats=n_rep). The hidden states go from (batch,
num_key_value_heads, seqlen, head_dim) to (batch, num_attention_heads, seqlen, head_dim)
r   N)r?   rf   reshape)r8   r   batchnum_key_value_headsslenhead_dims         r+   	repeat_kvr      s_    
 2?1D1D.Ez!!Qa"23::5W\dlmM  e(CTTTr-   modulequerykeyvalueattention_maskscalingdropoutr   c                 @   [        X R                  5      n[        X0R                  5      n	[        R                  " XR	                  SS5      5      U-  n
Ub"  US S 2S S 2S S 2S UR
                  S   24   nX-   n
[        R                  R                  U
S[        R                  S9R                  UR                  5      n
[        R                  R                  XU R                  S9n
[        R                  " X5      nUR	                  SS5      R                  5       nX4$ )Nr/   r   r0   )rW   r2   )ptrainingr   )r   num_key_value_groupsr#   matmulri   r?   r   
functionalsoftmaxr4   r3   r2   r   r   
contiguous)r   r   r   r   r   r   r   r   
key_statesvalue_statesattn_weightscausal_maskattn_outputs                r+   eager_attention_forwardr      s     3 ; ;<JU$?$?@L<<';';Aq'ABWLL!$Q1.D
0@0@0D.D%DE#1==((2U]](SVVW\WbWbcL==((6??([L,,|:K''1-88:K$$r-   c                   D  ^  \ rS rSrSrSS\S\\   4U 4S jjjrS r	      SS\
R                  S\\
R                     S	\\
R                     S
\\   S\S\S\\
R                     S\\
R                  \\
R                     \\\
R                        4   4S jjrSrU =r$ )ChameleonAttention   z=Multi-headed attention from 'Attention Is All You Need' paperr   	layer_idxc                   > [         TU ]  5         Xl        X l        Uc-  [        R                  SU R                  R                   S35        UR                  U l        UR                  U l	        UR                  U l        U R                  U R                  -  U l        UR                  U l        U R                  U R                  -  U l        UR                  U l        UR                   U l        SU l        UR$                  U l        U R                  S-  U l        U R                  U R                  -  U R                  :w  a&  [)        SU R                   SU R                   S35      e[*        R,                  " U R                  U R                  U R                  -  UR.                  S9U l        [*        R,                  " U R                  U R                  U R                  -  UR.                  S9U l        [*        R,                  " U R                  U R                  U R                  -  UR.                  S9U l        [*        R,                  " U R                  U R                  UR.                  S9U l        [9        U R                  U R                  45      U l        [9        U R                  U R                  45      U l        U R?                  5         g )	NzInstantiating z without passing a `layer_idx` is not recommended and will lead to errors during the forward call if caching is used. Please make sure to provide a `layer_idx` when creating this class.T      z?hidden_size must be divisible by num_heads (got `hidden_size`: z and `num_heads`: z).r   ) r    r!   r   r   loggerwarning_oncer*   rD   attention_dropoutr(   num_attention_heads	num_headsr   r   r   rX   
rope_theta	is_causalmodel_parallel_sizer   
ValueErrorr   r   attention_biasq_projk_projv_projo_projr   q_normk_norm
_init_roper'   r   r   r*   s      r+   r!   ChameleonAttention.__init__   s,   " !8!8 9 :, , "(!9!9!--33((DNN:#)#=#= $(NNd6N6N$N!'-'E'E$ ++#)#=#= }}d*MMDNN*t/?/??QRVRbRbQc$T^^$4B8 
 ii 0 0$..4==2PW]WlWlmii 0 0$2J2JT]]2Zagavavwii 0 0$2J2JT]]2Zagavavwii 0 0$2B2BI^I^_($..$--)HI($*B*BDMM)RSr-   c                    U R                   R                  c/  [        U R                  U R                  U R
                  S9U l        g U R                   R                  S   nU R                   R                  S   nUS:X  a0  [        U R                  U R                  UU R
                  S9U l        g US:X  a0  [        U R                  U R                  UU R
                  S9U l        g [        SU 35      e)N)rX   rY   rg   factorlinear)rX   rV   rY   dynamiczUnknown RoPE scaling type )
r   rope_scalingrL   r   rX   r   
rotary_embrv   r   r   )r'   scaling_typerV   s      r+   r   ChameleonAttention._init_rope  s    ;;##+6(,(D(D__DO  ;;33F;L![[55h?Nx'"GMM,0,H,H#1	# *"KMM,0,H,H#1	# !#=l^!LMMr-   r8   r   rn   past_key_valueoutput_attentions	use_cachecache_positionr   c                    UR                  5       u  pnU R                  U5      nU R                  U5      nU R                  U5      nUR	                  SU R
                  U R                  5      nU R                  U5      nUR	                  SU R                  U R                  5      nU R                  U5      nUR	                  XU R
                  U R                  5      R                  SS5      nUR	                  XU R                  U R                  5      R                  SS5      nUR                  XU R                  U R                  5      R                  SS5      nU R                  X5      u  nn[        XUU5      u  pUb$  UXS.nUR                  XU R                  U5      u  p[         nU R"                  R$                  S:w  a  [&        U R"                  R$                     nU" U UUUU4U R(                  (       d  SOU R*                  U R,                  S.UD6u  nnUR	                  XS5      R/                  5       nU R1                  U5      nUU4$ )Nr0   r   r/   )rl   rk   r   eager        )r   r   )sizer   r   r   r   r   r   r   r   r   ri   viewr   r   updater   r   r   _attn_implementationr   r   r   r   r   r   )r'   r8   r   rn   r   r   r   r   r   bszq_len_query_statesr   r   rk   rl   cache_kwargsattention_interfacer   r   s                        r+   r;   ChameleonAttention.forward8  s    &**,A{{=1[[/
{{=1#++BN{{<0''D,D,DdmmT
[[,
#++CV``abdef''D4L4Ldmm\ffghjkl
#((T5M5Mt}}]gghiklm??<>S#7RUWZ#[ %#&sUL'5'<'<ZW[WeWegs't$J(?;;++w6"9$++:Z:Z"[$7	%
  $}}C$2H2HLL	%
 	%
!\ "))#b9DDFkk+.L((r-   )r   r   r   r(   r   r   r   r   rX   r   r   r   r   r   r   r   r   r   r   r   ry   NNNFFN)rD   rE   rF   rG   r}   r   r   intr!   r   r#   Tensor
LongTensorr
   boolr>   r;   rH   rI   rJ   s   @r+   r   r      s    G# #8C= # #NN< 2637*."'595)||5) !.5) u//0	5)
 !5)  5) 5) !!1!125) 
u||Xell3XeELL>Q5RR	S5) 5)r-   r   c                   8  ^  \ rS rSrS\S\4U 4S jjr      SS\R                  S\	\R                     S\	\R                     S\	\   S	\	\   S
\	\   S\	\R                     S\\R                  \	\\R                  \R                  4      4   4S jjrSrU =r$ )ChameleonDecoderLayeriq  r   r   c                   > [         TU ]  5         UR                  U l        [        XS9U l        [        U5      U l        [        UR                  UR                  S9U l	        [        UR                  UR                  S9U l
        g N)r   r   r   r    r!   r(   r   	self_attnr   mlpr   rms_norm_epsinput_layernormpost_attention_layernormr   s      r+   r!   ChameleonDecoderLayer.__init__r  k    !--+6O'/0B0BH[H[\(89K9KQWQdQd(e%r-   r8   r   rn   r   r   r   r   r   c                     Un	U R                  U5      nU R                  " SUUUUUUUS.UD6u  pX-   nUn	U R                  U5      nU R                  U5      nX-   nU4nU(       a  X4-  nU$ )at  
Args:
    hidden_states (`torch.FloatTensor`): input to the layer of shape `(batch, seq_len, embed_dim)`
    attention_mask (`torch.FloatTensor`, *optional*):
        attention mask of size `(batch_size, sequence_length)` if flash attention is used or `(batch_size, 1,
        query_sequence_length, key_sequence_length)` if default attention is used.
    output_attentions (`bool`, *optional*):
        Whether or not to return the attentions tensors of all attention layers. See `attentions` under
        returned tensors for more detail.
    use_cache (`bool`, *optional*):
        If set to `True`, `past_key_values` key value states are returned and can be used to speed up decoding
        (see `past_key_values`).
    past_key_value (`Tuple(torch.FloatTensor)`, *optional*): cached past key and value projection states
    cache_position (`torch.LongTensor` of shape `(sequence_length)`, *optional*):
        Indices depicting the position of the input sequence tokens in the sequence
    kwargs (`dict`, *optional*):
        Arbitrary kwargs to be ignored, used for FSDP and other methods that injects code
        into the model
r8   r   rn   r   r   r   r   r{   )r  r  r  r  r'   r8   r   rn   r   r   r   r   r   residualself_attn_weightsoutputss               r+   r;   ChameleonDecoderLayer.forward|  s    < !,,]; ,0>> 	,
')%)/)	,
 	,
( !0 !55mD/ 0 "++Gr-   r(   r  r  r  r  r  rD   rE   rF   rG   r   r  r!   r#   r  r   r  r
   r	  r>   FloatTensorr;   rH   rI   rJ   s   @r+   r  r  q  s    f f3 f 2637*.,1$)59:||: !.: u//0	:
 !: $D>: D>: !!1!12: 
u  (51B1BEDUDU1U+V"WW	X: :r-   r  c                   8  ^  \ rS rSrS\S\4U 4S jjr      SS\R                  S\	\R                     S\	\R                     S\	\   S	\	\   S
\	\   S\	\R                     S\\R                  \	\\R                  \R                  4      4   4S jjrSrU =r$ )ChameleonSwinDecoderLayeri  r   r   c                   > [         TU ]  5         UR                  U l        [        XS9U l        [        U5      U l        [        UR                  UR                  S9U l	        [        UR                  UR                  S9U l
        g r  r  r   s      r+   r!   "ChameleonSwinDecoderLayer.__init__  r  r-   r8   r   rn   r   r   r   r   r   c                     Un	U R                   " SUUUUUUUS.UD6u  pU R                  U5      nX-   nUn	U R                  U5      nU R                  U5      nX-   nU4nU(       a  X4-  nU$ )a  
Args:
    hidden_states (`torch.FloatTensor`):
        input to the layer of shape `(batch, seq_len, embed_dim)`
    attention_mask (`torch.FloatTensor`, *optional*):
        attention mask of size `(batch_size, sequence_length)` if flash attention is used or `(batch_size, 1,
        query_sequence_length, key_sequence_length)` if default attention is used.
    position_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
        Indices of positions of each input sequence tokens in the position embeddings
    past_key_value (`Tuple(torch.FloatTensor)`, *optional*): cached past key and value projection states
    output_attentions (`bool`, *optional*):
        Whether or not to return the attentions tensors of all attention layers. See `attentions` under
        returned tensors for more detail.
    use_cache (`bool`, *optional*):
        If set to `True`, `past_key_values` key value states are returned and can be used to speed up decoding
        (see `past_key_values`).
    cache_position (`torch.LongTensor` of shape `(sequence_length)`, *optional*):
        Indices depicting the position of the input sequence tokens in the sequence.
r  r{   )r  r  r  r  r  s               r+   r;   !ChameleonSwinDecoderLayer.forward  s    > ! ,0>> 	,
')%)/)	,
 	,
( ,,]; 0 /55mD 0 "++Gr-   r  r  r  rJ   s   @r+   r!  r!    s    f f3 f 2637*.,1$)598||8 !.8 u//0	8
 !8 $D>8 D>8 !!1!128 
u  (51B1BEDUDU1U+V"WW	X8 8r-   r!  c                   N   ^  \ rS rSrSrU 4S jrS\R                  4S jrSr	U =r
$ )ChameleonVQVAEVectorQuantizeri  a  
A module for vector quantization using learned embedding vectors.

This module implements the quantization process similar to te one described in
the VQ-VAE (Vector Quantized Variational AutoEncoder) paper. It quantizes continuous
input vectors into discrete codebook vectors, which are learned during training.
Current implementation improves over previous ones by avoiding costly matrix multiplications
and allowing for post-hoc remapping of indices.
c                    > [         TU ]  5         UR                  U l        UR                  U l        [        USS5      U l        [        R                  " U R                  U R                  5      U l	        g )Nbetag      ?)
r    r!   num_embeddings	embed_dimembedding_dimgetattrr)  r   	Embedding	embeddingr   s     r+   r!   &ChameleonVQVAEVectorQuantizer.__init__
  sX    $33#--FFD1	d&9&94;M;MNr-   hidden_statec           
      >   UR                  SSSS5      R                  5       nUR                  SU R                  5      n[        R
                  " US-  SSS9[        R
                  " U R                  R                  S-  SS9-   S[        R                  " S	X R                  R                  R                  SS5      5      -  -
  n[        R                  " USS9nU R                  U5      R                  UR                  5      n[        R                  " UR                  5       U-
  S-  5      U R                  [        R                  " XQR                  5       -
  S-  5      -  -   nXU-
  R                  5       -   nUR                  SSSS5      R                  5       nXVU4$ )
Nr   r/   r   r   r0   T)rW   r1   re   z	bd,dn->bn)permuter   r   r,  r#   sumr/  r%   einsumri   argminr?   r6   detachr)  )r'   r1  hidden_state_flattened	distancesmin_encoding_indiceshidden_state_quantlosss          r+   r;   %ChameleonVQVAEVectorQuantizer.forward  s   #++Aq!Q7BBD!-!2!22t7I7I!J II,a/QEii--q0a89%,,{,BNNDYDYDcDcdeghDijjk 	  %||I1=!^^,@AFF|GYGYZ zz-446E!KLtyy[`[e[e"5"5"77A=\
 P
 

 *,-N,V,V,XX 0771aCNNP!)===r-   )r)  r/  r,  r*  )rD   rE   rF   rG   r}   r!   r#   r  r;   rH   rI   rJ   s   @r+   r'  r'    s#    O>ELL > >r-   r'  c                   .   ^  \ rS rSrU 4S jrS rSrU =r$ )#ChameleonVQVAEEncoderConvDownsamplei.  c                 Z   > [         TU ]  5         [        R                  " XSSSS9U l        g )Nr   r/   r   kernel_sizestridepadding)r    r!   r   Conv2dconvr'   in_channelsr*   s     r+   r!   ,ChameleonVQVAEEncoderConvDownsample.__init__/  s%    IIkAaYZ[	r-   c                 V    [         R                  " USSSS9nU R                  U5      nU$ )N)r   r   r   r   constantr   )padmoder   )r   rL  rF  r   s     r+   r;   +ChameleonVQVAEEncoderConvDownsample.forward3  s+    mJVWX		-0r-   )rF  r   rJ   s   @r+   r?  r?  .  s    \ r-   r?  c                   6   ^  \ rS rSr  SU 4S jjrS rSrU =r$ ) ChameleonVQVAEEncoderResnetBlocki:  c                   > [         TU ]  5         X l        Uc  UOUU l        X@l        [
        R                  R                  SUSSS9U l        [
        R                  R                  X#SSSS9U l
        [
        R                  R                  SUSSS9U l        [
        R                  R                  UR                  5      U l        [
        R                  R                  X3SSSS9U l        U R                  U R                  :w  a]  U R                  (       a&  [
        R                  R                  X#SSSS9U l        g [
        R                  R                  X#SSSS9U l        g g )	N    rC   T
num_groupsnum_channelsr)   affiner   r   rA  r   )r    r!   rH  out_channelsuse_conv_shortcutr#   r   	GroupNormnorm1rE  conv1norm2Dropoutr   conv2conv_shortcutnin_shortcut)r'   r   rH  rW  r_  r*   s        r+   r!   )ChameleonVQVAEEncoderResnetBlock.__init__;  s%    	&+7+?K\!.XX''2KUYbf'g
XX__[AVWab_c
XX''2LVZcg'h
xx''7XX__\QWXbc_d
t000%%%*XX__[\]fgqr_%s"$)HHOOK[\efpqO$r!	 1r-   c                    UnU R                  U5      nU[        R                  " U5      -  nU R                  U5      nU R	                  U5      nU[        R                  " U5      -  nU R                  U5      nU R                  U5      nU R                  U R                  :w  a7  U R                  (       a  U R                  U5      nX!-   $ U R                  U5      nX!-   $ ry   )rZ  r#   sigmoidr[  r\  r   r^  rH  rW  rX  r_  r`  )r'   r8   r  s      r+   r;   (ChameleonVQVAEEncoderResnetBlock.forwardR  s     

=1}55

=1

=1}55]3

=1t000%%--h7 ''  ,,X6''r-   )
r[  r^  r_  r   rH  r`  rZ  r\  rW  rX  )NFr   rJ   s   @r+   rP  rP  :  s    
 s.( (r-   rP  c                   .   ^  \ rS rSrU 4S jrS rSrU =r$ )ChameleonVQVAEEncoderAttnBlockif  c                   > [         TU ]  5         Xl        [        R                  R                  SUSSS9U l        [        R                  R                  XSSSS9U l        [        R                  R                  XSSSS9U l	        [        R                  R                  XSSSS9U l
        [        R                  R                  XSSSS9U l        g )NrR  rC   TrS  r   r   rA  )r    r!   rH  r#   r   rY  normrE  r   r   vproj_outrG  s     r+   r!   'ChameleonVQVAEEncoderAttnBlock.__init__g  s    &HH&&";TXae&f	qQR\]^qQR\]^qQR\]^aXYcder-   c                 Z   UnU R                  U5      nU R                  U5      nU R                  U5      nU R                  U5      nUR                  u  pgpUR                  XgX-  5      R                  SSS5      nUR                  XgX-  5      n[        R                  " X45      n
U
[        U5      S-  -  n
[        R                  " U
SS9n
UR                  XgX-  5      nU
R                  SSS5      n
[        R                  " XZ5      R                  XgX5      nU R                  U5      nX+-   $ )Nr   r/   r   r   re   )rh  r   r   ri  r?   r   r3  r#   bmmr  r   r   rj  )r'   r8   r  r  r   r   
batch_sizechannelsheightwidthr   r   s               r+   r;   &ChameleonVQVAEEncoderAttnBlock.forwardq  s    		-0vvm,VVM*
vvm, /;.@.@+
f#++J&.QYYZ[]^`ab''
fnM
yy:#s8}'>?yy15 $++J&.Q#++Aq!4ii;CCJZ`hmmK0%%r-   )rH  r   rh  rj  r   ri  r   rJ   s   @r+   rf  rf  f  s    f& &r-   rf  c                   J   ^  \ rS rSrU 4S jrS\R                  4S jrSrU =r	$ )ChameleonVQVAEEncoderi  c           
        > [         TU ]  5         [        UR                  5      U l        UR
                  U l        UR                  nUR                  nUR                  nUR                  nUR                  nUR                  n[        R                  R                  XBSSSS9U l        UnS[        U5      -   n	Xl        [        R"                  " 5       U l        ['        U R                  5       GH%  n
[        R"                  " 5       n[        R"                  " 5       nX)U
   -  nX'U
   -  n['        U R
                  5       Hk  nUR)                  [+        UUUS95        UnUR,                  c  M.  XR,                  ;   d  M?  UR.                  S:X  d  MQ  UR)                  [1        U5      5        Mm     [        R2                  " 5       nUUl        UUl        XR                  S-
  :w  a  [9        U5      Ul        US-  nU R$                  R)                  U5        GM(     [        R2                  " 5       U l        [+        UWUS9U R<                  l        UR.                  S:X  a  [1        U5      O[        R@                  " 5       U R<                  l!        [+        UUUS9U R<                  l"        [        R                  RG                  SUS	S
S9U l$        [        R                  R                  UU(       a  SU-  OUSSSS9U l%        g )Nr   r   rA  )r   )r   rH  rW  vanillar/   rR  rC   TrS  )&r    r!   lenchannel_multipliernum_resolutionsnum_res_blocksbase_channels
resolutionrH  double_latentlatent_channelsr#   r   rE  conv_inr>   in_channel_multiplier
ModuleListdownrangeappendrP  attn_resolutions	attn_typerf  Moduleblockattnr?  
downsamplemidblock_1Identityattn_1block_2rY  norm_outconv_out)r'   r   r{  r|  rH  r}  r~  rx  curr_resr  i_levelr  r  block_in	block_outi_blockr  r*   s                    r+   r!   ChameleonVQVAEEncoder.__init__  s   "6#<#<=$33,,&&
((,, 00#66xx{qYZdef $u-?'@ @%:"MMO	T112GMMOE==?D$W'EEH%7(CCI !4!454%$,%. %++7 $;$;;((I5KK >x HI 6  99;DDJDI..22"Eh"O#q=IIT"7 3: 99;; !

 GMFVFVZcFc8Bikititiv; !
 **bxUYbf*g#0Ao ( 
r-   pixel_valuesc                 @   U R                  U5      /n[        U R                  5       H  n[        U R                  5       H  nU R                  U   R
                  U   " US   5      n[        U R                  U   R                  5      S:  a"  U R                  U   R                  U   " U5      nUR                  U5        M     X0R                  S-
  :w  d  M  UR                  U R                  U   R                  US   5      5        M     US   nU R                  R                  U5      nU R                  R                  U5      nU R                  R                  U5      nU R                  U5      nU[        R                   " U5      -  nU R#                  U5      nU$ )Nr0   r   r   )r  r  ry  rz  r  r  rw  r  r  r  r  r  r  r  r  r#   rc  r  )r'   r  r8   r  r  r1  last_hidden_states          r+   r;   ChameleonVQVAEEncoder.forward  sr   l34T112G !4!45#yy177@!"%  tyy)../!3#'99W#5#:#:7#CL#QL$$\2 6 ..22$$TYYw%7%B%B=QSCT%UV 3 *"- HH,,->? HHOO,=> HH,,->? !MM*;<U]]+<== MM*;<  r-   )r  r  r  r  r  r  rz  ry  )
rD   rE   rF   rG   r!   r#   r  r;   rH   rI   rJ   s   @r+   rt  rt    s!    C
J!E$4$4 ! !r-   rt  c                       \ rS rSrSrS r\S 5       r\S 5       r\S 5       r	\S 5       r
\S 5       r\S	 5       rS
\R                  S\R                  4S jrSrg)ChameleonImageVocabularyMappingi  zE
A class for mapping discrete image tokens from VQGAN to BPE tokens.
c                 <    Xl         UR                  S5      U l        g )Nz<image>)	vocab_mapgetimage_token_id)r'   r  s     r+   r!   (ChameleonImageVocabularyMapping.__init__  s    "'mmI6r-   c                 j    U R                   R                  5        VVs0 sH  u  pX!_M	     snn$ s  snnf ry   )r  itemsr'   r   ri  s      r+   val2name(ChameleonImageVocabularyMapping.val2name  s-    !%!5!5!78!7!7888   /c           	          [        U R                  R                  5        VVs/ sH  u  pUR                  S5      (       d  M  UPM!     snn5      $ s  snnf )NIMGIMG)sortedr  r  
startswith)r'   namevals      r+   image_tokens,ChameleonImageVocabularyMapping.image_tokens  s<    DNN,@,@,B`,BytdooV^F_s,B`aa`s   A
A
c           
      $  ^ [        S5       Vs0 sH$  n[        [        S5      U-   5      [        U5      _M&     snmS[        S[        4U4S jjnU R                   Vs0 sH!  o3[        U" U R                  U   5      5      _M#     sn$ s  snf s  snf )N
   Aold_namer   c                 R   > SR                  U4S jU [        S5      S  5       5      $ )N c              3   D   >#    U H  nTR                  X5      v   M     g 7fry   )r  ).0cimg_tkn_chr_mappings     r+   	<genexpr>IChameleonImageVocabularyMapping.bpe2img.<locals>.remap.<locals>.<genexpr>   s"     _B^Q.22188B^s    r  r0   )joinrw  )r  r  s    r+   remap6ChameleonImageVocabularyMapping.bpe2img.<locals>.remap  s$    77_(3x=[]B^___r-   )r  chrordstrr  r  r  )r'   ir  tokr  s       @r+   bpe2img'ChameleonImageVocabularyMapping.bpe2img  s    BG)L)Qs3s8a<0#a&8)L	`C 	`C 	` @D?P?PQ?PSt}}S1233?PQQ M
 Rs   *B'Bc                 j    U R                   R                  5        VVs0 sH  u  pX!_M	     snn$ s  snnf ry   )r  r  r  s      r+   img2bpe'ChameleonImageVocabularyMapping.img2bpe  s-    !%!3!3!56!5!5666r  c                     [         R                  " [        U R                  R	                  5       5      5      [         R                  " [        U R                  R                  5       5      5      4$ ry   )r#   tensorr  r  keysvaluesr@   s    r+   bpe2img_search_tensors6ChameleonImageVocabularyMapping.bpe2img_search_tensors  sC    ||F4<<#4#4#678%,,vdllNaNaNcGd:eeer-   c                     [         R                  " [        U R                  R	                  5       5      S-   [         R
                  S9nU R                  R                  5        H	  u  p#X1U'   M     U$ )Nr   rP   )r#   zerosr   r  r  r  r  )r'   mappingr   ri  s       r+   img2bpe_mapping_tensor6ChameleonImageVocabularyMapping.img2bpe_mapping_tensor  sR    ++c$,,"3"3"56:%))LLL&&(DAAJ )r-   	img_batchr   c                 x    UR                   nU R                  UR                  S5         nUR                  U5      $ )Nrb   )rR   r  r3   )r'   r  rR   
img_tokenss       r+   convert_img2bpe/ChameleonImageVocabularyMapping.convert_img2bpe  s5    !!00e1DE
}}V$$r-   )r  r  N)rD   rE   rF   rG   r}   r!   r   r  r  r  r  r  r  r#   r  r  rH   r{   r-   r+   r  r    s    7 9 9 b b R R 7 7 f f  % %%,, %r-   r  c                   P    \ rS rSr% \\S'   SrSrSS/rSS/r	Sr
SrSrS	rSrSrS
rg)ChameleonPreTrainedModeli  r   modelTr  r!  past_key_valuesr   Fr{   N)rD   rE   rF   rG   r   __annotations__base_model_prefixsupports_gradient_checkpointing_no_split_modules_skip_keys_device_placement_supports_flash_attn_supports_sdpa_can_compile_fullgraph!_supports_param_buffer_assignment_supports_flex_attn_supports_attention_backendrH   r{   r-   r+   r  r    sN    &*#02MN#4m"DN!(-%"&r-   r  aW  
    The VQ-VAE model used in Chameleon for encoding/decoding images into discrete tokens.
    This model follows the "Make-a-scene: Scene-based text-to-image generation with human priors" paper from
    [ Oran Gafni, Adam Polyak, Oron Ashual, Shelly Sheynin, Devi Parikh, and Yaniv
    Taigman](https://huggingface.co/papers/2203.13131).
    )custom_introc                   f   ^  \ rS rSr% \\S'   / SQrS\4U 4S jjrS\R                  4S jr
SrU =r$ )ChameleonVQVAEi)  r   )r'  rf  rP  c                 l  > [         TU ]  U5        [        U5      U l        [	        U5      U l        [        R                  R                  UR                  UR                  S5      U l        [        R                  R                  UR                  UR                  S5      U l        U R                  5         g Nr   )r    r!   rt  encoderr'  quantizer#   r   rE  r~  r+  
quant_convpost_quant_convevalr   s     r+   r!   ChameleonVQVAE.__init__9  s|     ,V45f=((//&*@*@&BRBRTUV$xxv/?/?AWAWYZ[		r-   r  c                 v    U R                  U5      nU R                  U5      nU R                  U5      u  p4nX4U4$ ry   )r  r  r  )r'   r  r8   quantemb_lossindicess         r+   encodeChameleonVQVAE.encodeB  s<    \26#'==#? ''r-   )r  r  r  r  )rD   rE   rF   rG   r   r  r  r!   r#   r  r  rH   rI   rJ   s   @r+   r  r  )  s7     ! 3 (5#3#3 ( (r-   r  c                     ^  \ rS rSrS\4U 4S jjrS\R                  4S jrS\R                  4S jr	S\R                  S\R                  S	\R                  4S
 jr\           SS\\R                     S\\R                     S\\R                     S\\R                     S\\   S\\R                     S\\   S\\   S\\   S\\   S\\R                     S\\   S\\\4   4S jj5       rSrU =r$ )ChameleonModeliI  r   c           	        > [         TU ]  U5        UR                  U l        UR                  U l        [
        R                  " UR                  UR                  U R                  5      U l        [        UR                  5      U l        U R                  R                  (       d  [        O[        n[
        R                   " [#        UR$                  5       Vs/ sH
  o2" X5      PM     sn5      U l        [)        UR                  UR*                  S9U l        [.        R1                  UR2                  5      U l        SU l        U R9                  5         g s  snf )Nr   F)r    r!   pad_token_idpadding_idx
vocab_sizer   r.  r(   embed_tokensr  vocabulary_mapvocabulary_mappingr   	swin_normr  r!  r  r  num_hidden_layerslayersr   r  rh  r  _from_config	vq_configvqmodelgradient_checkpointing	post_init)r'   r   decoder_layerr   r*   s       r+   r!   ChameleonModel.__init__K  s     !.. ++LL):):F<N<NPTP`P`a"A&BWBW"X59[[5J5J-Pimm?DVE]E]?^_?^)]6-?^_
 %V%7%7V=P=PQ	%2263C3CD&+# 	 `s   Er  c                     UR                   S   nU R                  R                  U5      u    p4U R                  R	                  U5      nUR                  US5      nU$ )a;  
Tokenizes images into discrete tokens with VQGAN module. Converts
obtained image tokens into BPE tokens and wraps with "boi" and "eoi"
special tokens.

Args:
    pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, image_size, image_size)):
        The tensors corresponding to the input images.
r   r0   )r?   r  r  r  r  r   )r'   r  rn  r   
image_toksbpe_tokss         r+   get_image_tokensChameleonModel.get_image_tokens]  sX     "''*
<<..|<1**:::F==R0r-   c                 T    U R                  U5      nU R                  5       " U5      nU$ )a  
Tokenizes images into discrete tokens with VQGAN module and embeds
them with text embeddings layer

Args:
    pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, image_size, image_size)):
        The tensors corresponding to the input images.
)r  get_input_embeddings)r'   r  r  vision_embeddingss       r+   get_image_features!ChameleonModel.get_image_featuresm  s.     ,,\: 557E  r-   	input_idsinputs_embedsimage_featuresc           	      J   Ucj  X R                  5       " [        R                  " U R                  R                  [        R
                  UR                  S95      :H  nUR                  S5      nOXR                  R                  :H  nUR                  5       nUR                  S5      R                  U5      R                  UR                  5      nUR                  S   UR                  S   -  nX$   R                  5       UR                  5       :w  a  [        SU SU 35      eU$ )z
Obtains multimodal placeholdr mask from `input_ids` or `inputs_embeds`, and checks that the placeholder token count is
equal to the length of multimodal features. If the lengths are different, an error is raised.
)r2   rR   r0   r   r   z6Image features and image tokens do not match: tokens: z, features )r  r#   r  r  r  longrR   allr4  r   	expand_asr3   r?   numelr   )r'   r  r  r  special_image_maskn_image_tokensn_image_featuress          r+   get_placeholder_mask#ChameleonModel.get_placeholder_maskz  s    !.2K2K2MT44CC5::^k^r^rs3 " "4!7!7!;!*.E.E.T.T!T+//1/99"=GGVYYZgZnZno)//2^5I5I!5LL,2248L8L8NNHHXXcdtcuv  "!r-   r   rn   r  r   r   output_hidden_statesreturn_dictr   r   r   c                    Ub  UOU R                   R                  nU	b  U	OU R                   R                  n	Ub  UOU R                   R                  nU
b  U
OU R                   R                  n
U R
                  (       a/  U R                  (       a  U(       a  [        R                  S5        SnUS L US L-  (       a  [        S5      eUc  U R                  U5      nUb2  U R                  U5      nU R                  XUS9nUR                  X5      nU(       a0  Uc-  [        R                  R!                  5       (       d
  [#        5       nUcD  Ub  UR%                  5       OSn[        R&                  " XUR(                  S   -   UR*                  S9nUc  UR-                  S5      n[/        U R                   UUUUUS9nUnU	(       a  S	OS nU(       a  S	OS nU R0                   H7  nU	(       a  UU4-  nU" U4UUUUUUS
.UD6nUS   nU(       d  M.  UUS   4-  nM9     U R3                  U5      nU	(       a  UU4-  nU
(       d  [5        S UUUU4 5       5      $ [7        UUUUS9$ )NzX`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.Fz:You must specify exactly one of input_ids or inputs_embeds)r  r  r   r   )rR   )r   input_embedsr   r   r  rn   r{   )r   rn   r   r   r   r   c              3   ,   #    U H  oc  M  Uv   M     g 7fry   r{   )r  ri  s     r+   r  )ChameleonModel.forward.<locals>.<genexpr>  s      ^a^s   	)r  r  r8   
attentions)r   r   r$  r   use_return_dictr	  r   r   r   r   r   r  r"  masked_scatterr#   jit
is_tracingr   get_seq_lengthrZ   r?   rR   r   r   r  rh  r>   r   )r'   r  r  r   rn   r  r  r   r   r$  r%  r   r   image_embedsr  past_seen_tokensr   r8   all_hidden_statesall_self_attnsr  layer_outputss                         r+   r;   ChameleonModel.forward  s     2C1N-TXT_T_TqTq$8$D $++JjJj 	 "+!6IDKK<Q<Q	%0%<k$++B]B]&&4==Yj I-t";<YZZ  --i8M#22<@L!%!:!:| "; " *889KZM 09M9M9O9O*nO!CRC^==?de"\\ ]5H5H5K"KTaThThN )33A6L(;;&))+%
 & #7BD0d![[M#!m%55!)	*)."3#-	 	M *!,M  =#3"55% )( 		-0  -!11 )?<M~^   '+++%	
 	
r-   )r   r	  r  rh  r  r  r  r  NNNNNNNNNNN)rD   rE   rF   rG   r   r!   r#   r  r  r  r  r"  r   r   r  r
   r	  r   r   r   r>   r   r;   rH   rI   rJ   s   @r+   r  r  I  s    $U->->  !u/@/@ !"))":?:K:K"]b]n]n"0  15481537+/59$(,0/3&*59j
E,,-j
 u001j
 !.	j

 u//0j
 "%j
   1 12j
 D>j
 $D>j
 'tnj
 d^j
 !!1!12j
 -.j
 
u--	.j
 j
r-   r  zb
    Chameleon Model with a head on top used for outputting logits for next token prediction.
    c                     ^  \ rS rSrS/rU 4S jrS rS rS rS r	\
\           SS\\R                     S	\\R                     S
\\R                      S\\R                     S\\   S\\R                     S\\R                     S\\   S\\   S\\   S\\R                     S\\   S\\\4   4S jj5       5       r       SU 4S jjrSrU =r$ )!ChameleonForConditionalGenerationi   zlm_head.weightc                    > [         TU ]  U5        [        U5      U l        UR                  U l        [
        R                  " UR                  UR                  SS9U l        U R                  5         g )NFr   )
r    r!   r  r  r  r   r   r(   lm_headr
  r   s     r+   r!   *ChameleonForConditionalGeneration.__init__  sU     #F+
 ++yy!3!3V5F5FUS 	r-   c                     Xl         g ry   r  )r'   decoders     r+   set_decoder-ChameleonForConditionalGeneration.set_decoder  s    
r-   c                     U R                   $ ry   r=  r@   s    r+   get_decoder-ChameleonForConditionalGeneration.get_decoder  s    zzr-   c                 8    U R                   R                  U5      $ ry   )r  r  r'   r  s     r+   r  2ChameleonForConditionalGeneration.get_image_tokens  s    zz**<88r-   c                 8    U R                   R                  U5      $ ry   )r  r  rE  s     r+   r  4ChameleonForConditionalGeneration.get_image_features  s    zz,,\::r-   r  r  r   rn   r  r  labelsr   r   r$  r   r   r   c                 0   U	b  U	OU R                   R                  n	U
b  U
OU R                   R                  n
U R                  " SUUUUUUUU	U
SUS.UD6nUS   nU R	                  U5      nU R                  R
                  R                  n[        R                  " UR                  5      R                  USS2SS2U4'   SnUb)  U R                  " SXU R                   R                  S.UD6n[        UUUR                  UR                  UR                   S9$ )aE  
labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
    Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
    config.vocab_size]` or -100 (see `input_ids` docstring). Tokens with indices set to `-100` are ignored
    (masked), the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`.

Example:

```python
>>> from transformers import ChameleonProcessor, ChameleonForConditionalGeneration
>>> import torch
>>> import requests
>>> from PIL import Image

>>> model = ChameleonForConditionalGeneration.from_pretrained("facebook/chameleon-7b", torch_dtype=torch.bfloat16)
>>> processor = ChameleonProcessor.from_pretrained("facebook/chameleon-7b")

>>> prompt = "I used to know a lot about constellations when I was younger, but as I grew older, I forgot most of what I knew. These are the only two constellations that I really remember now.<image><image>I would like for you to tell me about 3 more constellations and give me a little bit of history about the constellation."
>>> image = Image.open(requests.get("https://nineplanets.org/wp-content/uploads/2020/12/the-big-dipper-1.jpg", stream=True).raw)
>>> image_2 = Image.open(requests.get("https://www.kxan.com/wp-content/uploads/sites/40/2020/10/ORION.jpg", stream=True).raw)

>>> inputs = processor(images=[image, image_2], text=prompt, return_tensors="pt").to(model.device, torch.bfloat16)

>>> generated_ids = model.generate(**inputs, max_new_tokens=100, do_sample=False)
>>> processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
```NT)r  r  r   rn   r  r  r   r   r$  r%  r   r   )logitsrI  r  )r<  rK  r  r8   r*  r{   )r   r   r$  r  r:  r  r  r#   finfor2   minloss_functionr  r   r  r8   r*  )r'   r  r  r   rn   r  r  rI  r   r   r$  r   r   r  r8   rK  r  r<  s                     r+   r;   )ChameleonForConditionalGeneration.forward  s-   V 2C1N-TXT_T_TqTq$8$D $++JjJj 	
 ** 
%)%+'/!5)
 
  
m, zz44AA%*[[%>%B%Bq!\!"%%pVt{{OeOepiopD%#33!//))
 	
r-   c	                 V   > [         TU ]  " U4UUUUUUUS.U	D6n
US   S:w  a  S U
S'   U
$ )N)r  r  r   r  r   rn   r   r   r  )r    prepare_inputs_for_generation)r'   r  r  r  r   r  r   rn   r   r   model_inputsr*   s              r+   rQ  ?ChameleonForConditionalGeneration.prepare_inputs_for_generationp  s\     w<

%+)')%

 

 !! ,0L(r-   )r:  r  r  r6  )NNNNNNT)rD   rE   rF   rG   _tied_weights_keysr!   r?  rB  r  r  r   r   r   r#   r  r  r  r
   r	  r   r   r   r>   r   r;   rQ  rH   rI   rJ   s   @r+   r8  r8     s    ++9;  15481537+/59-1$(,0/359O
E,,-O
 u001O
 !.	O

 u//0O
 "%O
   1 12O
 ))*O
 D>O
 $D>O
 'tnO
 !!1!12O
 +,O
 
u,,	-O
  O
h  r-   r8  )r8  r  r  r  r  )r   )Jr}   	functoolsr   typingr   r   r   r#   torch.nn.functionalr   r   r   torch.utils.checkpointactivationsr	   cache_utilsr
   r   
generationr   masking_utilsr   modeling_flash_attention_utilsr   modeling_layersr   modeling_outputsr   r   modeling_utilsr   r   processing_utilsr   utilsr   r   r   r   configuration_chameleonr   r   
get_loggerrD   r   r  r   rL   rv   r   r   r   r   	LayerNormr   r  r  r   r\   r   r   r  r!  r'  r?  rP  rf  rt  r  r  r  r  r8  __all__r{   r-   r+   <module>rg     s    % , ,      ! . ) / B 9 O F &  K 
		H	%Jryy J,<ryy <@,D 0H *(8299 " &	UU\\ 	U# 	U%,, 	U( %II%<<% 
% <<	%
 U\\*% % % '(%4z) z)|E6 EPC : CL,>BII ,>^	")) 	)(ryy )(X &RYY  &F^!BII ^!B,% ,%^ ' ' ' (- ((0 s
- s
 s
l 
J(@/ J
JZ pr-   