
    <hn                     n   S SK rS SKJrJrJr  S SKrS SKJr  S SKJ	r	J
r
Jr  SSKJr  SSKJr  SSKJrJrJr  SSKJrJr  SS	KJrJr  SS
KJrJrJr  SSKJr  \R@                  " \!5      r" " S S\RF                  5      r$ " S S\RF                  5      r%\ " S S\5      5       r& S2S\RF                  S\RN                  S\RN                  S\RN                  S\\RN                     S\(S\(4S jjr) " S S\RF                  5      r* " S S\RF                  5      r+ " S S \RF                  5      r, " S! S"\RF                  5      r- " S# S$\RF                  5      r. " S% S&\5      r/ " S' S(\RF                  5      r0 " S) S*\RF                  5      r1\ " S+ S,\&5      5       r2\" S-S.9 " S/ S0\&5      5       r3/ S1Qr4g)3    N)CallableOptionalUnion)BCEWithLogitsLossCrossEntropyLossMSELoss   )ACT2FN)GradientCheckpointingLayer)BaseModelOutputBaseModelOutputWithPoolingImageClassifierOutput)ALL_ATTENTION_FUNCTIONSPreTrainedModel) find_pruneable_heads_and_indicesprune_linear_layer)auto_docstringlogging	torch_int   )IJepaConfigc                   n   ^  \ rS rSrSrU 4S jrS	S\R                  S\S\R                  4S jjr	Sr
U =r$ )
IJepaPatchEmbeddings   z
This class turns `pixel_values` of shape `(batch_size, num_channels, height, width)` into the initial
`hidden_states` (patch embeddings) of shape `(batch_size, seq_length, hidden_size)` to be consumed by a
Transformer.
c                   > [         TU ]  5         UR                  UR                  p2UR                  UR
                  pT[        U[        R                  R                  5      (       a  UOX"4n[        U[        R                  R                  5      (       a  UOX34nUS   US   -  US   US   -  -  nX l        X0l        X@l        X`l
        [        R                  " XEX3S9U l        g )Nr   r   )kernel_sizestride)super__init__
image_size
patch_sizenum_channelshidden_size
isinstancecollectionsabcIterablenum_patchesnnConv2d
projection)selfconfigr    r!   r"   r#   r(   	__class__s          `/var/www/html/shao/venv/lib/python3.13/site-packages/transformers/models/ijepa/modeling_ijepa.pyr   IJepaPatchEmbeddings.__init__!   s    !'!2!2F4E4EJ$*$7$79K9Kk#-j+//:R:R#S#SZZdYq
#-j+//:R:R#S#SZZdYq
!!}
15*Q-:VW=:XY$$(&))L:i    pixel_valuesinterpolate_pos_encodingreturnc                    UR                   u  p4pVX@R                  :w  a  [        SU R                   SU S35      eU(       dV  XPR                  S   :w  d  X`R                  S   :w  a2  [        SU SU SU R                  S    SU R                  S    S	3	5      eU R	                  U5      R                  S
5      R                  SS
5      nU$ )NzoMake sure that the channel dimension of the pixel values match with the one set in the configuration. Expected z	 but got .r   r   zInput image size (*z) doesn't match model (z).   )shaper"   
ValueErrorr    r+   flatten	transpose)r,   r2   r3   
batch_sizer"   heightwidth
embeddingss           r/   forwardIJepaPatchEmbeddings.forward0   s    2>2D2D/
&,,,!../yaI  (++u8J/J (% 9+,Adooa.@-AE  __\2::1=GG1M
r1   )r    r"   r(   r!   r+   F)__name__
__module____qualname____firstlineno____doc__r   torchTensorboolrA   __static_attributes____classcell__r.   s   @r/   r   r      s8    jELL D ]b]i]i  r1   r   c            	          ^  \ rS rSrSrSS\S\SS4U 4S jjjrS\R                  S	\
S
\
S\R                  4S jr  SS\R                  S\\R                     S\S\R                  4S jjrSrU =r$ )IJepaEmbeddingsA   zZ
Construct the CLS token, position and patch embeddings. Optionally, also the mask token.
r-   use_mask_tokenr4   Nc                   > [         TU ]  5         U(       a6  [        R                  " [        R
                  " SSUR                  5      5      OS U l        [        U5      U l	        U R                  R                  n[        R                  " [        R                  " SX1R                  5      5      U l        [        R                  " UR                  5      U l        UR                   U l        Xl        g )Nr   )r   r   r)   	ParameterrI   zerosr#   
mask_tokenr   patch_embeddingsr(   randnposition_embeddingsDropouthidden_dropout_probdropoutr!   r-   )r,   r-   rR   r(   r.   s       r/   r   IJepaEmbeddings.__init__F   s    Q_",,u{{1a9K9K'LMei 4V <++77#%<<A{L^L^0_#` zz&"<"<= ++r1   r@   r>   r?   c                 ,   UR                   S   nU R                  R                   S   n[        R                  R	                  5       (       d  XE:X  a  X#:X  a  U R                  $ U R                  nUR                   S   nX R
                  -  nX0R
                  -  n	[        US-  5      n
UR                  SXU5      nUR                  SSSS5      n[        R                  R                  UX4SSS	9nUR                  SSSS5      R                  SSU5      nU$ )
a  
This method allows to interpolate the pre-trained position encodings, to be able to use the model on higher resolution
images. This method is also adapted to support torch.jit tracing.

Adapted from:
- https://github.com/facebookresearch/dino/blob/de9ee3df6cf39fac952ab558447af1fa1365362a/vision_transformer.py#L174-L194, and
- https://github.com/facebookresearch/dinov2/blob/e1277af2ba9496fbadf7aec6eba56e8d882d1e35/dinov2/models/vision_transformer.py#L179-L211
r   g      ?r   r	   r8   bicubicF)sizemodealign_corners)r9   rY   rI   jit
is_tracingr!   r   reshapepermuter)   
functionalinterpolateview)r,   r@   r>   r?   r(   num_positionspatch_pos_embeddim
new_height	new_widthsqrt_num_positionss              r/   r3   (IJepaEmbeddings.interpolate_pos_encodingP   s    !&&q)0066q9 yy##%%+*F6?+++22r".
__,	&}c'9:)11!5G]`a)11!Q1=--33(	 4 
 *11!Q1=BB1b#Nr1   r2   bool_masked_posr3   c                 n   UR                   u  pEpgU R                  XS9nUbX  UR                   S   n	U R                  R                  XIS5      n
UR	                  S5      R                  U
5      nUSU-
  -  X-  -   nU(       a  XR                  XU5      -   nOXR                  -   nU R                  U5      nU$ )N)r3   r   r_         ?)	r9   rW   rV   expand	unsqueezetype_asr3   rY   r\   )r,   r2   rr   r3   r=   _r>   r?   r@   
seq_lengthmask_tokensmasks               r/   rA   IJepaEmbeddings.forwardw   s     (4'9'9$
v**<*k
&#))!,J//00LK",,R088ED#sTz2[5GGJ $#&C&CJX]&^^J#&>&>>J\\*-
r1   )r-   r\   rV   rW   r!   rY   rC   NF)rD   rE   rF   rG   rH   r   rK   r   rI   rJ   intr3   r   
BoolTensorrA   rL   rM   rN   s   @r/   rP   rP   A   s    { D T  %5<< % %UX %]b]i]i %T 7;).	ll "%"2"23 #'	
 
 r1   rP   c                       \ rS rSr% \\S'   SrSrSrSS/r	Sr
SrSrSrS\\R                   \R"                  \R$                  4   S	S
4S jrSrg
)IJepaPreTrainedModel   r-   ijepar2   TrP   
IJepaLayermoduler4   Nc                    [        U[        R                  [        R                  45      (       a  [        R                  R                  UR                  R                  R                  [        R                  5      SU R                  R                  S9R                  UR                  R                  5      UR                  l        UR                  b%  UR                  R                  R                  5         gg[        U[        R                   5      (       aJ  UR                  R                  R                  5         UR                  R                  R#                  S5        g[        U[$        5      (       a  [        R                  R                  UR&                  R                  R                  [        R                  5      SU R                  R                  S9R                  UR&                  R                  5      UR&                  l        UR(                  b%  UR(                  R                  R                  5         ggg)zInitialize the weights        )meanstdNrt   )r$   r)   Linearr*   inittrunc_normal_weightdatatorI   float32r-   initializer_rangedtypebiaszero_	LayerNormfill_rP   rY   rV   )r,   r   s     r/   _init_weights"IJepaPreTrainedModel._init_weights   s   fryy"))455 "$!6!6""%%emm43DKKDaDa "7 "b$$% MM {{&  &&( '--KK""$MM$$S)00.0gg.C.C**//225==AKK11 /D / b++112	 &&+
   ,!!&&,,. - 1r1    )rD   rE   rF   rG   r   __annotations__base_model_prefixmain_input_namesupports_gradient_checkpointing_no_split_modules_supports_sdpa_supports_flash_attn_supports_flex_attn_supports_attention_backendr   r)   r   r*   r   r   rL   r   r1   r/   r   r      sg    $O&*#*L9N"&/E"))RYY*L$M /RV /r1   r   r   querykeyvalueattention_maskscalingr\   c                    [         R                  " XR                  SS5      5      U-  n[        R                  R                  US[         R                  S9R                  UR                  5      n[        R                  R                  XU R                  S9nUb  X-  n[         R                  " X5      n	U	R                  SS5      R                  5       n	X4$ )Nr_   )rm   r   )ptrainingr   r8   )rI   matmulr<   r)   rh   softmaxr   r   r   r\   r   
contiguous)
r   r   r   r   r   r   r\   kwargsattn_weightsattn_outputs
             r/   eager_attention_forwardr      s     <<}}R'<=GL ==((2U]](SVVW\WbWbcL ==((6??([L !#4,,|3K''1-88:K$$r1   c            
          ^  \ rS rSrS\SS4U 4S jjr  S
S\\R                     S\	S\
\\R                  \R                  4   \\R                     4   4S jjrS	rU =r$ )IJepaSelfAttention   r-   r4   Nc                 0  > [         TU ]  5         UR                  UR                  -  S:w  a7  [	        US5      (       d&  [        SUR                   SUR                   S35      eXl        UR                  U l        [        UR                  UR                  -  5      U l        U R                  U R                  -  U l	        UR                  U l        U R                  S-  U l        SU l        [        R                  " UR                  U R                  UR                   S9U l        [        R                  " UR                  U R                  UR                   S9U l        [        R                  " UR                  U R                  UR                   S9U l        g )	Nr   embedding_sizezThe hidden size z4 is not a multiple of the number of attention heads r6   g      F)r   )r   r   r#   num_attention_headshasattrr:   r-   r~   attention_head_sizeall_head_sizeattention_probs_dropout_probdropout_probr   	is_causalr)   r   qkv_biasr   r   r   r,   r-   r.   s     r/   r   IJepaSelfAttention.__init__   sG    : ::a?PVXhHiHi"6#5#5"6 7334A7 
 #)#=#= #&v'9'9F<V<V'V#W !558P8PP"??//5YYv1143E3EFOO\
99V//1C1C&//ZYYv1143E3EFOO\
r1   	head_maskoutput_attentionsc                    UR                   u  pEnU R                  U5      R                  USU R                  U R                  5      R                  SS5      nU R                  U5      R                  USU R                  U R                  5      R                  SS5      nU R                  U5      R                  USU R                  U R                  5      R                  SS5      n	[        n
U R                  R                  S:w  aT  U R                  R                  S:X  a  U(       a  [        R                  S5        O[        U R                  R                     n
U
" U U	UUUU R                  U R                  U R                   (       d  SOU R"                  S9u  pUR%                  5       S S	 U R&                  4-   nUR)                  U5      nU(       a  X4nU$ U4nU$ )
Nr_   r   r8   eagersdpaz`torch.nn.functional.scaled_dot_product_attention` does not support `output_attentions=True`. Falling back to eager attention. This warning can be removed using the argument `attn_implementation="eager"` when loading the model.r   )r   r   r\   r   )r9   r   rj   r   r   r<   r   r   r   r-   _attn_implementationloggerwarning_oncer   r   r   r   r   ra   r   rf   )r,   hidden_statesr   r   r=   ry   rx   	key_layervalue_layerquery_layerattention_interfacecontext_layerattention_probsnew_context_layer_shapeoutputss                  r/   rA   IJepaSelfAttention.forward   s    %2$7$7!
HH]#T*b$":":D<T<TUYq!_ 	 JJ}%T*b$":":D<T<TUYq!_ 	 JJ}%T*b$":":D<T<TUYq!_ 	 )@;;++w6{{//69>O##L
 '>dkk>^>^&_#)<nnLL#}}C$2C2C	*
& #0"4"4"6s";t?Q?Q>S"S%--.EF6G=2 O\M]r1   )
r   r   r-   r   r   r   r   r   r   r   r}   )rD   rE   rF   rG   r   r   r   rI   rJ   rK   r   tuplerA   rL   rM   rN   s   @r/   r   r      sw    ]{ ]t ]. -1"'	1 ELL)1  	1
 
uU\\5<</0%2EE	F1 1r1   r   c                      ^  \ rS rSrSrS\SS4U 4S jjrS\R                  S\R                  S\R                  4S	 jr	S
r
U =r$ )IJepaSelfOutputi  z
The residual connection is defined in IJepaLayer instead of here (as is the case with other models), due to the
layernorm applied before each block.
r-   r4   Nc                    > [         TU ]  5         [        R                  " UR                  UR                  5      U l        [        R                  " UR                  5      U l        g N)	r   r   r)   r   r#   denserZ   r[   r\   r   s     r/   r   IJepaSelfOutput.__init__"  sB    YYv1163E3EF
zz&"<"<=r1   r   input_tensorc                 J    U R                  U5      nU R                  U5      nU$ r   r   r\   r,   r   r   s      r/   rA   IJepaSelfOutput.forward'  s$    

=1]3r1   r   )rD   rE   rF   rG   rH   r   r   rI   rJ   rA   rL   rM   rN   s   @r/   r   r     sI    
>{ >t >
U\\  RWR^R^  r1   r   c                      ^  \ rS rSrS\SS4U 4S jjrS\\   SS4S jr  SS\	R                  S	\\	R                     S
\S\\\	R                  \	R                  4   \\	R                     4   4S jjrSrU =r$ )IJepaAttentioni.  r-   r4   Nc                    > [         TU ]  5         [        U5      U l        [	        U5      U l        [        5       U l        g r   )r   r   r   	attentionr   outputsetpruned_headsr   s     r/   r   IJepaAttention.__init__/  s0    +F3%f-Er1   headsc                 6   [        U5      S:X  a  g [        XR                  R                  U R                  R                  U R
                  5      u  p[        U R                  R                  U5      U R                  l        [        U R                  R                  U5      U R                  l        [        U R                  R                  U5      U R                  l	        [        U R                  R                  USS9U R                  l        U R                  R                  [        U5      -
  U R                  l        U R                  R                  U R                  R                  -  U R                  l        U R
                  R                  U5      U l        g )Nr   r   rm   )lenr   r   r   r   r   r   r   r   r   r   r   r   union)r,   r   indexs      r/   prune_headsIJepaAttention.prune_heads5  s   u:?7>>55t~~7Y7Y[_[l[l

  2$..2F2FN/0B0BEJ1$..2F2FN.t{{/@/@%QO .2^^-O-ORUV[R\-\*'+~~'I'IDNNLnLn'n$ --33E:r1   r   r   r   c                 f    U R                  XU5      nU R                  US   U5      nU4USS  -   nU$ )Nr   r   )r   r   )r,   r   r   r   self_outputsattention_outputr   s          r/   rA   IJepaAttention.forwardG  sC     ~~m@QR;;|AF#%QR(88r1   )r   r   r   r}   )rD   rE   rF   rG   r   r   r   r~   r   rI   rJ   r   rK   r   r   rA   rL   rM   rN   s   @r/   r   r   .  s    "{ "t ";S ;d ;* -1"'	|| ELL)  	
 
uU\\5<</0%2EE	F r1   r   c                   n   ^  \ rS rSrS\SS4U 4S jjrS\R                  S\R                  4S jrSr	U =r
$ )	IJepaIntermediateiU  r-   r4   Nc                   > [         TU ]  5         [        R                  " UR                  UR
                  5      U l        [        UR                  [        5      (       a  [        UR                     U l        g UR                  U l        g r   )r   r   r)   r   r#   intermediate_sizer   r$   
hidden_actstrr
   intermediate_act_fnr   s     r/   r   IJepaIntermediate.__init__V  s`    YYv1163K3KL
f''--'-f.?.?'@D$'-'8'8D$r1   r   c                 J    U R                  U5      nU R                  U5      nU$ r   r   r   )r,   r   s     r/   rA   IJepaIntermediate.forward^  s&    

=100?r1   r   rD   rE   rF   rG   r   r   rI   rJ   rA   rL   rM   rN   s   @r/   r   r   U  s6    9{ 9t 9U\\ ell  r1   r   c                      ^  \ rS rSrS\SS4U 4S jjrS\R                  S\R                  S\R                  4S jrS	r	U =r
$ )
IJepaOutputie  r-   r4   Nc                    > [         TU ]  5         [        R                  " UR                  UR
                  5      U l        [        R                  " UR                  5      U l	        g r   )
r   r   r)   r   r   r#   r   rZ   r[   r\   r   s     r/   r   IJepaOutput.__init__f  sB    YYv779K9KL
zz&"<"<=r1   r   r   c                 R    U R                  U5      nU R                  U5      nX-   nU$ r   r   r   s      r/   rA   IJepaOutput.forwardk  s,    

=1]3%4r1   r   r   rN   s   @r/   r  r  e  sD    >{ >t >
U\\  RWR^R^  r1   r  c                      ^  \ rS rSrSrS\SS4U 4S jjr  SS\R                  S\	\R                     S	\
S\\\R                  \R                  4   \\R                     4   4S
 jjrSrU =r$ )r   it  z?This corresponds to the Block class in the timm implementation.r-   r4   Nc                 j  > [         TU ]  5         UR                  U l        SU l        [	        U5      U l        [        U5      U l        [        U5      U l	        [        R                  " UR                  UR                  S9U l        [        R                  " UR                  UR                  S9U l        g )Nr   eps)r   r   chunk_size_feed_forwardseq_len_dimr   r   r   intermediater  r   r)   r   r#   layer_norm_epslayernorm_beforelayernorm_afterr   s     r/   r   IJepaLayer.__init__w  s    '-'E'E$'/-f5!&) "V-?-?VEZEZ [!||F,>,>FDYDYZr1   r   r   r   c                     U R                  U R                  U5      UUS9nUS   nUSS  nXQ-   nU R                  U5      nU R                  U5      nU R	                  Xq5      nU4U-   nU$ )N)r   r   r   )r   r  r  r  r   )r,   r   r   r   self_attention_outputsr   r   layer_outputs           r/   rA   IJepaLayer.forward  s     "&!!-0/ "0 "

 2!4(, )8 ++M:((6 {{<?/G+r1   )r   r
  r  r  r  r   r  r}   )rD   rE   rF   rG   rH   r   r   rI   rJ   r   rK   r   r   rA   rL   rM   rN   s   @r/   r   r   t  s    I[{ [t [ -1"'	|| ELL)  	
 
uU\\5<</0%2EE	F r1   r   c                      ^  \ rS rSrS\SS4U 4S jjr    SS\R                  S\\R                     S\	S	\	S
\	S\
\\4   4S jjrSrU =r$ )IJepaEncoderi  r-   r4   Nc                    > [         TU ]  5         Xl        [        R                  " [        UR                  5       Vs/ sH  n[        U5      PM     sn5      U l        SU l	        g s  snf r}   )
r   r   r-   r)   
ModuleListrangenum_hidden_layersr   layergradient_checkpointing)r,   r-   rx   r.   s      r/   r   IJepaEncoder.__init__  sR    ]]fF^F^@_#`@_1Jv$6@_#`a
&+# $as   A%r   r   r   output_hidden_statesreturn_dictc                 6   U(       a  SOS nU(       a  SOS n[        U R                  5       H9  u  pU(       a  Xa4-   nUb  X(   OS n
U	" XU5      nUS   nU(       d  M1  X{S   4-   nM;     U(       a  Xa4-   nU(       d  [        S XU4 5       5      $ [        UUUS9$ )Nr   r   r   c              3   ,   #    U H  oc  M  Uv   M     g 7fr   r   ).0vs     r/   	<genexpr>'IJepaEncoder.forward.<locals>.<genexpr>  s     m$[q$[s   	)last_hidden_stater   
attentions)	enumerater  r   r   )r,   r   r   r   r  r  all_hidden_statesall_self_attentionsilayer_modulelayer_head_masklayer_outputss               r/   rA   IJepaEncoder.forward  s     #7BD$5b4(4OA#$58H$H!.7.CilO(IZ[M)!,M  &91=M<O&O#  5   14D Dm]GZ$[mmm++*
 	
r1   )r-   r  r  )NFFT)rD   rE   rF   rG   r   r   rI   rJ   r   rK   r   r   r   rA   rL   rM   rN   s   @r/   r  r    s    ,{ ,t , -1"'%* !
||!
 ELL)!
  	!

 #!
 !
 
uo%	&!
 !
r1   r  c                   6   ^  \ rS rSrS\4U 4S jjrS rSrU =r$ )IJepaPooleri  r-   c                    > [         TU ]  5         [        R                  " UR                  UR
                  5      U l        [        UR                     U l	        g r   )
r   r   r)   r   r#   pooler_output_sizer   r
   
pooler_act
activationr   s     r/   r   IJepaPooler.__init__  s>    YYv1163L3LM
 !2!23r1   c                 \    US S 2S4   nU R                  U5      nU R                  U5      nU$ )Nr   )r   r5  )r,   r   first_token_tensorpooled_outputs       r/   rA   IJepaPooler.forward  s6     +1a40

#566r1   )r5  r   )	rD   rE   rF   rG   r   r   rA   rL   rM   rN   s   @r/   r1  r1    s    4{ 4
 r1   r1  c                   "  ^  \ rS rSrSS\S\S\4U 4S jjjrS\4S jrS\	\
\\
   4   SS	4S
 jr\       SS\\R                      S\\R"                     S\\R                      S\\   S\\   S\\   S\\   S\\\4   4S jj5       rSrU =r$ )
IJepaModeli  r-   add_pooling_layerrR   c                   > [         TU ]  U5        Xl        [        XS9U l        [        U5      U l        [        R                  " UR                  UR                  S9U l        U(       a  [        U5      OSU l        U R                  5         g)z
add_pooling_layer (bool, *optional*, defaults to `True`):
    Whether to add a pooling layer
use_mask_token (`bool`, *optional*, defaults to `False`):
    Whether to use a mask token for masked image modeling.
)rR   r  N)r   r   r-   rP   r@   r  encoderr)   r   r#   r  	layernormr1  pooler	post_init)r,   r-   r=  rR   r.   s       r/   r   IJepaModel.__init__  si     	 )&P#F+f&8&8f>S>ST->k&)D 	r1   r4   c                 .    U R                   R                  $ r   )r@   rW   )r,   s    r/   get_input_embeddingsIJepaModel.get_input_embeddings  s    ///r1   heads_to_pruneNc                     UR                  5        H7  u  p#U R                  R                  U   R                  R	                  U5        M9     g)z
Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
class PreTrainedModel
N)itemsr?  r  r   r   )r,   rG  r  r   s       r/   _prune_headsIJepaModel._prune_heads  s<    
 +002LELLu%//;;EB 3r1   r2   rr   r   r   r  r3   r  c                    Ub  UOU R                   R                  nUb  UOU R                   R                  nUb  UOU R                   R                  nUc  [	        S5      eU R                  X0R                   R                  5      nU R                  R                  R                  R                  R                  nUR                  U:w  a  UR                  U5      nU R                  XUS9n	U R                  U	UUUUS9n
U
S   nU R                  U5      nU R                  b  U R                  U5      OSnU(       d  Ub  X4OU4nXSS -   $ [!        UUU
R"                  U
R$                  S9$ )z
bool_masked_pos (`torch.BoolTensor` of shape `(batch_size, num_patches)`, *optional*):
    Boolean masked positions. Indicates which patches are masked (1) and which aren't (0).
Nz You have to specify pixel_values)rr   r3   )r   r   r  r  r   r   )r&  pooler_outputr   r'  )r-   r   r  use_return_dictr:   get_head_maskr  r@   rW   r+   r   r   r   r?  r@  rA  r   r   r'  )r,   r2   rr   r   r   r  r3   r  expected_dtypeembedding_outputencoder_outputssequence_outputr9  head_outputss                 r/   rA   IJepaModel.forward  s|    2C1N-TXT_T_TqTq$8$D $++JjJj 	 &1%<k$++B]B]?@@ &&y++2O2OP	 99DDKKQQ/'??>:L??Tl + 
 ,,/!5# ' 
 *!,..98<8OO4UY?L?XO;_n^pL!""555)-')77&11	
 	
r1   )r-   r@   r?  r@  rA  )FFNNNNNNN)rD   rE   rF   rG   r   rK   r   r   rE  dictr~   listrJ  r   r   rI   rJ   r   r   r   r   rA   rL   rM   rN   s   @r/   r<  r<    s   { t ]a  $0&: 0C4T#Y+? CD C  046:,0,0/337&*;
u||,;
 "%"2"23;
 ELL)	;

 $D>;
 'tn;
 #+4.;
 d^;
 
u00	1;
 ;
r1   r<  a  
    IJepa Model transformer with an image classification head on top (a linear layer on top of the final hidden states)
    e.g. for ImageNet.

    <Tip>

        Note that it's possible to fine-tune IJepa on higher resolution images than the ones it has been trained on, by
        setting `interpolate_pos_encoding` to `True` in the forward of the model. This will interpolate the pre-trained
        position embeddings to the higher resolution.

    </Tip>
    )custom_introc                      ^  \ rS rSrS\SS4U 4S jjr\       SS\\R                     S\\R                     S\\R                     S	\\
   S
\\
   S\\
   S\\
   S\\\4   4S jj5       rSrU =r$ )IJepaForImageClassificationi6  r-   r4   Nc                 .  > [         TU ]  U5        UR                  U l        [        USS9U l        UR                  S:  a+  [
        R                  " UR                  UR                  5      O[
        R                  " 5       U l	        U R                  5         g )NF)r=  r   )r   r   
num_labelsr<  r   r)   r   r#   Identity
classifierrB  r   s     r/   r   $IJepaForImageClassification.__init__E  ss      ++%@
 OUN_N_bcNc"))F$6$68I8IJikititiv 	r1   r2   r   labelsr   r  r3   r  c           	      z   Ub  UOU R                   R                  nU R                  UUUUUUS9nUS   n	U R                  U	R	                  SS95      n
SnUGb  UR                  U
R                  5      nU R                   R                  c  U R                  S:X  a  SU R                   l        OoU R                  S:  aN  UR                  [        R                  :X  d  UR                  [        R                  :X  a  SU R                   l        OSU R                   l        U R                   R                  S:X  aI  [        5       nU R                  S:X  a&  U" U
R                  5       UR                  5       5      nOU" X5      nOU R                   R                  S:X  a=  [        5       nU" U
R!                  S	U R                  5      UR!                  S	5      5      nO,U R                   R                  S:X  a  [#        5       nU" X5      nU(       d  U
4USS -   nUb  U4U-   $ U$ [%        UU
UR&                  UR(                  S
9$ )ab  
labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
    Labels for computing the image classification/regression loss. Indices should be in `[0, ...,
    config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
    `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
N)r   r   r  r3   r  r   r   r   
regressionsingle_label_classificationmulti_label_classificationr_   )losslogitsr   r'  )r-   rN  r   r_  r   r   deviceproblem_typer]  r   rI   longr~   r   squeezer   rj   r   r   r   r'  )r,   r2   r   ra  r   r  r3   r  r   rS  rg  rf  loss_fctr   s                 r/   rA   #IJepaForImageClassification.forwardQ  s   " &1%<k$++B]B]**/!5%=#  
 "!*!5!5!!5!<=YYv}}-F{{''/??a'/;DKK,__q(fllejj.HFLL\a\e\eLe/LDKK,/KDKK,{{''<7"9??a'#FNN$4fnn6FGD#F3D))-JJ+-B @&++b/R))-II,./Y,F)-)9TGf$EvE$!//))	
 	
r1   )r_  r   r]  rV  )rD   rE   rF   rG   r   r   r   r   rI   rJ   rK   r   r   r   rA   rL   rM   rN   s   @r/   r[  r[  6  s    
{ 
t 
  04,0)-,0/337&*A
u||,A
 ELL)A
 &	A

 $D>A
 'tnA
 #+4.A
 d^A
 
u++	,A
 A
r1   r[  )r   r<  r[  )r   )5collections.abcr%   typingr   r   r   rI   torch.nnr)   r   r   r   activationsr
   modeling_layersr   modeling_outputsr   r   r   modeling_utilsr   r   pytorch_utilsr   r   utilsr   r   r   configuration_ijepar   
get_loggerrD   r   Moduler   rP   r   rJ   floatr   r   r   r   r   r  r   r  r1  r<  r[  __all__r   r1   r/   <module>r|     s    , ,   A A ! 9 b b F Q 7 7 , 
		H	%$299 $NNbii Nb /? / /R %II%<<% 
% <<	%
 U\\*% % %<F FRbii $$RYY $N		  ")) '+ 'T(
299 (
V"))  Z
% Z
 Z
z O
"6 O
O
d Pr1   