
    <hX                     $   S r SSKrSSKJrJrJr  SSKrSSKrSSKJ	r	  SSK
JrJrJr  SSKJr  SSKJr  SS	KJrJrJrJr  SS
KJrJr  SSKJrJr  SSKJrJrJ r   SSK!J"r"  SSK#J$r$  \RJ                  " \&5      r' " S S\	RP                  5      r) " S S\	RP                  5      r* S?S\	RP                  S\RV                  S\RV                  S\RV                  S\\RV                     S\,S\,4S jjr- " S S\	RP                  5      r. " S S\	RP                  5      r/ " S  S!\	RP                  5      r0 " S" S#\	RP                  5      r1S@S$\RV                  S%\,S&\2S'\RV                  4S( jjr3 " S) S*\	RP                  5      r4 " S+ S,\	RP                  5      r5 " S- S.\	RP                  5      r6 " S/ S0\5      r7 " S1 S2\	RP                  5      r8\ " S3 S4\5      5       r9\ " S5 S6\95      5       r:\" S7S89 " S9 S:\95      5       r;\" S;S89 " S< S=\9\"5      5       r</ S>Qr=g)AzPyTorch DINOv2 model.    N)CallableOptionalUnion)nn)BCEWithLogitsLossCrossEntropyLossMSELoss   )ACT2FN)GradientCheckpointingLayer)BackboneOutputBaseModelOutputBaseModelOutputWithPoolingImageClassifierOutput)ALL_ATTENTION_FUNCTIONSPreTrainedModel) find_pruneable_heads_and_indicesprune_linear_layer)auto_docstringlogging	torch_int)BackboneMixin   )Dinov2Configc                      ^  \ rS rSrSrS\SS4U 4S jjrS\R                  S\	S	\	S\R                  4S
 jr
SS\R                  S\\R                     S\R                  4S jjrSrU =r$ )Dinov2Embeddings&   zE
Construct the CLS token, mask token, position and patch embeddings.
configreturnNc                   > [         TU ]  5         [        R                  " [        R
                  " SSUR                  5      5      U l        UR                  (       a:  [        R                  " [        R                  " SUR                  5      5      U l
        [        U5      U l        U R                  R                  n[        R                  " [        R
                  " SUS-   UR                  5      5      U l        [        R                  " UR                   5      U l        UR$                  U l        UR                  U l        Xl        g )Nr   )super__init__r   	Parametertorchrandnhidden_size	cls_tokenuse_mask_tokenzeros
mask_tokenDinov2PatchEmbeddingspatch_embeddingsnum_patchesposition_embeddingsDropouthidden_dropout_probdropout
patch_sizer   )selfr   r-   	__class__s      b/var/www/html/shao/venv/lib/python3.13/site-packages/transformers/models/dinov2/modeling_dinov2.pyr"   Dinov2Embeddings.__init__+   s    ekk!Q8J8J&KL   ll5;;q&:L:L+MNDO 5f =++77#%<<A{QPVPbPb0c#d zz&"<"<= ++$33    
embeddingsheightwidthc                    UR                   S   S-
  nU R                  R                   S   S-
  n[        R                  R	                  5       (       d  XE:X  a  X#:X  a  U R                  $ U R                  SS2SS24   nU R                  SS2SS24   nUR                   S   nX R
                  -  n	X0R
                  -  n
[        US-  5      nUR                  SXU5      nUR                  SSSS5      nUR                  n[        R                  R                  UR                  [        R                  5      X4SS	S
9R                  US9nUR                  SSSS5      R                  SSU5      n[        R                   " Xg4SS9$ )a  
This method allows to interpolate the pre-trained position encodings, to be able to use the model on higher resolution
images. This method is also adapted to support torch.jit tracing and interpolation at torch.float32 precision.

Adapted from:
- https://github.com/facebookresearch/dino/blob/de9ee3df6cf39fac952ab558447af1fa1365362a/vision_transformer.py#L174-L194, and
- https://github.com/facebookresearch/dinov2/blob/e1277af2ba9496fbadf7aec6eba56e8d882d1e35/dinov2/models/vision_transformer.py#L179-L211
r   Ng      ?r   r
      bicubicF)sizemodealign_cornersdtypedim)shaper.   r$   jit
is_tracingr2   r   reshapepermuterC   r   
functionalinterpolatetofloat32viewcat)r3   r8   r9   r:   r-   num_positionsclass_pos_embedpatch_pos_embedrE   
new_height	new_widthsqrt_num_positionstarget_dtypes                r5   interpolate_pos_encoding)Dinov2Embeddings.interpolate_pos_encoding9   s~    !&&q)A-0066q9A= yy##%%+*F6?+++221bqb59221ab59r".
__,	&}c'9:)11!5G]`a)11!Q1=&,,--33u}}-(	 4 

 "<"
  	 *11!Q1=BB1b#Nyy/;CCr7   pixel_valuesbool_masked_posc                 >   UR                   u  p4pVU R                  R                  R                  R                  nU R                  UR                  US95      nUbj  U R                  (       aY  [        R                  " UR                  S5      U R                  R                  UR                  5      R                  S5      U5      nU R                  R                  USS5      n	[        R                  " X4SS9nXR                  XU5      -   nU R                  U5      nU$ )NrB   r<   r   r   rD   )rF   r,   
projectionweightrC   rM   r(   r$   where	unsqueezer*   r'   expandrP   rX   r1   )
r3   rZ   r[   
batch_size_r9   r:   rW   r8   
cls_tokenss
             r5   forwardDinov2Embeddings.forwarda   s    '3'9'9$
v,,77>>DD**<???+NO
&4+>+>))"-t/A/A*BRBR/S/]/]^_/`blJ
 ^^**:r2>
YY
7Q?
  "?"?
TY"ZZ
\\*-
r7   )r'   r   r1   r*   r,   r2   r.   r(   N)__name__
__module____qualname____firstlineno____doc__r   r"   r$   TensorintrX   r   re   __static_attributes____classcell__r4   s   @r5   r   r   &   s    |  &D5<< &D &DUX &D]b]i]i &DPELL 8ELLCY ejeqeq  r7   r   c                   f   ^  \ rS rSrSrU 4S jrS\R                  S\R                  4S jrSr	U =r
$ )r+   w   z
This class turns `pixel_values` of shape `(batch_size, num_channels, height, width)` into the initial
`hidden_states` (patch embeddings) of shape `(batch_size, seq_length, hidden_size)` to be consumed by a
Transformer.
c                   > [         TU ]  5         UR                  UR                  p2UR                  UR
                  pT[        U[        R                  R                  5      (       a  UOX"4n[        U[        R                  R                  5      (       a  UOX34nUS   US   -  US   US   -  -  nX l        X0l        X@l        X`l
        [        R                  " XEX3S9U l        g )Nr   r   )kernel_sizestride)r!   r"   
image_sizer2   num_channelsr&   
isinstancecollectionsabcIterabler-   r   Conv2dr]   )r3   r   rw   r2   rx   r&   r-   r4   s          r5   r"   Dinov2PatchEmbeddings.__init__~   s    !'!2!2F4E4EJ$*$7$79K9Kk#-j+//:R:R#S#SZZdYq
#-j+//:R:R#S#SZZdYq
!!}
15*Q-:VW=:XY$$(&))L:ir7   rZ   r   c                     UR                   S   nX R                  :w  a  [        SU R                   SU S35      eU R                  U5      R	                  S5      R                  SS5      nU$ )Nr   zoMake sure that the channel dimension of the pixel values match with the one set in the configuration. Expected z	 but got .r=   )rF   rx   
ValueErrorr]   flatten	transpose)r3   rZ   rx   r8   s       r5   re   Dinov2PatchEmbeddings.forward   sx    #))!,,,,!../yaI  __\2::1=GG1M
r7   )rw   rx   r-   r2   r]   )rh   ri   rj   rk   rl   r"   r$   rm   re   ro   rp   rq   s   @r5   r+   r+   w   s.    jELL U\\  r7   r+   modulequerykeyvalueattention_maskscalingr1   c                    [         R                  " XR                  SS5      5      U-  n[        R                  R                  US[         R                  S9R                  UR                  5      n[        R                  R                  XU R                  S9nUb  X-  n[         R                  " X5      n	U	R                  SS5      R                  5       n	X4$ )Nr<   )rE   rC   )ptrainingr   r=   )r$   matmulr   r   rK   softmaxrN   rM   rC   r1   r   
contiguous)
r   r   r   r   r   r   r1   kwargsattn_weightsattn_outputs
             r5   eager_attention_forwardr      s     <<}}R'<=GL ==((2U]](SVVW\WbWbcL ==((6??([L !#4,,|3K''1-88:K$$r7   c            
          ^  \ rS rSrS\SS4U 4S jjr  S
S\\R                     S\	S\
\\R                  \R                  4   \\R                     4   4S jjrS	rU =r$ )Dinov2SelfAttention   r   r   Nc                 0  > [         TU ]  5         UR                  UR                  -  S:w  a7  [	        US5      (       d&  [        SUR                   SUR                   S35      eXl        UR                  U l        [        UR                  UR                  -  5      U l        U R                  U R                  -  U l	        UR                  U l        U R                  S-  U l        SU l        [        R                  " UR                  U R                  UR                   S9U l        [        R                  " UR                  U R                  UR                   S9U l        [        R                  " UR                  U R                  UR                   S9U l        g )	Nr   embedding_sizezThe hidden size z4 is not a multiple of the number of attention heads r   g      Fbias)r!   r"   r&   num_attention_headshasattrr   r   rn   attention_head_sizeall_head_sizeattention_probs_dropout_probdropout_probr   	is_causalr   Linearqkv_biasr   r   r   r3   r   r4   s     r5   r"   Dinov2SelfAttention.__init__   sG    : ::a?PVXhHiHi"6#5#5"6 7334A7 
 #)#=#= #&v'9'9F<V<V'V#W !558P8PP"??//5YYv1143E3EFOO\
99V//1C1C&//ZYYv1143E3EFOO\
r7   	head_maskoutput_attentionsc                    UR                   u  pEnU R                  U5      R                  USU R                  U R                  5      R                  SS5      nU R                  U5      R                  USU R                  U R                  5      R                  SS5      nU R                  U5      R                  USU R                  U R                  5      R                  SS5      n	[        n
U R                  R                  S:w  aT  U R                  R                  S:X  a  U(       a  [        R                  S5        O[        U R                  R                     n
U
" U U	UUUU R                  U R                  U R                   (       d  SOU R"                  S9u  pUR%                  5       S S	 U R&                  4-   nUR)                  U5      nU(       a  X4nU$ U4nU$ )
Nr<   r   r=   eagersdpaz`torch.nn.functional.scaled_dot_product_attention` does not support `output_attentions=True`. Falling back to eager attention. This warning can be removed using the argument `attn_implementation="eager"` when loading the model.        )r   r   r1   r   )rF   r   rO   r   r   r   r   r   r   r   _attn_implementationloggerwarning_oncer   r   r   r   r   r?   r   rI   )r3   hidden_statesr   r   rb   
seq_lengthrc   	key_layervalue_layerquery_layerattention_interfacecontext_layerattention_probsnew_context_layer_shapeoutputss                  r5   re   Dinov2SelfAttention.forward   s    %2$7$7!
HH]#T*b$":":D<T<TUYq!_ 	 JJ}%T*b$":":D<T<TUYq!_ 	 JJ}%T*b$":":D<T<TUYq!_ 	 )@;;++w6{{//69>O##L
 '>dkk>^>^&_#)<nnLL#}}C$2C2C	*
& #0"4"4"6s";t?Q?Q>S"S%--.EF6G=2 O\M]r7   )
r   r   r   r   r   r   r   r   r   r   NF)rh   ri   rj   rk   r   r"   r   r$   rm   boolr   tuplere   ro   rp   rq   s   @r5   r   r      sw    ]| ] ]. -1"'	1 ELL)1  	1
 
uU\\5<</0%2EE	F1 1r7   r   c                      ^  \ rS rSrSrS\SS4U 4S jjrS\R                  S\R                  S\R                  4S	 jr	S
r
U =r$ )Dinov2SelfOutputi  z
The residual connection is defined in Dinov2Layer instead of here (as is the case with other models), due to the
layernorm applied before each block.
r   r   Nc                    > [         TU ]  5         [        R                  " UR                  UR                  5      U l        [        R                  " UR                  5      U l        g rg   )	r!   r"   r   r   r&   denser/   r0   r1   r   s     r5   r"   Dinov2SelfOutput.__init__  sB    YYv1163E3EF
zz&"<"<=r7   r   input_tensorc                 J    U R                  U5      nU R                  U5      nU$ rg   r   r1   )r3   r   r   s      r5   re   Dinov2SelfOutput.forward  s$    

=1]3r7   r   )rh   ri   rj   rk   rl   r   r"   r$   rm   re   ro   rp   rq   s   @r5   r   r     sI    
>| > >
U\\  RWR^R^  r7   r   c                      ^  \ rS rSrS\SS4U 4S jjrS\\   SS4S jr  SS\	R                  S	\\	R                     S
\S\\\	R                  \	R                  4   \\	R                     4   4S jjrSrU =r$ )Dinov2Attentioni  r   r   Nc                    > [         TU ]  5         [        U5      U l        [	        U5      U l        [        5       U l        g rg   )r!   r"   r   	attentionr   outputsetpruned_headsr   s     r5   r"   Dinov2Attention.__init__  s0    ,V4&v.Er7   headsc                 6   [        U5      S:X  a  g [        XR                  R                  U R                  R                  U R
                  5      u  p[        U R                  R                  U5      U R                  l        [        U R                  R                  U5      U R                  l        [        U R                  R                  U5      U R                  l	        [        U R                  R                  USS9U R                  l        U R                  R                  [        U5      -
  U R                  l        U R                  R                  U R                  R                  -  U R                  l        U R
                  R                  U5      U l        g )Nr   r   rD   )lenr   r   r   r   r   r   r   r   r   r   r   r   union)r3   r   indexs      r5   prune_headsDinov2Attention.prune_heads  s   u:?7>>55t~~7Y7Y[_[l[l

  2$..2F2FN/0B0BEJ1$..2F2FN.t{{/@/@%QO .2^^-O-ORUV[R\-\*'+~~'I'IDNNLnLn'n$ --33E:r7   r   r   r   c                 f    U R                  XU5      nU R                  US   U5      nU4USS  -   nU$ )Nr   r   )r   r   )r3   r   r   r   self_outputsattention_outputr   s          r5   re   Dinov2Attention.forward.  sC     ~~m@QR;;|AF#%QR(88r7   )r   r   r   r   )rh   ri   rj   rk   r   r"   r   rn   r   r$   rm   r   r   r   r   re   ro   rp   rq   s   @r5   r   r     s    "| " ";S ;d ;* -1"'	|| ELL)  	
 
uU\\5<</0%2EE	F r7   r   c                   f   ^  \ rS rSrSU 4S jjrS\R                  S\R                  4S jrSrU =r	$ )Dinov2LayerScalei<  r   c                    > [         TU ]  5         [        R                  " UR                  [
        R                  " UR                  5      -  5      U l        g rg   )	r!   r"   r   r#   layerscale_valuer$   onesr&   lambda1r   s     r5   r"   Dinov2LayerScale.__init__=  s8    ||F$;$;ejjI[I[>\$\]r7   hidden_statec                     XR                   -  $ rg   r   r3   r   s     r5   re   Dinov2LayerScale.forwardA  s    ll**r7   r   r   N
rh   ri   rj   rk   r"   r$   rm   re   ro   rp   rq   s   @r5   r   r   <  s)    ^+ELL +U\\ + +r7   r   input	drop_probr   r   c                    US:X  d  U(       d  U $ SU-
  nU R                   S   4SU R                  S-
  -  -   nU[        R                  " X@R                  U R
                  S9-   nUR                  5         U R                  U5      U-  nU$ )a*  
Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).

Comment by Ross Wightman: This is the same as the DropConnect impl I created for EfficientNet, etc networks,
however, the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper...
See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for changing the
layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use 'survival rate' as the
argument.
r   r   r   )r   )rC   device)rF   ndimr$   randrC   r   floor_div)r   r   r   	keep_probrF   random_tensorr   s          r5   	drop_pathr   F  s     CxII[[^

Q 77E

5ELL YYMYYy!M1FMr7   c                      ^  \ rS rSrSrSS\\   SS4U 4S jjjrS\R                  S\R                  4S jr
S\4S	 jrS
rU =r$ )Dinov2DropPathi[  zXDrop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).Nr   r   c                 .   > [         TU ]  5         Xl        g rg   )r!   r"   r   )r3   r   r4   s     r5   r"   Dinov2DropPath.__init__^  s    "r7   r   c                 B    [        XR                  U R                  5      $ rg   )r   r   r   )r3   r   s     r5   re   Dinov2DropPath.forwardb  s    FFr7   c                      SU R                    3$ )Nzp=r   r3   s    r5   
extra_reprDinov2DropPath.extra_repre  s    DNN#$$r7   r   rg   )rh   ri   rj   rk   rl   r   floatr"   r$   rm   re   strr   ro   rp   rq   s   @r5   r   r   [  sQ    b#(5/ #T # #GU\\ Gell G%C % %r7   r   c                   f   ^  \ rS rSrSU 4S jjrS\R                  S\R                  4S jrSrU =r	$ )	Dinov2MLPii  r   c                 z  > [         TU ]  5         UR                  =p#[        UR                  UR                  -  5      n[
        R                  " X$SS9U l        [        UR                  [        5      (       a  [        UR                     U l        OUR                  U l        [
        R                  " XCSS9U l        g )NTr   )r!   r"   r&   rn   	mlp_ratior   r   fc1ry   
hidden_actr   r   
activationfc2r3   r   in_featuresout_featureshidden_featuresr4   s        r5   r"   Dinov2MLP.__init__j  s    %+%7%77f0063C3CCD99[Ef''--$V%6%67DO$//DO99_Fr7   r   c                 l    U R                  U5      nU R                  U5      nU R                  U5      nU$ rg   )r  r  r  r   s     r5   re   Dinov2MLP.forwardu  s2    xx-|4xx-r7   )r  r  r  r   r   rq   s   @r5   r  r  i  s)    	GELL U\\  r7   r  c                   f   ^  \ rS rSrSU 4S jjrS\R                  S\R                  4S jrSrU =r	$ )Dinov2SwiGLUFFNi|  r   c                 $  > [         TU ]  5         UR                  =p#[        UR                  UR                  -  5      n[        US-  S-  5      S-   S-  S-  n[
        R                  " USU-  SS9U l        [
        R                  " XCSS9U l        g )Nr=   r
         Tr   )	r!   r"   r&   rn   r  r   r   
weights_inweights_outr	  s        r5   r"   Dinov2SwiGLUFFN.__init__}  s    %+%7%77f0063C3CCD2Q67!;AAE))K_1D4P99_Nr7   r   c                     U R                  U5      nUR                  SSS9u  p#[        R                  R	                  U5      U-  nU R                  U5      $ )Nr=   r<   rD   )r  chunkr   rK   silur  )r3   r   x1x2hiddens        r5   re   Dinov2SwiGLUFFN.forward  sQ    |4##A2#.##B'",''r7   )r  r  r   r   rq   s   @r5   r  r  |  s)    O(ELL (U\\ ( (r7   r  c                      ^  \ rS rSrSrS\SS4U 4S jjr  SS\R                  S\	\R                     S	\
S\\\R                  \R                  4   \\R                     4   4S
 jjrSrU =r$ )Dinov2Layeri  zCThis corresponds to the Block class in the original implementation.r   r   Nc                   > [         TU ]  5         [        R                  " UR                  UR
                  S9U l        [        U5      U l        [        U5      U l
        UR                  S:  a  [        UR                  5      O[        R                  " 5       U l        [        R                  " UR                  UR
                  S9U l        UR                   (       a  [#        U5      U l        O['        U5      U l        [        U5      U l        g )Nepsr   )r!   r"   r   	LayerNormr&   layer_norm_epsnorm1r   r   r   layer_scale1drop_path_rater   Identityr   norm2use_swiglu_ffnr  mlpr  layer_scale2r   s     r5   r"   Dinov2Layer.__init__  s    \\&"4"4&:O:OP
(0,V4BHBWBWZ]B](=(=>cecncncp\\&"4"4&:O:OP
  &v.DH (DH,V4r7   r   r   r   c                 >   U R                  U R                  U5      UUS9nUS   nU R                  U5      nUSS  nU R                  U5      U-   nU R	                  U5      nU R                  U5      nU R                  U5      nU R                  U5      U-   nU4U-   nU$ )N)r   r   r   )r   r&  r'  r   r*  r,  r-  )r3   r   r   r   self_attention_outputsr   r   layer_outputs           r5   re   Dinov2Layer.forward  s     "&JJ}%/ "0 "

 2!4,,-=>(, '78=H zz-0xx-((6 ~~l3mC/G+r7   )r   r   r'  r-  r,  r&  r*  r   )rh   ri   rj   rk   rl   r   r"   r$   rm   r   r   r   r   re   ro   rp   rq   s   @r5   r   r     s    M5| 5 5& -1"'	|| ELL)  	
 
uU\\5<</0%2EE	F r7   r   c                      ^  \ rS rSrS\SS4U 4S jjr    SS\R                  S\\R                     S\	S	\	S
\	S\
\\4   4S jjrSrU =r$ )Dinov2Encoderi  r   r   Nc                    > [         TU ]  5         Xl        [        R                  " [        UR                  5       Vs/ sH  n[        U5      PM     sn5      U l        SU l	        g s  snf r   )
r!   r"   r   r   
ModuleListrangenum_hidden_layersr   layergradient_checkpointingr3   r   rc   r4   s      r5   r"   Dinov2Encoder.__init__  sR    ]]vG_G_A`#aA`AK$7A`#ab
&+# $bs   A%r   r   r   output_hidden_statesreturn_dictc                 6   U(       a  SOS nU(       a  SOS n[        U R                  5       H9  u  pU(       a  Xa4-   nUb  X(   OS n
U	" XU5      nUS   nU(       d  M1  X{S   4-   nM;     U(       a  Xa4-   nU(       d  [        S XU4 5       5      $ [        UUUS9$ )N r   r   c              3   ,   #    U H  oc  M  Uv   M     g 7frg   r@  ).0vs     r5   	<genexpr>(Dinov2Encoder.forward.<locals>.<genexpr>  s     m$[q$[s   	)last_hidden_stater   
attentions)	enumerater9  r   r   )r3   r   r   r   r=  r>  all_hidden_statesall_self_attentionsilayer_modulelayer_head_masklayer_outputss               r5   re   Dinov2Encoder.forward  s     #7BD$5b4(4OA#$58H$H!.7.CilO(IZ[M)!,M  &91=M<O&O#  5   14D Dm]GZ$[mmm++*
 	
r7   )r   r:  r9  )NFFT)rh   ri   rj   rk   r   r"   r$   rm   r   r   r   r   r   re   ro   rp   rq   s   @r5   r4  r4    s    ,| , , -1"'%* !
||!
 ELL)!
  	!

 #!
 !
 
uo%	&!
 !
r7   r4  c                       \ rS rSr% \\S'   SrSrSrS/r	Sr
SrSrSrS\\R                   \R"                  \R$                  4   SS	4S
 jrSrg	)Dinov2PreTrainedModeli  r   dinov2rZ   Tr   r   r   Nc                 j   [        U[        R                  [        R                  45      (       a  [        R                  R                  UR                  R                  R                  [        R                  5      SU R                  R                  S9R                  UR                  R                  5      UR                  l        UR                  b%  UR                  R                  R                  5         gg[        U[        R                   5      (       aJ  UR                  R                  R                  5         UR                  R                  R#                  S5        g[        U[$        5      (       Gam  [        R                  R                  UR&                  R                  R                  [        R                  5      SU R                  R                  S9R                  UR&                  R                  5      UR&                  l        [        R                  R                  UR(                  R                  R                  [        R                  5      SU R                  R                  S9R                  UR(                  R                  5      UR(                  l        U R                  R*                  (       a%  UR,                  R                  R                  5         gg[        U[.        5      (       a:  UR0                  R                  R#                  U R                  R2                  5        gg)zInitialize the weightsr   )meanstdNg      ?)ry   r   r   r}   inittrunc_normal_r^   datarM   r$   rN   r   initializer_rangerC   r   zero_r$  fill_r   r.   r'   r(   r*   r   r   r   )r3   r   s     r5   _init_weights#Dinov2PreTrainedModel._init_weights  s-   fryy"))455 "$!6!6""%%emm43DKKDaDa "7 "b$$% MM {{&  &&( '--KK""$MM$$S) 011.0gg.C.C**//225==AKK11 /D / b++112	 &&+ %'GG$9$9  %%((7KK11 %: % b!!''(	 ! {{))!!&&,,. * 011NN%%dkk&B&BC 2r7   r@  )rh   ri   rj   rk   r   __annotations__base_model_prefixmain_input_namesupports_gradient_checkpointing_no_split_modules_supports_sdpa_supports_flash_attn_supports_flex_attn_supports_attention_backendr   r   r   r}   r$  r\  ro   r@  r7   r5   rQ  rQ    sg     $O&*#&N"&DE"))RYY*L$M DRV Dr7   rQ  c                   
  ^  \ rS rSrS\4U 4S jjrS\4S jrS\\	\
\	   4   SS4S jr\      SS	\\R                     S
\\R                     S\\R                     S\\   S\\   S\\   S\\\4   4S jj5       rSrU =r$ )Dinov2Modeli  r   c                    > [         TU ]  U5        Xl        [        U5      U l        [        U5      U l        [        R                  " UR                  UR                  S9U l        U R                  5         g )Nr"  )r!   r"   r   r   r8   r4  encoderr   r$  r&   r%  	layernorm	post_initr   s     r5   r"   Dinov2Model.__init__  sW     *62$V,f&8&8f>S>ST 	r7   r   c                 .    U R                   R                  $ rg   r8   r,   r   s    r5   get_input_embeddings Dinov2Model.get_input_embeddings&      ///r7   heads_to_pruneNc                     UR                  5        H7  u  p#U R                  R                  U   R                  R	                  U5        M9     g)z
Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
class PreTrainedModel
N)itemsrj  r9  r   r   )r3   rs  r9  r   s       r5   _prune_headsDinov2Model._prune_heads)  s<    
 +002LELLu%//;;EB 3r7   rZ   r[   r   r   r=  r>  c                    Ub  UOU R                   R                  nUb  UOU R                   R                  nUb  UOU R                   R                  nUc  [	        S5      eU R                  X0R                   R                  5      nU R                  XS9nU R                  UUUUUS9nUS   n	U R                  U	5      n	U	SS2SSS24   n
U(       d
  X4nXSS -   $ [        U	U
UR                  UR                  S9$ )z
bool_masked_pos (`torch.BoolTensor` of shape `(batch_size, sequence_length)`):
    Boolean masked positions. Indicates which patches are masked (1) and which aren't (0). Only relevant for
    pre-training.
Nz You have to specify pixel_values)r[   r   r   r=  r>  r   r   )rF  pooler_outputr   rG  )r   r   r=  use_return_dictr   get_head_maskr8  r8   rj  rk  r   r   rG  )r3   rZ   r[   r   r   r=  r>  embedding_outputencoder_outputssequence_outputpooled_outputhead_outputss               r5   re   Dinov2Model.forward1  s    2C1N-TXT_T_TqTq$8$D $++JjJj 	 &1%<k$++B]B]?@@ &&y++2O2OP	??<?Y,,/!5# ' 
 *!,..9'1a0+;L!""555)-')77&11	
 	
r7   )r   r8   rj  rk  NNNNNN)rh   ri   rj   rk   r   r"   r+   rp  dictrn   listrv  r   r   r$   rm   r   r   r   r   re   ro   rp   rq   s   @r5   rh  rh    s    
| 
0&; 0C4T#Y+? CD C  0426,0,0/3&*4
u||,4
 "%,,/4
 ELL)	4

 $D>4
 'tn4
 d^4
 
u00	14
 4
r7   rh  z
    Dinov2 Model transformer with an image classification head on top (a linear layer on top of the final hidden state
    of the [CLS] token) e.g. for ImageNet.
    )custom_introc                      ^  \ rS rSrS\SS4U 4S jjr\      SS\\R                     S\\R                     S\\R                     S	\\
   S
\\
   S\\
   S\\\4   4S jj5       rSrU =r$ )Dinov2ForImageClassificationii  r   r   Nc                 6  > [         TU ]  U5        UR                  U l        [        U5      U l        UR                  S:  a.  [
        R                  " UR                  S-  UR                  5      O[
        R                  " 5       U l	        U R                  5         g )Nr   r=   )r!   r"   
num_labelsrh  rR  r   r   r&   r)  
classifierrl  r   s     r5   r"   %Dinov2ForImageClassification.__init__p  sy      ++!&) EKDUDUXYDYBIIf((1,f.?.?@_a_j_j_l 	
 	r7   rZ   r   labelsr   r=  r>  c                    Ub  UOU R                   R                  nU R                  UUUUUS9nUS   nUSS2S4   n	USS2SS24   n
[        R                  " XR                  SS9/SS9nU R                  U5      nSnUGb  UR                  UR                  5      nU R                   R                  c  U R                  S:X  a  SU R                   l	        OoU R                  S:  aN  UR                  [        R                  :X  d  UR                  [        R                  :X  a  SU R                   l	        OSU R                   l	        U R                   R                  S:X  aI  [        5       nU R                  S:X  a&  U" UR                  5       UR                  5       5      nOU" X5      nOU R                   R                  S:X  a=  [!        5       nU" UR#                  S	U R                  5      UR#                  S	5      5      nO,U R                   R                  S:X  a  [%        5       nU" X5      nU(       d  U4US
S -   nUb  U4U-   $ U$ ['        UUUR(                  UR*                  S9$ )ab  
labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
    Labels for computing the image classification/regression loss. Indices should be in `[0, ...,
    config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
    `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
Nry  r   r   rD   
regressionsingle_label_classificationmulti_label_classificationr<   r=   )losslogitsr   rG  )r   r{  rR  r$   rP   rT  r  rM   r   problem_typer  rC   longrn   r	   squeezer   rO   r   r   r   rG  )r3   rZ   r   r  r   r=  r>  r   r  r'   patch_tokenslinear_inputr  r  loss_fctr   s                   r5   re   $Dinov2ForImageClassification.forward~  s     &1%<k$++B]B]++/!5#  
 "!*#AqD)	&q!"u-yy)->->1->-E!FAN.YYv}}-F{{''/??a'/;DKK,__q(fllejj.HFLL\a\e\eLe/LDKK,/KDKK,{{''<7"9??a'#FNN$4fnn6FGD#F3D))-JJ+-B @&++b/R))-II,./Y,F)-)9TGf$EvE$!//))	
 	
r7   )r  rR  r  r  )rh   ri   rj   rk   r   r"   r   r   r$   rm   r   r   r   r   re   ro   rp   rq   s   @r5   r  r  i  s    |    04,0)-,0/3&*D
u||,D
 ELL)D
 &	D

 $D>D
 'tnD
 d^D
 
u++	,D
 D
r7   r  zO
    Dinov2 backbone, to be used with frameworks like DETR and MaskFormer.
    c                      ^  \ rS rSrU 4S jrS\4S jr\   SS\R                  S\
\   S\
\   S\
\   S\4
S	 jj5       rS
rU =r$ )Dinov2Backbonei  c                 t  > [         TU ]  U5        [         TU ]	  U5        [        UR                  S-   5       Vs/ sH  o!R
                  PM     snU l        [        U5      U l        [        U5      U l
        [        R                  " UR
                  UR                  S9U l        U R                  5         g s  snf )Nr   r"  )r!   r"   _init_backboner7  r8  r&   num_featuresr   r8   r4  rj  r   r$  r%  rk  rl  r;  s      r5   r"   Dinov2Backbone.__init__  s     v&9>v?W?WZ[?[9\]9\A//9\]*62$V,f&8&8f>S>ST 	 ^s   B5r   c                 .    U R                   R                  $ rg   ro  r   s    r5   rp  #Dinov2Backbone.get_input_embeddings  rr  r7   rZ   r=  r   r>  c                    Ub  UOU R                   R                  nUb  UOU R                   R                  nUb  UOU R                   R                  nU R	                  U5      nU R                  USX4S9nU(       a  UR                  OUS   nSn[        U R                  U5       H  u  pXR                  ;   d  M  U R                   R                  (       a  U R                  U
5      n
U R                   R                  (       aj  U
SS2SS24   n
UR                  u  ppU R                   R                  nU
R                  XU-  X-  S5      n
U
R!                  SSSS	5      R#                  5       n
X4-  nM     U(       d  U(       a  U4USS -   nU$ U4US	S -   nU$ [%        UU(       a  UR                  OSU(       a  UR&                  S
9$ SS
9$ )a  
Examples:

```python
>>> from transformers import AutoImageProcessor, AutoBackbone
>>> import torch
>>> from PIL import Image
>>> import requests

>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw)

>>> processor = AutoImageProcessor.from_pretrained("facebook/dinov2-base")
>>> model = AutoBackbone.from_pretrained(
...     "facebook/dinov2-base", out_features=["stage2", "stage5", "stage8", "stage11"]
... )

>>> inputs = processor(image, return_tensors="pt")

>>> outputs = model(**inputs)
>>> feature_maps = outputs.feature_maps
>>> list(feature_maps[-1].shape)
[1, 768, 16, 16]
```NT)r=  r   r>  r   r@  r<   r   r
   r=   )feature_mapsr   rG  )r   r{  r=  r   r8   rj  r   zipstage_namesr  apply_layernormrk  reshape_hidden_statesrF   r2   rI   rJ   r   r   rG  )r3   rZ   r=  r   r>  r}  r   r   r  stager   rb   rc   r9   r:   r2   r   s                    r5   re   Dinov2Backbone.forward  s   @ &1%<k$++B]B]$8$D $++JjJj 	 2C1N-TXT_T_TqTq??<8,,4K\  
 2=--'!*#&t'7'7#GE)));;..#'>>,#?L;;44#/12#6L 4@3E3E0J6!%!7!7J#/#7#7
jDXZ_Zmoq#rL#/#7#71a#C#N#N#PL/ $H #&712;6 M '712;6M%3G'//T->w))
 	
 EI
 	
r7   )r8   rj  rk  r  )NNN)rh   ri   rj   rk   r"   r+   rp  r   r$   rm   r   r   r   re   ro   rp   rq   s   @r5   r  r    s{    0&; 0  04,0&*G
llG
 'tnG
 $D>	G

 d^G
 
G
 G
r7   r  )r  rh  rQ  r  )r   )r   F)>rl   collections.abcrz   typingr   r   r   r$   torch.utils.checkpointr   torch.nnr   r   r	   activationsr   modeling_layersr   modeling_outputsr   r   r   r   modeling_utilsr   r   pytorch_utilsr   r   utilsr   r   r   utils.backbone_utilsr   configuration_dinov2r   
get_loggerrh   r   Moduler   r+   rm   r   r   r   r   r   r   r   r   r   r  r  r   r4  rQ  rh  r  r  __all__r@  r7   r5   <module>r     s7     , ,    A A ! 9 r r F Q 7 7 1 . 
		H	%Nryy NbBII R %II%<<% 
% <<	%
 U\\*% % %>F")) FTryy &$bii $N+ryy +U\\ e T V[VbVb *%RYY %		 &(bii ("0, 0h(
BII (
V (DO (D (DV M
' M
 M
` T
#8 T
T
n 
Y
*M Y

Y
x er7   