
"""PyTorch Dilated Neighborhood Attention Transformer model."""

import math
from dataclasses import dataclass
from typing import Optional, Union

import torch
import torch.utils.checkpoint
from torch import nn
from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss

from ...activations import ACT2FN
from ...modeling_outputs import BackboneOutput
from ...modeling_utils import PreTrainedModel
from ...pytorch_utils import find_pruneable_heads_and_indices, prune_linear_layer
from ...utils import (
    ModelOutput,
    OptionalDependencyNotAvailable,
    auto_docstring,
    is_natten_available,
    logging,
    requires_backends,
)
from ...utils.backbone_utils import BackboneMixin
from .configuration_dinat import DinatConfig


if is_natten_available():
    from natten.functional import natten2dav, natten2dqkrpb
else:

    def natten2dav(*args, **kwargs):
        raise OptionalDependencyNotAvailable()

    def natten2dqkrpb(*args, **kwargs):
        raise OptionalDependencyNotAvailable()


logger = logging.get_logger(__name__)


@dataclass
@auto_docstring(
    custom_intro="""
    Dinat encoder's outputs, with potential hidden states and attentions.
    """
)
class DinatEncoderOutput(ModelOutput):
    r"""
    reshaped_hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
        Tuple of `torch.FloatTensor` (one for the output of the embeddings + one for the output of each stage) of
        shape `(batch_size, hidden_size, height, width)`.

        Hidden-states of the model at the output of each layer plus the initial embedding outputs reshaped to
        include the spatial dimensions.
    """

    last_hidden_state: Optional[torch.FloatTensor] = None
    hidden_states: Optional[tuple[torch.FloatTensor, ...]] = None
    attentions: Optional[tuple[torch.FloatTensor, ...]] = None
    reshaped_hidden_states: Optional[tuple[torch.FloatTensor, ...]] = None


@dataclass
@auto_docstring(
    custom_intro="""
    Dinat model's outputs that also contains a pooling of the last hidden states.
    """
)
class DinatModelOutput(ModelOutput):
    r"""
    pooler_output (`torch.FloatTensor` of shape `(batch_size, hidden_size)`, *optional*, returned when `add_pooling_layer=True` is passed):
        Average pooling of the last layer hidden-state.
    reshaped_hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
        Tuple of `torch.FloatTensor` (one for the output of the embeddings + one for the output of each stage) of
        shape `(batch_size, hidden_size, height, width)`.

        Hidden-states of the model at the output of each layer plus the initial embedding outputs reshaped to
        include the spatial dimensions.
    """

    last_hidden_state: Optional[torch.FloatTensor] = None
    pooler_output: Optional[torch.FloatTensor] = None
    hidden_states: Optional[tuple[torch.FloatTensor, ...]] = None
    attentions: Optional[tuple[torch.FloatTensor, ...]] = None
    reshaped_hidden_states: Optional[tuple[torch.FloatTensor, ...]] = None


@dataclass
@auto_docstring(
    custom_intro="""
    Dinat outputs for image classification.
    """
)
class DinatImageClassifierOutput(ModelOutput):
    r"""
    loss (`torch.FloatTensor` of shape `(1,)`, *optional*, returned when `labels` is provided):
        Classification (or regression if config.num_labels==1) loss.
    logits (`torch.FloatTensor` of shape `(batch_size, config.num_labels)`):
        Classification (or regression if config.num_labels==1) scores (before SoftMax).
    reshaped_hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
        Tuple of `torch.FloatTensor` (one for the output of the embeddings + one for the output of each stage) of
        shape `(batch_size, hidden_size, height, width)`.

        Hidden-states of the model at the output of each layer plus the initial embedding outputs reshaped to
        include the spatial dimensions.
    """

    loss: Optional[torch.FloatTensor] = None
    logits: Optional[torch.FloatTensor] = None
    hidden_states: Optional[tuple[torch.FloatTensor, ...]] = None
    attentions: Optional[tuple[torch.FloatTensor, ...]] = None
    reshaped_hidden_states: Optional[tuple[torch.FloatTensor, ...]] = None


class DinatEmbeddings(nn.Module):
    """
    Construct the patch and position embeddings.
    """

    def __init__(self, config):
        super().__init__()

        self.patch_embeddings = DinatPatchEmbeddings(config)

        self.norm = nn.LayerNorm(config.embed_dim)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)

    def forward(self, pixel_values: Optional[torch.FloatTensor]) -> tuple[torch.Tensor]:
        embeddings = self.patch_embeddings(pixel_values)
        embeddings = self.norm(embeddings)
        embeddings = self.dropout(embeddings)

        return embeddings


class DinatPatchEmbeddings(nn.Module):
    """
    This class turns `pixel_values` of shape `(batch_size, num_channels, height, width)` into the initial
    `hidden_states` (patch embeddings) of shape `(batch_size, height, width, hidden_size)` to be consumed by a
    Transformer.
    """

    def __init__(self, config):
        super().__init__()
        patch_size = config.patch_size
        num_channels, hidden_size = config.num_channels, config.embed_dim
        self.num_channels = num_channels

        if patch_size != 4:
            raise ValueError("Dinat only supports patch size of 4 at the moment.")

        self.projection = nn.Sequential(
            nn.Conv2d(self.num_channels, hidden_size // 2, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1)),
            nn.Conv2d(hidden_size // 2, hidden_size, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1)),
        )

    def forward(self, pixel_values: Optional[torch.FloatTensor]) -> torch.Tensor:
        _, num_channels, height, width = pixel_values.shape
        if num_channels != self.num_channels:
            raise ValueError(
                "Make sure that the channel dimension of the pixel values match with the one set in the configuration."
            )
        embeddings = self.projection(pixel_values)
        embeddings = embeddings.permute(0, 2, 3, 1)

        return embeddings


class DinatDownsampler(nn.Module):
    """
    Convolutional Downsampling Layer.

    Args:
        dim (`int`):
            Number of input channels.
        norm_layer (`nn.Module`, *optional*, defaults to `nn.LayerNorm`):
            Normalization layer class.
    """

    def __init__(self, dim: int, norm_layer: nn.Module = nn.LayerNorm) -> None:
        super().__init__()
        self.dim = dim
        self.reduction = nn.Conv2d(dim, 2 * dim, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        self.norm = norm_layer(2 * dim)

    def forward(self, input_feature: torch.Tensor) -> torch.Tensor:
        input_feature = self.reduction(input_feature.permute(0, 3, 1, 2)).permute(0, 2, 3, 1)
        input_feature = self.norm(input_feature)
        return input_feature


def drop_path(input: torch.Tensor, drop_prob: float = 0.0, training: bool = False) -> torch.Tensor:
    """
    Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).

    Comment by Ross Wightman: This is the same as the DropConnect impl I created for EfficientNet, etc networks,
    however, the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper...
    See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for changing the
    layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use 'survival rate' as the
    argument.
    """
    if drop_prob == 0.0 or not training:
        return input
    keep_prob = 1 - drop_prob
    shape = (input.shape[0],) + (1,) * (input.ndim - 1)  # work with diff dim tensors, not just 2D ConvNets
    random_tensor = keep_prob + torch.rand(shape, dtype=input.dtype, device=input.device)
    random_tensor.floor_()  # binarize
    output = input.div(keep_prob) * random_tensor
    return output


class DinatDropPath(nn.Module):
    """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks)."""

    def __init__(self, drop_prob: Optional[float] = None) -> None:
        super().__init__()
        self.drop_prob = drop_prob

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        return drop_path(hidden_states, self.drop_prob, self.training)

    def extra_repr(self) -> str:
        return f"p={self.drop_prob}"


class NeighborhoodAttention(nn.Module):
    def __init__(self, config, dim, num_heads, kernel_size, dilation):
        super().__init__()
        if dim % num_heads != 0:
            raise ValueError(
                f"The hidden size ({dim}) is not a multiple of the number of attention heads ({num_heads})"
            )

        self.num_attention_heads = num_heads
        self.attention_head_size = int(dim / num_heads)
        self.all_head_size = self.num_attention_heads * self.attention_head_size
        self.kernel_size = kernel_size
        self.dilation = dilation

        # rpb is learnable relative positional biases; the same concept is used in Swin.
        self.rpb = nn.Parameter(torch.zeros(num_heads, (2 * self.kernel_size - 1), (2 * self.kernel_size - 1)))

        self.query = nn.Linear(self.all_head_size, self.all_head_size, bias=config.qkv_bias)
        self.key = nn.Linear(self.all_head_size, self.all_head_size, bias=config.qkv_bias)
        self.value = nn.Linear(self.all_head_size, self.all_head_size, bias=config.qkv_bias)

        self.dropout = nn.Dropout(config.attention_probs_dropout_prob)

    def transpose_for_scores(self, x: torch.Tensor) -> torch.Tensor:
        new_x_shape = x.size()[:-1] + (self.num_attention_heads, self.attention_head_size)
        x = x.view(new_x_shape)
        return x.permute(0, 3, 1, 2, 4)

    def forward(
        self,
        hidden_states: torch.Tensor,
        output_attentions: Optional[bool] = False,
    ) -> tuple[torch.Tensor]:
        query_layer = self.transpose_for_scores(self.query(hidden_states))
        key_layer = self.transpose_for_scores(self.key(hidden_states))
        value_layer = self.transpose_for_scores(self.value(hidden_states))

        # Apply the scale factor before computing attention weights. It's usually more efficient because
        # attention weights are typically a bigger tensor compared to query.
        # It gives identical results because scalars are commutable in matrix multiplication.
        query_layer = query_layer / math.sqrt(self.attention_head_size)

        # Compute NA between "query" and "key" to get the raw attention scores, and add relative positional biases.
        attention_scores = natten2dqkrpb(query_layer, key_layer, self.rpb, self.kernel_size, self.dilation)

        # Normalize the attention scores to probabilities.
        attention_probs = nn.functional.softmax(attention_scores, dim=-1)

        # This is actually dropping out entire tokens to attend to, which might
        # seem a bit unusual, but is taken from the original Transformer paper.
        attention_probs = self.dropout(attention_probs)

        context_layer = natten2dav(attention_probs, value_layer, self.kernel_size, self.dilation)
        context_layer = context_layer.permute(0, 2, 3, 1, 4).contiguous()
        new_context_layer_shape = context_layer.size()[:-2] + (self.all_head_size,)
        context_layer = context_layer.view(new_context_layer_shape)

        outputs = (context_layer, attention_probs) if output_attentions else (context_layer,)

        return outputs


class NeighborhoodAttentionOutput(nn.Module):
    def __init__(self, config, dim):
        super().__init__()
        self.dense = nn.Linear(dim, dim)
        self.dropout = nn.Dropout(config.attention_probs_dropout_prob)

    def forward(self, hidden_states: torch.Tensor, input_tensor: torch.Tensor) -> torch.Tensor:
        hidden_states = self.dense(hidden_states)
        hidden_states = self.dropout(hidden_states)

        return hidden_states


class NeighborhoodAttentionModule(nn.Module):
    def __init__(self, config, dim, num_heads, kernel_size, dilation):
        super().__init__()
        self.self = NeighborhoodAttention(config, dim, num_heads, kernel_size, dilation)
        self.output = NeighborhoodAttentionOutput(config, dim)
        self.pruned_heads = set()

    def prune_heads(self, heads):
        if len(heads) == 0:
            return
        heads, index = find_pruneable_heads_and_indices(
            heads, self.self.num_attention_heads, self.self.attention_head_size, self.pruned_heads
        )

        # Prune linear layers
        self.self.query = prune_linear_layer(self.self.query, index)
        self.self.key = prune_linear_layer(self.self.key, index)
        self.self.value = prune_linear_layer(self.self.value, index)
        self.output.dense = prune_linear_layer(self.output.dense, index, dim=1)

        # Update hyper params and store pruned heads
        self.self.num_attention_heads = self.self.num_attention_heads - len(heads)
        self.self.all_head_size = self.self.attention_head_size * self.self.num_attention_heads
        self.pruned_heads = self.pruned_heads.union(heads)

    def forward(
        self,
        hidden_states: torch.Tensor,
        output_attentions: Optional[bool] = False,
    ) -> tuple[torch.Tensor]:
        self_outputs = self.self(hidden_states, output_attentions)
        attention_output = self.output(self_outputs[0], hidden_states)
        outputs = (attention_output,) + self_outputs[1:]  # add attentions if we output them
        return outputs


class DinatIntermediate(nn.Module):
    def __init__(self, config, dim):
        super().__init__()
        self.dense = nn.Linear(dim, int(config.mlp_ratio * dim))
        if isinstance(config.hidden_act, str):
            self.intermediate_act_fn = ACT2FN[config.hidden_act]
        else:
            self.intermediate_act_fn = config.hidden_act

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        hidden_states = self.dense(hidden_states)
        hidden_states = self.intermediate_act_fn(hidden_states)
        return hidden_states


class DinatOutput(nn.Module):
    def __init__(self, config, dim):
        super().__init__()
        self.dense = nn.Linear(int(config.mlp_ratio * dim), dim)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        hidden_states = self.dense(hidden_states)
        hidden_states = self.dropout(hidden_states)
        return hidden_states


class DinatLayer(nn.Module):
    def __init__(self, config, dim, num_heads, dilation, drop_path_rate=0.0):
        super().__init__()
        self.chunk_size_feed_forward = config.chunk_size_feed_forward
        self.kernel_size = config.kernel_size
        self.dilation = dilation
        self.window_size = self.kernel_size * self.dilation
        self.layernorm_before = nn.LayerNorm(dim, eps=config.layer_norm_eps)
        self.attention = NeighborhoodAttentionModule(
            config, dim, num_heads, kernel_size=self.kernel_size, dilation=self.dilation
        )
        self.drop_path = DinatDropPath(drop_path_rate) if drop_path_rate > 0.0 else nn.Identity()
        self.layernorm_after = nn.LayerNorm(dim, eps=config.layer_norm_eps)
        self.intermediate = DinatIntermediate(config, dim)
        self.output = DinatOutput(config, dim)
        self.layer_scale_parameters = (
            nn.Parameter(config.layer_scale_init_value * torch.ones((2, dim)), requires_grad=True)
            if config.layer_scale_init_value > 0
            else None
        )

    def maybe_pad(self, hidden_states, height, width):
        window_size = self.window_size
        pad_values = (0, 0, 0, 0, 0, 0)
        if height < window_size or width < window_size:
            pad_l = pad_t = 0
            pad_r = max(0, window_size - width)
            pad_b = max(0, window_size - height)
            pad_values = (0, 0, pad_l, pad_r, pad_t, pad_b)
            hidden_states = nn.functional.pad(hidden_states, pad_values)
        return hidden_states, pad_values

    def forward(
        self,
        hidden_states: torch.Tensor,
        output_attentions: Optional[bool] = False,
    ) -> tuple[torch.Tensor, torch.Tensor]:
        batch_size, height, width, channels = hidden_states.size()
        shortcut = hidden_states

        hidden_states = self.layernorm_before(hidden_states)
        # pad hidden_states if they are smaller than kernel size x dilation
        hidden_states, pad_values = self.maybe_pad(hidden_states, height, width)

        _, height_pad, width_pad, _ = hidden_states.shape

        attention_outputs = self.attention(hidden_states, output_attentions=output_attentions)

        attention_output = attention_outputs[0]

        was_padded = pad_values[3] > 0 or pad_values[5] > 0
        if was_padded:
            attention_output = attention_output[:, :height, :width, :].contiguous()

        if self.layer_scale_parameters is not None:
            attention_output = self.layer_scale_parameters[0] * attention_output

        hidden_states = shortcut + self.drop_path(attention_output)

        layer_output = self.layernorm_after(hidden_states)
        layer_output = self.output(self.intermediate(layer_output))

        if self.layer_scale_parameters is not None:
            layer_output = self.layer_scale_parameters[1] * layer_output

        layer_output = hidden_states + self.drop_path(layer_output)

        layer_outputs = (layer_output, attention_outputs[1]) if output_attentions else (layer_output,)
        return layer_outputs


class DinatStage(nn.Module):
    def __init__(self, config, dim, depth, num_heads, dilations, drop_path_rate, downsample):
        super().__init__()
        self.config = config
        self.dim = dim
        self.layers = nn.ModuleList(
            [
                DinatLayer(
                    config=config,
                    dim=dim,
                    num_heads=num_heads,
                    dilation=dilations[i],
                    drop_path_rate=drop_path_rate[i],
                )
                for i in range(depth)
            ]
        )

        # patch merging layer
        if downsample is not None:
            self.downsample = downsample(dim=dim, norm_layer=nn.LayerNorm)
        else:
            self.downsample = None

        self.pointing = False

    def forward(
        self,
        hidden_states: torch.Tensor,
        output_attentions: Optional[bool] = False,
    ) -> tuple[torch.Tensor]:
        _, height, width, _ = hidden_states.size()
        for i, layer_module in enumerate(self.layers):
            layer_outputs = layer_module(hidden_states, output_attentions)
            hidden_states = layer_outputs[0]

        hidden_states_before_downsampling = hidden_states
        if self.downsample is not None:
            hidden_states = self.downsample(hidden_states_before_downsampling)

        stage_outputs = (hidden_states, hidden_states_before_downsampling)

        if output_attentions:
            stage_outputs += layer_outputs[1:]
        return stage_outputs


class DinatEncoder(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.num_levels = len(config.depths)
        self.config = config
        dpr = [x.item() for x in torch.linspace(0, config.drop_path_rate, sum(config.depths), device="cpu")]
        self.levels = nn.ModuleList(
            [
                DinatStage(
                    config=config,
                    dim=int(config.embed_dim * 2**i_layer),
                    depth=config.depths[i_layer],
                    num_heads=config.num_heads[i_layer],
                    dilations=config.dilations[i_layer],
                    drop_path_rate=dpr[sum(config.depths[:i_layer]) : sum(config.depths[: i_layer + 1])],
                    downsample=DinatDownsampler if (i_layer < self.num_levels - 1) else None,
                )
                for i_layer in range(self.num_levels)
            ]
        )

    def forward(
        self,
        hidden_states: torch.Tensor,
        output_attentions: Optional[bool] = False,
        output_hidden_states: Optional[bool] = False,
        output_hidden_states_before_downsampling: Optional[bool] = False,
        return_dict: Optional[bool] = True,
    ) -> Union[tuple, DinatEncoderOutput]:
        all_hidden_states = () if output_hidden_states else None
        all_reshaped_hidden_states = () if output_hidden_states else None
        all_self_attentions = () if output_attentions else None

        if output_hidden_states:
            # rearrange b h w c -> b c h w
            reshaped_hidden_state = hidden_states.permute(0, 3, 1, 2)
            all_hidden_states += (hidden_states,)
            all_reshaped_hidden_states += (reshaped_hidden_state,)

        for i, layer_module in enumerate(self.levels):
            layer_outputs = layer_module(hidden_states, output_attentions)

            hidden_states = layer_outputs[0]
            hidden_states_before_downsampling = layer_outputs[1]

            if output_hidden_states and output_hidden_states_before_downsampling:
                # rearrange b h w c -> b c h w
                reshaped_hidden_state = hidden_states_before_downsampling.permute(0, 3, 1, 2)
                all_hidden_states += (hidden_states_before_downsampling,)
                all_reshaped_hidden_states += (reshaped_hidden_state,)
            elif output_hidden_states and not output_hidden_states_before_downsampling:
                # rearrange b h w c -> b c h w
                reshaped_hidden_state = hidden_states.permute(0, 3, 1, 2)
                all_hidden_states += (hidden_states,)
                all_reshaped_hidden_states += (reshaped_hidden_state,)

            if output_attentions:
                all_self_attentions += layer_outputs[2:]

        if not return_dict:
            return tuple(v for v in [hidden_states, all_hidden_states, all_self_attentions] if v is not None)

        return DinatEncoderOutput(
            last_hidden_state=hidden_states,
            hidden_states=all_hidden_states,
            attentions=all_self_attentions,
            reshaped_hidden_states=all_reshaped_hidden_states,
        )


@auto_docstring
class DinatPreTrainedModel(PreTrainedModel):
    config: DinatConfig
    base_model_prefix = "dinat"
    main_input_name = "pixel_values"

    def _init_weights(self, module):
        """Initialize the weights"""
        if isinstance(module, (nn.Linear, nn.Conv2d)):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.bias is not None:
                module.bias.data.zero_()
        elif isinstance(module, nn.LayerNorm):
            module.bias.data.zero_()
            module.weight.data.fill_(1.0)


@auto_docstring
class DinatModel(DinatPreTrainedModel):
    def __init__(self, config, add_pooling_layer=True):
        r"""
        add_pooling_layer (`bool`, *optional*, defaults to `True`):
            Whether to add a pooling layer
        """
        super().__init__(config)

        requires_backends(self, ["natten"])

        self.config = config
        self.num_levels = len(config.depths)
        self.num_features = int(config.embed_dim * 2 ** (self.num_levels - 1))

        self.embeddings = DinatEmbeddings(config)
        self.encoder = DinatEncoder(config)

        self.layernorm = nn.LayerNorm(self.num_features, eps=config.layer_norm_eps)
        self.pooler = nn.AdaptiveAvgPool1d(1) if add_pooling_layer else None

        # Initialize weights and apply final processing
        self.post_init()

    def get_input_embeddings(self):
        return self.embeddings.patch_embeddings

    def _prune_heads(self, heads_to_prune):
        """
        Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
        class PreTrainedModel
        """
        for layer, heads in heads_to_prune.items():
            self.encoder.layer[layer].attention.prune_heads(heads)

    @auto_docstring
    def forward(
        self,
        pixel_values: Optional[torch.FloatTensor] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[tuple, DinatModelOutput]:
        output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
        output_hidden_states = (
            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
        )
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        if pixel_values is None:
            raise ValueError("You have to specify pixel_values")

        embedding_output = self.embeddings(pixel_values)

        encoder_outputs = self.encoder(
            embedding_output,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )

        sequence_output = encoder_outputs[0]
        sequence_output = self.layernorm(sequence_output)

        pooled_output = None
        if self.pooler is not None:
            pooled_output = self.pooler(sequence_output.flatten(1, 2).transpose(1, 2))
            pooled_output = torch.flatten(pooled_output, 1)

        if not return_dict:
            output = (sequence_output, pooled_output) + encoder_outputs[1:]

            return output

        return DinatModelOutput(
            last_hidden_state=sequence_output,
            pooler_output=pooled_output,
            hidden_states=encoder_outputs.hidden_states,
            attentions=encoder_outputs.attentions,
            reshaped_hidden_states=encoder_outputs.reshaped_hidden_states,
        )


@auto_docstring(
    custom_intro="""
    Dinat Model transformer with an image classification head on top (a linear layer on top of the final hidden state
    of the [CLS] token) e.g. for ImageNet.
    """
)
class DinatForImageClassification(DinatPreTrainedModel):
    def __init__(self, config):
        super().__init__(config)

        requires_backends(self, ["natten"])

        self.num_labels = config.num_labels
        self.dinat = DinatModel(config)

        # Classifier head
        self.classifier = (
            nn.Linear(self.dinat.num_features, config.num_labels) if config.num_labels > 0 else nn.Identity()
        )

        # Initialize weights and apply final processing
        self.post_init()

    @auto_docstring
    def forward(
        self,
        pixel_values: Optional[torch.FloatTensor] = None,
        labels: Optional[torch.LongTensor] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[tuple, DinatImageClassifierOutput]:
        r"""
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the image classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        """
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        outputs = self.dinat(
            pixel_values,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )

        pooled_output = outputs[1]

        logits = self.classifier(pooled_output)

        loss = None
        if labels is not None:
            if self.config.problem_type is None:
                if self.num_labels == 1:
                    self.config.problem_type = "regression"
                elif self.num_labels > 1 and (labels.dtype == torch.long or labels.dtype == torch.int):
                    self.config.problem_type = "single_label_classification"
                else:
                    self.config.problem_type = "multi_label_classification"

            if self.config.problem_type == "regression":
                loss_fct = MSELoss()
                if self.num_labels == 1:
                    loss = loss_fct(logits.squeeze(), labels.squeeze())
                else:
                    loss = loss_fct(logits, labels)
            elif self.config.problem_type == "single_label_classification":
                loss_fct = CrossEntropyLoss()
                loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
            elif self.config.problem_type == "multi_label_classification":
                loss_fct = BCEWithLogitsLoss()
                loss = loss_fct(logits, labels)

        if not return_dict:
            output = (logits,) + outputs[2:]
            return ((loss,) + output) if loss is not None else output

        return DinatImageClassifierOutput(
            loss=loss,
            logits=logits,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
            reshaped_hidden_states=outputs.reshaped_hidden_states,
        )


@auto_docstring(
    custom_intro="""
    NAT backbone, to be used with frameworks like DETR and MaskFormer.
    """
)
class DinatBackbone(DinatPreTrainedModel, BackboneMixin):
    def __init__(self, config):
        super().__init__(config)
        super()._init_backbone(config)

        requires_backends(self, ["natten"])

        self.embeddings = DinatEmbeddings(config)
        self.encoder = DinatEncoder(config)
        self.num_features = [config.embed_dim] + [int(config.embed_dim * 2**i) for i in range(len(config.depths))]

        # Add layer norms to hidden states of out_features
        hidden_states_norms = {}
        for stage, num_channels in zip(self._out_features, self.channels):
            hidden_states_norms[stage] = nn.LayerNorm(num_channels)
        self.hidden_states_norms = nn.ModuleDict(hidden_states_norms)

        # Initialize weights and apply final processing
        self.post_init()

    def get_input_embeddings(self):
        return self.embeddings.patch_embeddings

    @auto_docstring
    def forward(
        self,
        pixel_values: torch.Tensor,
        output_hidden_states: Optional[bool] = None,
        output_attentions: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> BackboneOutput:
        r"""
        Examples:

        ```python
        >>> from transformers import AutoImageProcessor, AutoBackbone
        >>> import torch
        >>> from PIL import Image
        >>> import requests

        >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
        >>> image = Image.open(requests.get(url, stream=True).raw)

        >>> processor = AutoImageProcessor.from_pretrained("shi-labs/nat-mini-in1k-224")
        >>> model = AutoBackbone.from_pretrained(
        ...     "shi-labs/nat-mini-in1k-224", out_features=["stage1", "stage2", "stage3", "stage4"]
        ... )

        >>> inputs = processor(image, return_tensors="pt")

        >>> outputs = model(**inputs)

        >>> feature_maps = outputs.feature_maps
        >>> list(feature_maps[-1].shape)
        [1, 512, 7, 7]
        ```"""
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict
        output_hidden_states = (
            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
        )
        output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions

        embedding_output = self.embeddings(pixel_values)

        outputs = self.encoder(
            embedding_output,
            output_attentions=output_attentions,
            output_hidden_states=True,
            output_hidden_states_before_downsampling=True,
            return_dict=True,
        )

        hidden_states = outputs.reshaped_hidden_states

        feature_maps = ()
        for stage, hidden_state in zip(self.stage_names, hidden_states):
            if stage in self.out_features:
                batch_size, num_channels, height, width = hidden_state.shape
                hidden_state = hidden_state.permute(0, 2, 3, 1).contiguous()
                hidden_state = hidden_state.view(batch_size, height * width, num_channels)
                hidden_state = self.hidden_states_norms[stage](hidden_state)
                hidden_state = hidden_state.view(batch_size, height, width, num_channels)
                hidden_state = hidden_state.permute(0, 3, 1, 2).contiguous()
                feature_maps += (hidden_state,)

        if not return_dict:
            output = (feature_maps,)
            if output_hidden_states:
                output += (outputs.hidden_states,)
            return output

        return BackboneOutput(
            feature_maps=feature_maps,
            hidden_states=outputs.hidden_states if output_hidden_states else None,
            attentions=outputs.attentions,
        )


__all__ = ["DinatForImageClassification", "DinatModel", "DinatPreTrainedModel", "DinatBackbone"]