
    <he              	          S r SSKrSSKJr  SSKJrJr  SSKrSSK	rSSKJ
r
  SSKJrJrJr  SSKJrJr  SS	KJrJrJr  SS
KJrJr  SSKJr  \R6                  " \5      r\\" SS9 " S S\5      5       5       rS:S\R>                  S\ S\!S\R>                  4S jjr" " S S\
RF                  5      r$ " S S\
RF                  5      r% " S S\
RF                  5      r& " S S\
RF                  5      r' " S S\
RF                  5      r( " S  S!\
RF                  5      r) " S" S#\
RF                  5      r* " S$ S%\
RF                  5      r+ " S& S'\
RF                  5      r, " S( S)\
RF                  5      r- " S* S+\
RF                  5      r. " S, S-\
RF                  5      r/ " S. S/\
RF                  5      r0 " S0 S1\
RF                  5      r1\ " S2 S3\5      5       r2\ " S4 S5\25      5       r3\" S6S9 " S7 S8\25      5       r4/ S9Qr5g);zPyTorch CvT model.    N)	dataclass)OptionalUnion)nn)BCEWithLogitsLossCrossEntropyLossMSELoss   )$ImageClassifierOutputWithNoAttentionModelOutput)PreTrainedModel find_pruneable_heads_and_indicesprune_linear_layer)auto_docstringlogging   )	CvtConfigzV
    Base class for model's outputs, with potential hidden states and attentions.
    )custom_introc                       \ rS rSr% SrSr\\R                     \	S'   Sr
\\R                     \	S'   Sr\\\R                  S4      \	S'   Srg)	BaseModelOutputWithCLSToken#   z
cls_token_value (`torch.FloatTensor` of shape `(batch_size, 1, hidden_size)`):
    Classification token at the output of the last layer of the model.
Nlast_hidden_statecls_token_value.hidden_states )__name__
__module____qualname____firstlineno____doc__r   r   torchFloatTensor__annotations__r   r   tuple__static_attributes__r       \/var/www/html/shao/venv/lib/python3.13/site-packages/transformers/models/cvt/modeling_cvt.pyr   r   #   sS    
 6:x 1 12937OXe//07=AM8E%"3"3S"89:Ar&   r   input	drop_probtrainingreturnc                    US:X  d  U(       d  U $ SU-
  nU R                   S   4SU R                  S-
  -  -   nU[        R                  " X@R                  U R
                  S9-   nUR                  5         U R                  U5      U-  nU$ )a*  
Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).

Comment by Ross Wightman: This is the same as the DropConnect impl I created for EfficientNet, etc networks,
however, the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper...
See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for changing the
layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use 'survival rate' as the
argument.
        r   r   )r   )dtypedevice)shapendimr!   randr.   r/   floor_div)r(   r)   r*   	keep_probr0   random_tensoroutputs          r'   	drop_pathr8   5   s     CxII[[^

Q 77E

5ELL YYMYYy!M1FMr&   c                      ^  \ rS rSrSrSS\\   SS4U 4S jjjrS\R                  S\R                  4S jr
S\4S	 jrS
rU =r$ )CvtDropPathJ   zXDrop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).Nr)   r+   c                 .   > [         TU ]  5         Xl        g N)super__init__r)   )selfr)   	__class__s     r'   r?   CvtDropPath.__init__M   s    "r&   r   c                 B    [        XR                  U R                  5      $ r=   )r8   r)   r*   )r@   r   s     r'   forwardCvtDropPath.forwardQ   s    FFr&   c                      SU R                    3$ )Nzp=r)   )r@   s    r'   
extra_reprCvtDropPath.extra_reprT   s    DNN#$$r&   rG   r=   )r   r   r   r   r    r   floatr?   r!   TensorrD   strrH   r%   __classcell__rA   s   @r'   r:   r:   J   sQ    b#(5/ #T # #GU\\ Gell G%C % %r&   r:   c                   2   ^  \ rS rSrSrU 4S jrS rSrU =r$ )CvtEmbeddingsX   z
Construct the CvT embeddings.
c                 x   > [         TU ]  5         [        XX4US9U l        [        R
                  " U5      U l        g )N)
patch_sizenum_channels	embed_dimstridepadding)r>   r?   CvtConvEmbeddingsconvolution_embeddingsr   Dropoutdropout)r@   rS   rT   rU   rV   rW   dropout_raterA   s          r'   r?   CvtEmbeddings.__init__]   s5    &7!	jq'
# zz,/r&   c                 J    U R                  U5      nU R                  U5      nU$ r=   rY   r[   )r@   pixel_valueshidden_states      r'   rD   CvtEmbeddings.forwardd   s&    22<@||L1r&   r_   	r   r   r   r   r    r?   rD   r%   rM   rN   s   @r'   rP   rP   X   s    0 r&   rP   c                   2   ^  \ rS rSrSrU 4S jrS rSrU =r$ )rX   j   z
Image to Conv Embedding.
c                    > [         TU ]  5         [        U[        R                  R
                  5      (       a  UOX4nXl        [        R                  " X#XUS9U l	        [        R                  " U5      U l        g )N)kernel_sizerV   rW   )r>   r?   
isinstancecollectionsabcIterablerS   r   Conv2d
projection	LayerNormnormalization)r@   rS   rT   rU   rV   rW   rA   s         r'   r?   CvtConvEmbeddings.__init__o   sZ    #-j+//:R:R#S#SZZdYq
$))Llst\\)4r&   c                    U R                  U5      nUR                  u  p#pEXE-  nUR                  X#U5      R                  SSS5      nU R                  (       a  U R	                  U5      nUR                  SSS5      R                  X#XE5      nU$ Nr      r   )rm   r0   viewpermutero   )r@   r`   
batch_sizerT   heightwidthhidden_sizes          r'   rD   CvtConvEmbeddings.forwardv   s    |42>2D2D/
&n#((;OWWXY[\^_`--l;L#++Aq!499*TZbr&   )ro   rS   rm   rc   rN   s   @r'   rX   rX   j   s    5
 
r&   rX   c                   .   ^  \ rS rSrU 4S jrS rSrU =r$ )CvtSelfAttentionConvProjection   c           
         > [         TU ]  5         [        R                  " UUUUUSUS9U l        [        R
                  " U5      U l        g )NF)rg   rW   rV   biasgroups)r>   r?   r   rl   convolutionBatchNorm2dro   )r@   rU   rg   rW   rV   rA   s        r'   r?   'CvtSelfAttentionConvProjection.__init__   sG    99#
  ^^I6r&   c                 J    U R                  U5      nU R                  U5      nU$ r=   r   ro   r@   ra   s     r'   rD   &CvtSelfAttentionConvProjection.forward   s(    ''5)),7r&   r   r   r   r   r   r?   rD   r%   rM   rN   s   @r'   r|   r|      s    7 r&   r|   c                       \ rS rSrS rSrg) CvtSelfAttentionLinearProjection   c                 r    UR                   u  p#pEXE-  nUR                  X#U5      R                  SSS5      nU$ rr   )r0   rt   ru   )r@   ra   rv   rT   rw   rx   ry   s          r'   rD   (CvtSelfAttentionLinearProjection.forward   sC    2>2D2D/
&n#((;OWWXY[\^_`r&   r   N)r   r   r   r   rD   r%   r   r&   r'   r   r      s    r&   r   c                   2   ^  \ rS rSrSU 4S jjrS rSrU =r$ )CvtSelfAttentionProjection   c                 n   > [         TU ]  5         US:X  a  [        XX45      U l        [	        5       U l        g )Ndw_bn)r>   r?   r|   convolution_projectionr   linear_projection)r@   rU   rg   rW   rV   projection_methodrA   s         r'   r?   #CvtSelfAttentionProjection.__init__   s1    '*Hah*qD'!A!Cr&   c                 J    U R                  U5      nU R                  U5      nU$ r=   r   r   r   s     r'   rD   "CvtSelfAttentionProjection.forward   s(    22<@--l;r&   r   )r   r   rN   s   @r'   r   r      s    D r&   r   c                   :   ^  \ rS rSr SU 4S jjrS rS rSrU =r$ )CvtSelfAttention   c                   > [         TU ]  5         US-  U l        Xl        X l        Xl        [        UUUUUS:X  a  SOUS9U l        [        X#XWUS9U l        [        X#XWUS9U l	        [        R                  " X"U	S9U l        [        R                  " X"U	S9U l        [        R                  " X"U	S9U l        [        R                  " U
5      U l        g )Ng      avglinear)r   )r   )r>   r?   scalewith_cls_tokenrU   	num_headsr   convolution_projection_queryconvolution_projection_keyconvolution_projection_valuer   Linearprojection_queryprojection_keyprojection_valuerZ   r[   )r@   r   rU   rg   	padding_q
padding_kvstride_q	stride_kvqkv_projection_methodqkv_biasattention_drop_rater   kwargsrA   s                r'   r?   CvtSelfAttention.__init__   s     	_
,"",F*?5*HhNc-
) +EJMb+
' -GJMb-
) !#		)X N ii	8L "		)X Nzz"56r&   c                     UR                   u  p#nU R                  U R                  -  nUR                  X#U R                  U5      R	                  SSSS5      $ )Nr   rs   r   r
   )r0   rU   r   rt   ru   )r@   ra   rv   ry   _head_dims         r'   "rearrange_for_multi_head_attention3CvtSelfAttention.rearrange_for_multi_head_attention   sR    %1%7%7"
>>T^^3  $..(S[[\]_`bcefggr&   c                 R   U R                   (       a  [        R                  " USX#-  /S5      u  pAUR                  u  pVnUR	                  SSS5      R                  XWX#5      nU R                  U5      nU R                  U5      n	U R                  U5      n
U R                   (       aC  [        R                  " WU	4SS9n	[        R                  " XH4SS9n[        R                  " XJ4SS9n
U R                  U R                  -  nU R                  U R                  U	5      5      n	U R                  U R                  U5      5      nU R                  U R                  U
5      5      n
[        R                   " SX/5      U R"                  -  n[        R$                  R&                  R)                  USS9nU R+                  U5      n[        R                   " SX/5      nUR                  u    pnUR	                  SSSS5      R-                  5       R                  XVU R                  U-  5      nU$ )	Nr   r   rs   dimzbhlk,bhtk->bhltzbhlt,bhtv->bhlvr
   )r   r!   splitr0   ru   rt   r   r   r   catrU   r   r   r   r   r   einsumr   r   
functionalsoftmaxr[   
contiguous)r@   ra   rw   rx   	cls_tokenrv   ry   rT   keyqueryvaluer   attention_scoreattention_probscontextr   s                   r'   rD   CvtSelfAttention.forward   s   &+kk,FN@SUV&W#I0<0B0B-
#++Aq!499*TZb--l;11,?11,?IIy%0a8E))Y,!4CIIy0a8E>>T^^3778M8Me8TU55d6I6I#6NO778M8Me8TU,,'85,G$**T((--55o25N,,7,,0?2JK&}}11//!Q1-88:??
Y]YgYgjrYrsr&   )r   r   r   r[   rU   r   r   r   r   r   r   T)	r   r   r   r   r?   r   rD   r%   rM   rN   s   @r'   r   r      s     '7Rh r&   r   c                   2   ^  \ rS rSrSrU 4S jrS rSrU =r$ )CvtSelfOutput   z
The residual connection is defined in CvtLayer instead of here (as is the case with other models), due to the
layernorm applied before each block.
c                    > [         TU ]  5         [        R                  " X5      U l        [        R
                  " U5      U l        g r=   )r>   r?   r   r   denserZ   r[   )r@   rU   	drop_raterA   s      r'   r?   CvtSelfOutput.__init__  s.    YYy4
zz),r&   c                 J    U R                  U5      nU R                  U5      nU$ r=   r   r[   r@   ra   input_tensors      r'   rD   CvtSelfOutput.forward	  s$    zz,/||L1r&   r   rc   rN   s   @r'   r   r      s    
-
 r&   r   c                   :   ^  \ rS rSr SU 4S jjrS rS rSrU =r$ )CvtAttentioni  c                    > [         TU ]  5         [        UUUUUUUUU	U
U5      U l        [	        X+5      U l        [        5       U l        g r=   )r>   r?   r   	attentionr   r7   setpruned_heads)r@   r   rU   rg   r   r   r   r   r   r   r   r   r   rA   s                r'   r?   CvtAttention.__init__  sU     	)!
 $I9Er&   c                 6   [        U5      S:X  a  g [        XR                  R                  U R                  R                  U R
                  5      u  p[        U R                  R                  U5      U R                  l        [        U R                  R                  U5      U R                  l        [        U R                  R                  U5      U R                  l	        [        U R                  R                  USS9U R                  l        U R                  R                  [        U5      -
  U R                  l        U R                  R                  U R                  R                  -  U R                  l        U R
                  R                  U5      U l        g )Nr   r   r   )lenr   r   num_attention_headsattention_head_sizer   r   r   r   r   r7   r   all_head_sizeunion)r@   headsindexs      r'   prune_headsCvtAttention.prune_heads0  s   u:?7>>55t~~7Y7Y[_[l[l

  2$..2F2FN/0B0BEJ1$..2F2FN.t{{/@/@%QO .2^^-O-ORUV[R\-\*'+~~'I'IDNNLnLn'n$ --33E:r&   c                 L    U R                  XU5      nU R                  XA5      nU$ r=   )r   r7   )r@   ra   rw   rx   self_outputattention_outputs         r'   rD   CvtAttention.forwardB  s'    nn\5A;;{Ar&   )r   r7   r   r   )	r   r   r   r   r?   r   rD   r%   rM   rN   s   @r'   r   r     s     "@;$   r&   r   c                   .   ^  \ rS rSrU 4S jrS rSrU =r$ )CvtIntermediateiH  c                    > [         TU ]  5         [        R                  " U[	        X-  5      5      U l        [        R                  " 5       U l        g r=   )r>   r?   r   r   intr   GELU
activation)r@   rU   	mlp_ratiorA   s      r'   r?   CvtIntermediate.__init__I  s5    YYy#i.C*DE
'')r&   c                 J    U R                  U5      nU R                  U5      nU$ r=   )r   r   r   s     r'   rD   CvtIntermediate.forwardN  s$    zz,/|4r&   )r   r   r   rN   s   @r'   r   r   H  s    $
 r&   r   c                   .   ^  \ rS rSrU 4S jrS rSrU =r$ )	CvtOutputiT  c                    > [         TU ]  5         [        R                  " [	        X-  5      U5      U l        [        R                  " U5      U l        g r=   )r>   r?   r   r   r   r   rZ   r[   )r@   rU   r   r   rA   s       r'   r?   CvtOutput.__init__U  s8    YYs9#899E
zz),r&   c                 R    U R                  U5      nU R                  U5      nX-   nU$ r=   r   r   s      r'   rD   CvtOutput.forwardZ  s,    zz,/||L1#2r&   r   r   rN   s   @r'   r   r   T  s    -
 r&   r   c                   8   ^  \ rS rSrSr SU 4S jjrS rSrU =r$ )CvtLayeria  zZ
CvtLayer composed by attention layers, normalization and multi-layer perceptrons (mlps).
c                 X  > [         TU ]  5         [        UUUUUUUUU	U
UU5      U l        [	        X,5      U l        [        X,U5      U l        US:  a	  [        US9O[        R                  " 5       U l        [        R                  " U5      U l        [        R                  " U5      U l        g )Nr-   rG   )r>   r?   r   r   r   intermediater   r7   r:   r   Identityr8   rn   layernorm_beforelayernorm_after)r@   r   rU   rg   r   r   r   r   r   r   r   r   r   drop_path_rater   rA   s                  r'   r?   CvtLayer.__init__f  s    " 	%!
 ,IA	i@BPSVBV~>\^\g\g\i "Y 7!||I6r&   c                     U R                  U R                  U5      UU5      nUnU R                  U5      nXQ-   nU R                  U5      nU R	                  U5      nU R                  Xa5      nU R                  U5      nU$ r=   )r   r  r8   r  r   r7   )r@   ra   rw   rx   self_attention_outputr   layer_outputs          r'   rD   CvtLayer.forward  s     $!!,/!

 1>>*:; (6 ++L9((6 {{<>~~l3r&   )r   r8   r   r  r  r7   r   rc   rN   s   @r'   r   r   a  s    & %7N r&   r   c                   .   ^  \ rS rSrU 4S jrS rSrU =r$ )CvtStagei  c                 ~  > [         TU ]  5         Xl        X l        U R                  R                  U R                     (       aH  [
        R                  " [        R                  " SSU R                  R                  S   5      5      U l        [        UR                  U R                     UR                  U R                     U R                  S:X  a  UR                  OUR                  U R                  S-
     UR                  U R                     UR                  U R                     UR                  U R                     S9U l        [        R"                  " SUR$                  U R                     UR&                  U   SS9 Vs/ sH  o3R)                  5       PM     nn[
        R*                  " [-        UR&                  U R                     5       Vs/ sGHQ  n[/        UR0                  U R                     UR                  U R                     UR2                  U R                     UR4                  U R                     UR6                  U R                     UR8                  U R                     UR:                  U R                     UR<                  U R                     UR>                  U R                     UR@                  U R                     UR                  U R                     X@R                     URB                  U R                     UR                  U R                     S9PGMT     sn6 U l"        g s  snf s  snf )Nr   r   r   )rS   rV   rT   rU   rW   r\   cpu)r/   )r   rU   rg   r   r   r   r   r   r   r   r   r  r   r   )#r>   r?   configstager   r   	Parameterr!   randnrU   rP   patch_sizespatch_striderT   patch_paddingr   	embeddinglinspacer  depthitem
Sequentialranger   r   
kernel_qkvr   r   r   r   r   r   r   r   layers)r@   r  r  xdrop_path_ratesr   rA   s         r'   r?   CvtStage.__init__  s   
;;  ,\\%++aDKK<Q<QRT<U*VWDN&))$**5&&tzz204

a,,VEUEUVZV`V`cdVdEe&&tzz2((4))$**5
 $nnQ0E0Edjj0QSYS_S_`eSfotu
uFFHu 	 
 mm$ v||DJJ78#" 9A! $..tzz:$..tzz: & 1 1$** =$..tzz:%00<$..tzz:#__TZZ8*0*F*Ftzz*R#__TZZ8(.(B(B4::(N$..tzz:#2::#>$..tzz:#)#3#3DJJ#?  9#
	

s   L5EL:c                 Z   S nU R                  U5      nUR                  u  p4pVUR                  X4XV-  5      R                  SSS5      nU R                  R
                  U R                     (       a3  U R
                  R                  USS5      n[        R                  " X!4SS9nU R                   H  nU" XU5      nUnM     U R                  R
                  U R                     (       a  [        R                  " USXV-  /S5      u  p!UR                  SSS5      R                  X4XV5      nX4$ )Nr   rs   r   r   r   )r  r0   rt   ru   r  r   r  expandr!   r   r  r   )	r@   ra   r   rv   rT   rw   rx   layerlayer_outputss	            r'   rD   CvtStage.forward  s   	~~l32>2D2D/
&#((6>RZZ[\^_abc;;  ,--j"bAI 99i%>AFL[[E!,>M(L ! ;;  ,&+kk,FN@SUV&W#I#++Aq!499*TZb&&r&   )r   r  r  r  r  r   rN   s   @r'   r
  r
    s    (
T' 'r&   r
  c                   2   ^  \ rS rSrU 4S jrSS jrSrU =r$ )
CvtEncoderi  c                    > [         TU ]  5         Xl        [        R                  " / 5      U l        [        [        UR                  5      5       H'  nU R
                  R                  [        X5      5        M)     g r=   )r>   r?   r  r   
ModuleListstagesr  r   r  appendr
  )r@   r  	stage_idxrA   s      r'   r?   CvtEncoder.__init__  sR    mmB's6<<01IKKx:; 2r&   c                     U(       a  SOS nUnS n[        U R                  5       H  u  pxU" U5      u  pVU(       d  M  XE4-   nM     U(       d  [        S XVU4 5       5      $ [        UUUS9$ )Nr   c              3   ,   #    U H  oc  M  Uv   M     g 7fr=   r   ).0vs     r'   	<genexpr>%CvtEncoder.forward.<locals>.<genexpr>  s     b$Pq$Ps   	r   r   r   )	enumerater(  r$   r   )	r@   r`   output_hidden_statesreturn_dictall_hidden_statesra   r   r   stage_modules	            r'   rD   CvtEncoder.forward  s|    "6BD#	!*4;;!7A&2<&@#L##$5$G! "8
 b\>O$Pbbb**%+
 	
r&   )r  r(  )FTr   rN   s   @r'   r%  r%    s    <
 
r&   r%  c                   4    \ rS rSr% \\S'   SrSrS/rS r	Sr
g)	CvtPreTrainedModeli  r  cvtr`   r   c                 p   [        U[        R                  [        R                  45      (       a  [        R                  R                  UR                  R                  SU R                  R                  S9UR                  l        UR                  b%  UR                  R                  R                  5         gg[        U[        R                  5      (       aJ  UR                  R                  R                  5         UR                  R                  R                  S5        g[        U[        5      (       a  U R                  R                  UR                      (       aW  [        R                  R                  UR                  R                  SU R                  R                  S9UR                  l        ggg)zInitialize the weightsr-   )meanstdNg      ?)rh   r   r   rl   inittrunc_normal_weightdatar  initializer_ranger   zero_rn   fill_r
  r   r  )r@   modules     r'   _init_weights CvtPreTrainedModel._init_weights  s&   fryy"))455!#!6!6v}}7I7IPSY]YdYdYvYv!6!wFMM{{&  &&( '--KK""$MM$$S))){{$$V\\2(*(=(=$$))9V9V )> )  % 3 *r&   r   N)r   r   r   r   r   r#   base_model_prefixmain_input_name_no_split_modulesrG  r%   r   r&   r'   r:  r:    s     $O#r&   r:  c                      ^  \ rS rSrS
U 4S jjrS r\   SS\\R                     S\\
   S\\
   S\\\4   4S jj5       rS	rU =r$ )CvtModeli  c                 p   > [         TU ]  U5        Xl        [        U5      U l        U R                  5         g)z^
add_pooling_layer (bool, *optional*, defaults to `True`):
    Whether to add a pooling layer
N)r>   r?   r  r%  encoder	post_init)r@   r  add_pooling_layerrA   s      r'   r?   CvtModel.__init__  s-    
 	 !&)r&   c                     UR                  5        H7  u  p#U R                  R                  U   R                  R	                  U5        M9     g)z
Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
class PreTrainedModel
N)itemsrO  r!  r   r   )r@   heads_to_pruner!  r   s       r'   _prune_headsCvtModel._prune_heads!  s<    
 +002LELLu%//;;EB 3r&   r`   r4  r5  r+   c                    Ub  UOU R                   R                  nUb  UOU R                   R                  nUc  [        S5      eU R	                  UUUS9nUS   nU(       d	  U4USS  -   $ [        UUR                  UR                  S9$ )Nz You have to specify pixel_valuesr4  r5  r   r   r2  )r  r4  use_return_dict
ValueErrorrO  r   r   r   )r@   r`   r4  r5  encoder_outputssequence_outputs         r'   rD   CvtModel.forward)  s     %9$D $++JjJj 	 &1%<k$++B]B]?@@,,!5# ' 

 *!,#%(;;;*-+;;)77
 	
r&   )r  rO  r   )NNN)r   r   r   r   r?   rV  r   r   r!   rK   boolr   r$   r   rD   r%   rM   rN   s   @r'   rM  rM    so    C  04/3&*	
u||,
 'tn
 d^	

 
u11	2
 
r&   rM  z
    Cvt Model transformer with an image classification head on top (a linear layer on top of the final hidden state of
    the [CLS] token) e.g. for ImageNet.
    c                      ^  \ rS rSrU 4S jr\    S
S\\R                     S\\R                     S\\	   S\\	   S\
\\4   4
S jj5       rS	rU =r$ )CvtForImageClassificationiI  c                   > [         TU ]  U5        UR                  U l        [        USS9U l        [
        R                  " UR                  S   5      U l        UR                  S:  a.  [
        R                  " UR                  S   UR                  5      O[
        R                  " 5       U l        U R                  5         g )NF)rQ  r   r   )r>   r?   
num_labelsrM  r;  r   rn   rU   	layernormr   r   
classifierrP  )r@   r  rA   s     r'   r?   "CvtForImageClassification.__init__P  s      ++Fe<f&6&6r&:; CIBSBSVWBWBIIf&&r*F,=,=>]_]h]h]j 	
 	r&   r`   labelsr4  r5  r+   c                 n   Ub  UOU R                   R                  nU R                  UUUS9nUS   nUS   nU R                   R                  S   (       a  U R	                  U5      nOEUR
                  u  ppUR                  XX-  5      R                  SSS5      nU R	                  U5      nUR                  SS9nU R                  U5      nSnUGb  U R                   R                  c  U R                   R                  S:X  a  SU R                   l
        OyU R                   R                  S:  aN  UR                  [        R                  :X  d  UR                  [        R                  :X  a  S	U R                   l
        OS
U R                   l
        U R                   R                  S:X  aS  [!        5       nU R                   R                  S:X  a&  U" UR#                  5       UR#                  5       5      nOU" X5      nOU R                   R                  S	:X  aG  [%        5       nU" UR                  SU R                   R                  5      UR                  S5      5      nO,U R                   R                  S
:X  a  ['        5       nU" X5      nU(       d  U4USS -   nUb  U4U-   $ U$ [)        XUR*                  S9$ )ab  
labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
    Labels for computing the image classification/regression loss. Indices should be in `[0, ...,
    config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
    `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
NrY  r   r   r   rs   r   
regressionsingle_label_classificationmulti_label_classification)losslogitsr   )r  rZ  r;  r   rd  r0   rt   ru   r=  re  problem_typerc  r.   r!   longr   r	   squeezer   r   r   r   )r@   r`   rg  r4  r5  outputsr]  r   rv   rT   rw   rx   sequence_output_meanrm  rl  loss_fctr7   s                    r'   rD   !CvtForImageClassification.forward^  sQ    &1%<k$++B]B]((!5#  
 "!*AJ	;;  $"nnY7O6E6K6K3Jf-22:V^\ddefhiklmO"nn_=O.333:!56{{''/;;))Q./;DKK,[[++a/V\\UZZ5OSYS_S_chclclSl/LDKK,/KDKK,{{''<7"9;;))Q.#FNN$4fnn6FGD#F3D))-JJ+-B0F0F GUWY))-II,./Y,F)-)9TGf$EvE3\c\q\qrrr&   )re  r;  rd  rc  )NNNN)r   r   r   r   r?   r   r   r!   rK   r_  r   r$   r   rD   r%   rM   rN   s   @r'   ra  ra  I  s      04)-/3&*<su||,<s &<s 'tn	<s
 d^<s 
u::	;<s <sr&   ra  )ra  rM  r:  )r-   F)6r    collections.abcri   dataclassesr   typingr   r   r!   torch.utils.checkpointr   torch.nnr   r   r	   modeling_outputsr   r   modeling_utilsr   r   r   utilsr   r   configuration_cvtr   
get_loggerr   loggerr   rK   rJ   r_  r8   Moduler:   rP   rX   r|   r   r   r   r   r   r   r   r   r
  r%  r:  rM  ra  __all__r   r&   r'   <module>r     s     ! "    A A Q c c , ( 
		H	% 
B+ B BU\\ e T V[VbVb *%")) %BII $		 2RYY (ryy 
 
Nryy NbBII "6 299 6 r	bii 	
		 
?ryy ?D<'ryy <'~
 
8   , 0
! 0
 0
f Ls 2 LsLs^ Jr&   