
    <hd              	          S r SSKrSSKrSSKJr  SSKJrJr  SSKrSSK	J
s  Jr  SSKrSSKJ
r
  SSKJrJrJr  SSKJr  SS	KJrJr  SS
KJr  SSKJrJr  SSKJrJr  SSKJ r   \RB                  " \"5      r#S-S\RH                  S\%S\&S\RH                  4S jjr' " S S\
RP                  5      r) " S S\
RP                  5      r* " S S\
RP                  5      r+ " S S\
RP                  5      r, " S S\
RP                  5      r- " S S\
RP                  5      r. " S  S!\
RP                  5      r/ " S" S#\
RP                  5      r0\ " S$ S%\5      5       r1\ " S& S'\15      5       r2\" S(S)9 " S* S+\15      5       r3/ S,Qr4g).zPyTorch PVT model.    N)Iterable)OptionalUnion)nn)BCEWithLogitsLossCrossEntropyLossMSELoss   )ACT2FN)BaseModelOutputImageClassifierOutput)PreTrainedModel) find_pruneable_heads_and_indicesprune_linear_layer)auto_docstringlogging   )	PvtConfiginput	drop_probtrainingreturnc                    US:X  d  U(       d  U $ SU-
  nU R                   S   4SU R                  S-
  -  -   nU[        R                  " X@R                  U R
                  S9-   nUR                  5         U R                  U5      U-  nU$ )a*  
Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).

Comment by Ross Wightman: This is the same as the DropConnect impl I created for EfficientNet, etc networks,
however, the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper...
See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for changing the
layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use 'survival rate' as the
argument.
        r   r   )r   )dtypedevice)shapendimtorchrandr   r   floor_div)r   r   r   	keep_probr   random_tensoroutputs          \/var/www/html/shao/venv/lib/python3.13/site-packages/transformers/models/pvt/modeling_pvt.py	drop_pathr'   *   s     CxII[[^

Q 77E

5ELL YYMYYy!M1FM    c                      ^  \ rS rSrSrSS\\   SS4U 4S jjjrS\R                  S\R                  4S jr
S\4S	 jrS
rU =r$ )PvtDropPath?   zXDrop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).Nr   r   c                 .   > [         TU ]  5         Xl        g N)super__init__r   )selfr   	__class__s     r&   r/   PvtDropPath.__init__B   s    "r(   hidden_statesc                 B    [        XR                  U R                  5      $ r-   )r'   r   r   r0   r3   s     r&   forwardPvtDropPath.forwardF   s    FFr(   c                      SU R                    3$ )Nzp=r   )r0   s    r&   
extra_reprPvtDropPath.extra_reprI   s    DNN#$$r(   r9   r-   )__name__
__module____qualname____firstlineno____doc__r   floatr/   r   Tensorr6   strr:   __static_attributes____classcell__r1   s   @r&   r*   r*   ?   sQ    b#(5/ #T # #GU\\ Gell G%C % %r(   r*   c                      ^  \ rS rSrSr SS\S\\\\   4   S\\\\   4   S\S\S\S	\	4U 4S
 jjjr
S\R                  S\S\S\R                  4S jrS\R                  S\\R                  \\4   4S jrSrU =r$ )PvtPatchEmbeddingsM   z
This class turns `pixel_values` of shape `(batch_size, num_channels, height, width)` into the initial
`hidden_states` (patch embeddings) of shape `(batch_size, seq_length, hidden_size)` to be consumed by a
Transformer.
config
image_size
patch_sizestridenum_channelshidden_size	cls_tokenc                   > [         T	U ]  5         Xl        [        U[        R
                  R                  5      (       a  UOX"4n[        U[        R
                  R                  5      (       a  UOX34nUS   US   -  US   US   -  -  nX l        X0l        XPl	        Xl
        [        R                  " [        R                  " SU(       a  US-   OUU5      5      U l        U(       a,  [        R                  " [        R                   " SSU5      5      OS U l        [        R$                  " XVXCS9U l        [        R(                  " XaR*                  S9U l        [        R.                  " UR0                  S9U l        g )Nr   r   kernel_sizerM   eps)p)r.   r/   rJ   
isinstancecollectionsabcr   rK   rL   rN   num_patchesr   	Parameterr   randnposition_embeddingszerosrP   Conv2d
projection	LayerNormlayer_norm_eps
layer_normDropouthidden_dropout_probdropout)
r0   rJ   rK   rL   rM   rN   rO   rP   rZ   r1   s
            r&   r/   PvtPatchEmbeddings.__init__T   s    	#-j+//:R:R#S#SZZdYq
#-j+//:R:R#S#SZZdYq
!!}
15*Q-:VW=:XY$$(&#%<<KKi;?[+V$
  JSekk!Q&DEX\))L6e,,{8M8MNzzF$>$>?r(   
embeddingsheightwidthr   c                    X#-  n[         R                  R                  5       (       d<  X@R                  R                  U R                  R                  -  :X  a  U R
                  $ UR                  SX#S5      R                  SSSS5      n[        R                  " XU4SS9nUR                  SSX#-  5      R                  SSS5      nU$ )Nr   r   r
      bilinear)sizemode)
r   jit
is_tracingrJ   rK   r]   reshapepermuteFinterpolate)r0   rh   ri   rj   rZ   interpolated_embeddingss         r&   interpolate_pos_encoding+PvtPatchEmbeddings.interpolate_pos_encodingp   s    n yy##%%+9O9ORVR]R]RhRh9h*h+++''6"=EEaAqQ
"#--
%Wa"b"9"A"A!R"X"`"`abdegh"i&&r(   pixel_valuesc                 ~   UR                   u  p#pEX0R                  :w  a  [        S5      eU R                  U5      nUR                   Gt ptnUR	                  S5      R                  SS5      nU R                  U5      nU R                  b  U R                  R                  USS5      n	[        R                  " X4SS9nU R                  U R                  S S 2SS 24   XE5      n
[        R                  " U R                  S S 2S S24   U
4SS9n
OU R                  U R                  XE5      n
U R                  X-   5      nXU4$ )NzeMake sure that the channel dimension of the pixel values match with the one set in the configuration.rm   r   rl   dim)r   rN   
ValueErrorr`   flatten	transposerc   rP   expandr   catrx   r]   rf   )r0   rz   
batch_sizerN   ri   rj   patch_embed_rh   rP   r]   s              r&   r6   PvtPatchEmbeddings.forward{   s8   2>2D2D/
&,,,w  ool3'--E!))!,66q!<__[1
>>%--j"bAII#:BJ"&"?"?@X@XYZ\]\^Y^@_ag"o"'))T-E-Ea!e-LNa,bhi"j"&"?"?@X@XZ`"h\\*"BC
5((r(   )
rP   rJ   rf   rK   rc   rN   rZ   rL   r]   r`   F)r<   r=   r>   r?   r@   r   r   intr   boolr/   r   rB   rx   tupler6   rD   rE   rF   s   @r&   rH   rH   M   s      @@ #x},-@ #x},-	@
 @ @ @ @ @8	'5<< 	' 	'UX 	']b]i]i 	')ELL )U5<<c;Q5R ) )r(   rH   c                   n   ^  \ rS rSrS\S\4U 4S jjrS\R                  S\R                  4S jr	Sr
U =r$ )	PvtSelfOutput   rJ   rO   c                    > [         TU ]  5         [        R                  " X"5      U l        [        R
                  " UR                  5      U l        g r-   )r.   r/   r   Lineardenserd   re   rf   )r0   rJ   rO   r1   s      r&   r/   PvtSelfOutput.__init__   s4    YY{8
zz&"<"<=r(   r3   r   c                 J    U R                  U5      nU R                  U5      nU$ r-   r   rf   r5   s     r&   r6   PvtSelfOutput.forward   s$    

=1]3r(   r   )r<   r=   r>   r?   r   r   r/   r   rB   r6   rD   rE   rF   s   @r&   r   r      s6    >y >s >
U\\ ell  r(   r   c                      ^  \ rS rSrSrS\S\S\S\4U 4S jjrS\S	\	R                  4S
 jr SS\	R                  S\S\S\S	\\	R                     4
S jjrSrU =r$ )PvtEfficientSelfAttention   zxEfficient self-attention mechanism with reduction of the sequence [PvT paper](https://huggingface.co/papers/2102.12122).rJ   rO   num_attention_headssequences_reduction_ratioc                 ~  > [         TU ]  5         X l        X0l        U R                  U R                  -  S:w  a&  [	        SU R                   SU R                   S35      e[        U R                  U R                  -  5      U l        U R                  U R                  -  U l        [        R                  " U R                  U R                  UR                  S9U l        [        R                  " U R                  U R                  UR                  S9U l        [        R                  " U R                  U R                  UR                  S9U l        [        R                  " UR                  5      U l        X@l        US:  a>  [        R$                  " X"XDS9U l        [        R(                  " X!R*                  S9U l        g g )	Nr   zThe hidden size (z6) is not a multiple of the number of attention heads ())biasr   rR   rT   )r.   r/   rO   r   r~   r   attention_head_sizeall_head_sizer   r   qkv_biasquerykeyvaluerd   attention_probs_dropout_probrf   r   r_   sequence_reductionra   rb   rc   r0   rJ   rO   r   r   r1   s        r&   r/   "PvtEfficientSelfAttention.__init__   se    	&#6 d666!;#D$4$4#5 622316 
 $'t'7'7$:R:R'R#S !558P8PPYYt//1C1C&//Z
99T--t/A/AXYYt//1C1C&//Z
zz&"E"EF)B&$q(&(ii6O'D# !ll;<Q<QRDO	 )r(   r3   r   c                     UR                  5       S S U R                  U R                  4-   nUR                  U5      nUR	                  SSSS5      $ )Nrl   r   rm   r   r
   )ro   r   r   viewrt   )r0   r3   	new_shapes      r&   transpose_for_scores.PvtEfficientSelfAttention.transpose_for_scores   sT    !&&("-1I1I4KcKc0dd	%**95$$Q1a00r(   ri   rj   output_attentionsc                    U R                  U R                  U5      5      nU R                  S:  aw  UR                  u  pgnUR	                  SSS5      R                  XhX#5      nU R                  U5      nUR                  XhS5      R	                  SSS5      nU R                  U5      nU R                  U R                  U5      5      n	U R                  U R                  U5      5      n
[        R                  " XYR                  SS5      5      nU[        R                  " U R                  5      -  n[         R"                  R%                  USS9nU R'                  U5      n[        R                  " X5      nUR	                  SSSS5      R)                  5       nUR+                  5       S S U R,                  4-   nUR/                  U5      nU(       a  X4nU$ U4nU$ )Nr   r   rm   rl   r|   r
   )r   r   r   r   rt   rs   r   rc   r   r   r   matmulr   mathsqrtr   r   
functionalsoftmaxrf   
contiguousro   r   r   )r0   r3   ri   rj   r   query_layerr   seq_lenrN   	key_layervalue_layerattention_scoresattention_probscontext_layernew_context_layer_shapeoutputss                   r&   r6   !PvtEfficientSelfAttention.forward   s    //

=0IJ))A-0=0C0C-J)11!Q:BB:]ckM 33MBM)11*BOWWXY[\^_`M OOM:M--dhh}.EF	//

=0IJ !<<5H5HR5PQ+dii8P8P.QQ --//0@b/I ,,7_B%--aAq9DDF"/"4"4"6s";t?Q?Q>S"S%**+BC6G=2 O\M]r(   )r   r   rf   rO   r   rc   r   r   r   r   r   r   )r<   r=   r>   r?   r@   r   r   rA   r/   r   rB   r   r   r   r6   rD   rE   rF   s   @r&   r   r      s     CSS.1SHKShmS:1# 1%,, 1 #(*||* * 	*
  * 
u||	* *r(   r   c                      ^  \ rS rSrS\S\S\S\4U 4S jjrS r SS\	R                  S	\S
\S\S\\	R                     4
S jjrSrU =r$ )PvtAttention   rJ   rO   r   r   c                 ~   > [         TU ]  5         [        UUUUS9U l        [	        XS9U l        [        5       U l        g )N)rO   r   r   )rO   )r.   r/   r   r0   r   r%   setpruned_headsr   s        r&   r/   PvtAttention.__init__   s@     	-# 3&?	
	 $FDEr(   c                 6   [        U5      S:X  a  g [        XR                  R                  U R                  R                  U R
                  5      u  p[        U R                  R                  U5      U R                  l        [        U R                  R                  U5      U R                  l        [        U R                  R                  U5      U R                  l	        [        U R                  R                  USS9U R                  l        U R                  R                  [        U5      -
  U R                  l        U R                  R                  U R                  R                  -  U R                  l        U R
                  R                  U5      U l        g )Nr   r   r|   )lenr   r0   r   r   r   r   r   r   r   r%   r   r   union)r0   headsindexs      r&   prune_headsPvtAttention.prune_heads   s   u:?79900$))2O2OQUQbQb

 -TYY__eD		*499==%@		,TYY__eD		.t{{/@/@%QO )-		(E(EE
(R		%"&))"?"?$))B_B_"_		 --33E:r(   r3   ri   rj   r   r   c                 d    U R                  XX45      nU R                  US   5      nU4USS  -   nU$ )Nr   r   )r0   r%   )r0   r3   ri   rj   r   self_outputsattention_outputr   s           r&   r6   PvtAttention.forward  s@     yyQ;;|A7#%QR(88r(   )r%   r   r0   r   )r<   r=   r>   r?   r   r   rA   r/   r   r   rB   r   r   r6   rD   rE   rF   s   @r&   r   r      ss    "".1"HK"hm";& _d"\\36?BW[	u||	 r(   r   c            
          ^  \ rS rSr  SS\S\S\\   S\\   4U 4S jjjrS\R                  S\R                  4S	 jr
S
rU =r$ )PvtFFNi  rJ   in_featureshidden_featuresout_featuresc                 x  > [         TU ]  5         Ub  UOUn[        R                  " X#5      U l        [        UR                  [        5      (       a  [        UR                     U l	        OUR                  U l	        [        R                  " X45      U l
        [        R                  " UR                  5      U l        g r-   )r.   r/   r   r   dense1rW   
hidden_actrC   r   intermediate_act_fndense2rd   re   rf   )r0   rJ   r   r   r   r1   s        r&   r/   PvtFFN.__init__  s     	'3'?|[ii=f''--'-f.?.?'@D$'-'8'8D$ii>zz&"<"<=r(   r3   r   c                     U R                  U5      nU R                  U5      nU R                  U5      nU R                  U5      nU R                  U5      nU$ r-   )r   r   rf   r   r5   s     r&   r6   PvtFFN.forward+  sP    M200?]3M2]3r(   )r   r   rf   r   )NN)r<   r=   r>   r?   r   r   r   r/   r   rB   r6   rD   rE   rF   s   @r&   r   r     sc    
 *.&*>> > "#	>
 sm> >"U\\ ell  r(   r   c                   v   ^  \ rS rSrS\S\S\S\S\S\4U 4S jjrSS	\R                  S
\S\S\
4S jjrSrU =r$ )PvtLayeri4  rJ   rO   r   r'   r   	mlp_ratioc                 ^  > [         TU ]  5         [        R                  " X!R                  S9U l        [        UUUUS9U l        US:  a  [        U5      O[        R                  " 5       U l
        [        R                  " X!R                  S9U l        [        X&-  5      n[        XUS9U l        g )NrT   )rJ   rO   r   r   r   )rJ   r   r   )r.   r/   r   ra   rb   layer_norm_1r   	attentionr*   Identityr'   layer_norm_2r   r   mlp)	r0   rJ   rO   r   r'   r   r   mlp_hidden_sizer1   s	           r&   r/   PvtLayer.__init__5  s     	LL:O:OP%# 3&?	
 4=s?Y/LL:O:OPk56Rabr(   r3   ri   rj   r   c                     U R                  U R                  U5      UUUS9nUS   nUSS  nU R                  U5      nXa-   nU R                  U R	                  U5      5      nU R                  U5      nX-   n	U	4U-   nU$ )N)r3   ri   rj   r   r   r   )r   r   r'   r   r   )
r0   r3   ri   rj   r   self_attention_outputsr   r   
mlp_outputlayer_outputs
             r&   r6   PvtLayer.forwardK  s    !%++M:/	 "0 "
 2!4(,>>*:;(8XXd//>?
^^J/
$1/G+r(   )r   r'   r   r   r   r   )r<   r=   r>   r?   r   r   rA   r/   r   rB   r   r6   rD   rE   rF   s   @r&   r   r   4  st    cc c !	c
 c $)c c,U\\ 3 s _c  r(   r   c                      ^  \ rS rSrS\4U 4S jjr   SS\R                  S\\	   S\\	   S\\	   S\
\\4   4
S	 jjrS
rU =r$ )
PvtEncoderib  rJ   c                   > [         T	U ]  5         Xl        [        R                  " SUR
                  [        UR                  5      SS9R                  5       n/ n[        UR                  5       H  nUR                  [        UUS:X  a  UR                  OU R                  R                  SUS-   -  -  UR                  U   UR                  U   US:X  a  UR                   OUR"                  US-
     UR"                  U   XAR                  S-
  :H  S95        M     [$        R&                  " U5      U l        / nSn[        UR                  5       H  n/ nUS:w  a  XaR                  US-
     -  n[        UR                  U   5       HY  nUR                  [+        UUR"                  U   UR,                  U   X&U-      UR.                  U   UR0                  U   S95        M[     UR                  [$        R&                  " U5      5        M     [$        R&                  " U5      U l        [$        R4                  " UR"                  S   UR6                  S	9U l        g )
Nr   cpu)r   rm   r   )rJ   rK   rL   rM   rN   rO   rP   )rJ   rO   r   r'   r   r   rl   rT   )r.   r/   rJ   r   linspacedrop_path_ratesumdepthstolistrangenum_encoder_blocksappendrH   rK   patch_sizesstridesrN   hidden_sizesr   
ModuleListpatch_embeddingsr   r   sequence_reduction_ratios
mlp_ratiosblockra   rb   rc   )
r0   rJ   drop_path_decaysrh   iblockscurlayersjr1   s
            r&   r/   PvtEncoder.__init__c  s    !>>!V-B-BCDV_delln 
v001A"!45Fv00@V@V[\abefaf[g@h%11!4!>>!,89Q!4!4FDWDWXY\]X]D^ & 3 3A 6#<#<q#@@
 2 !#j 9 v001AFAv}}QU++6==+,%$*$7$7$:,2,F,Fq,I"27";282R2RST2U"("3"3A"6	 - MM"--/0! 2$ ]]6*
 ,,v':':2'>FDYDYZr(   rz   r   output_hidden_statesreturn_dictr   c                 d   U(       a  SOS nU(       a  SOS nUR                   S   n[        U R                  5      nUn	[        [	        U R
                  U R                  5      5       H  u  n
u  pU" U	5      u  pnU H/  nU" XX5      nUS   n	U(       a	  UUS   4-   nU(       d  M*  XY4-   nM1     XS-
  :w  d  MR  U	R                  X}US5      R                  SSSS5      R                  5       n	M     U R                  U	5      n	U(       a  XY4-   nU(       d  [        S XU4 5       5      $ [        U	UUS9$ )	N r   r   rl   r
   rm   c              3   ,   #    U H  oc  M  Uv   M     g 7fr-   r  ).0vs     r&   	<genexpr>%PvtEncoder.forward.<locals>.<genexpr>  s     m$[q$[s   	last_hidden_stater3   
attentions)r   r   r  	enumeratezipr   rs   rt   r   rc   r   r   )r0   rz   r   r	  r
  all_hidden_statesall_self_attentionsr   
num_blocksr3   idxembedding_layerblock_layerri   rj   r  layer_outputss                    r&   r6   PvtEncoder.forward  sK    #7BD$5b4!''*
_
$3<SAVAVX\XbXb=c3d/C//+:=+I(M5$ %mU V -a 0$*=qAQ@S*S'''(9<L(L% % 1n$ - 5 5j%QS T \ \]^`acdfg h s s u 4e 6 14D Dm]GZ$[mmm++*
 	
r(   )r  rJ   rc   r   )FFT)r<   r=   r>   r?   r   r/   r   FloatTensorr   r   r   r   r   r6   rD   rE   rF   s   @r&   r   r   b  ss    0[y 0[j -2/4&*#
''#
 $D>#
 'tn	#

 d^#
 
uo%	&#
 #
r(   r   c                   R    \ rS rSr% \\S'   SrSr/ rS\	R                  SS4S jrS	rg)
PvtPreTrainedModeli  rJ   pvtrz   moduler   Nc                 |   U R                   R                  n[        U[        R                  [        R
                  45      (       af  [        R                  R                  UR                  R                  SUS9  UR                  b%  UR                  R                  R                  5         gg[        U[        R                  5      (       aJ  UR                  R                  R                  5         UR                  R                  R                  S5        g[        U[        5      (       a  [        R                  R                  UR                  R                  SUS9UR                  l	        UR                   bC  [        R                  R                  UR                   R                  SUS9UR                   l	        ggg)zInitialize the weightsr   )meanstdNg      ?)rJ   initializer_rangerW   r   r   r_   inittrunc_normal_weightdatar   zero_ra   fill_rH   r]   rP   )r0   r#  r&  s      r&   _init_weights PvtPreTrainedModel._init_weights  sI   kk++fryy"))455 GG!!&--"4"43C!H{{&  &&( '--KK""$MM$$S) 233.0gg.C.C**// /D /F&&+
 +(*(=(=$$)) )> )  % , 4r(   r  )r<   r=   r>   r?   r   __annotations__base_model_prefixmain_input_name_no_split_modulesr   Moduler.  rD   r  r(   r&   r!  r!    s0    $OBII $ r(   r!  c                      ^  \ rS rSrS\4U 4S jjrS r\   SS\R                  S\
\   S\
\   S\
\   S	\\\4   4
S
 jj5       rSrU =r$ )PvtModeli  rJ   c                 p   > [         TU ]  U5        Xl        [        U5      U l        U R                  5         g r-   )r.   r/   rJ   r   encoder	post_initr0   rJ   r1   s     r&   r/   PvtModel.__init__  s/      "&) 	r(   c                     UR                  5        H7  u  p#U R                  R                  U   R                  R	                  U5        M9     g)z
Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
class PreTrainedModel
N)itemsr8  layerr   r   )r0   heads_to_pruner>  r   s       r&   _prune_headsPvtModel._prune_heads  s<    
 +002LELLu%//;;EB 3r(   rz   r   r	  r
  r   c                 0   Ub  UOU R                   R                  nUb  UOU R                   R                  nUb  UOU R                   R                  nU R	                  UUUUS9nUS   nU(       d	  U4USS  -   $ [        UUR                  UR                  S9$ )Nrz   r   r	  r
  r   r   r  )rJ   r   r	  use_return_dictr8  r   r3   r  )r0   rz   r   r	  r
  encoder_outputssequence_outputs          r&   r6   PvtModel.forward  s     2C1N-TXT_T_TqTq$8$D $++JjJj 	 &1%<k$++B]B],,%/!5#	 ' 
 *!,#%(;;;-)77&11
 	
r(   )rJ   r8  )NNN)r<   r=   r>   r?   r   r/   r@  r   r   r  r   r   r   r   r   r6   rD   rE   rF   s   @r&   r6  r6    s    y C  -1/3&*
''
 $D>
 'tn	

 d^
 
uo%	&
 
r(   r6  z
    Pvt Model transformer with an image classification head on top (a linear layer on top of the final hidden state of
    the [CLS] token) e.g. for ImageNet.
    )custom_introc                      ^  \ rS rSrS\SS4U 4S jjr\    SS\\R                     S\\R                     S\\
   S	\\
   S
\\
   S\\\4   4S jj5       rSrU =r$ )PvtForImageClassificationi  rJ   r   Nc                 6  > [         TU ]  U5        UR                  U l        [        U5      U l        UR                  S:  a.  [
        R                  " UR                  S   UR                  5      O[
        R                  " 5       U l	        U R                  5         g )Nr   rl   )r.   r/   
num_labelsr6  r"  r   r   r   r   
classifierr9  r:  s     r&   r/   "PvtForImageClassification.__init__  sy      ++F# FLEVEVYZEZBIIf))"-v/@/@A`b`k`k`m 	
 	r(   rz   labelsr   r	  r
  c                 :   Ub  UOU R                   R                  nU R                  UUUUS9nUS   nU R                  USS2SSS24   5      nSn	UGb  U R                   R                  c  U R
                  S:X  a  SU R                   l        OoU R
                  S:  aN  UR                  [        R                  :X  d  UR                  [        R                  :X  a  SU R                   l        OSU R                   l        U R                   R                  S:X  aI  [        5       n
U R
                  S:X  a&  U
" UR                  5       UR                  5       5      n	OU
" X5      n	OU R                   R                  S:X  a=  [        5       n
U
" UR                  SU R
                  5      UR                  S5      5      n	O,U R                   R                  S:X  a  [        5       n
U
" X5      n	U(       d  U4USS -   nU	b  U	4U-   $ U$ [        U	UUR                   UR"                  S	9$ )
ab  
labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
    Labels for computing the image classification/regression loss. Indices should be in `[0, ...,
    config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
    `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
NrC  r   r   
regressionsingle_label_classificationmulti_label_classificationrl   )losslogitsr3   r  )rJ   rD  r"  rM  problem_typerL  r   r   longr   r	   squeezer   r   r   r   r3   r  )r0   rz   rO  r   r	  r
  r   rF  rU  rT  loss_fctr%   s               r&   r6   !PvtForImageClassification.forward%  s    &1%<k$++B]B]((%/!5#	  
 "!*Aq!9:{{''/??a'/;DKK,__q(fllejj.HFLL\a\e\eLe/LDKK,/KDKK,{{''<7"9??a'#FNN$4fnn6FGD#F3D))-JJ+-B @&++b/R))-II,./Y,F)-)9TGf$EvE$!//))	
 	
r(   )rM  rL  r"  )NNNN)r<   r=   r>   r?   r   r/   r   r   r   rB   r   r   r   r   r6   rD   rE   rF   s   @r&   rJ  rJ    s    y T   *.,0/3&*;
u||,;
 &;
 $D>	;

 'tn;
 d^;
 
u++	,;
 ;
r(   rJ  )rJ  r6  r!  )r   F)5r@   rX   r   collections.abcr   typingr   r   r   torch.nn.functionalr   r   ru   torch.utils.checkpointtorch.nnr   r   r	   activationsr   modeling_outputsr   r   modeling_utilsr   pytorch_utilsr   r   utilsr   r   configuration_pvtr   
get_loggerr<   loggerrB   rA   r   r'   r4  r*   rH   r   r   r   r   r   r   r!  r6  rJ  __all__r  r(   r&   <module>ri     sq  "    $ "      A A ! F - Q , ( 
		H	%U\\ e T V[VbVb *%")) %A) A)H	BII 	O		 Od'299 'TRYY 6+ryy +\V
 V
r   @ 0
! 0
 0
f K
 2 K
K
\ Jr(   