
"""PyTorch UperNet model. Based on OpenMMLab's implementation, found in https://github.com/open-mmlab/mmsegmentation."""

from typing import Optional, Union

import torch
from torch import nn
from torch.nn import CrossEntropyLoss

from ...modeling_outputs import SemanticSegmenterOutput
from ...modeling_utils import PreTrainedModel
from ...utils import auto_docstring
from ...utils.backbone_utils import load_backbone
from .configuration_upernet import UperNetConfig


class UperNetConvModule(nn.Module):
    """
    A convolutional block that bundles conv/norm/activation layers. This block simplifies the usage of convolution
    layers, which are commonly used with a norm layer (e.g., BatchNorm) and activation layer (e.g., ReLU).
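
    Example (an illustrative sketch; the channel counts and input shape below are hypothetical, not tied to any
    released checkpoint):

    ```python
    >>> import torch

    >>> conv_module = UperNetConvModule(in_channels=64, out_channels=128, kernel_size=3, padding=1)
    >>> pixel_features = torch.randn(1, 64, 32, 32)
    >>> conv_module(pixel_features).shape  # conv -> batch norm -> ReLU, spatial size preserved by padding=1
    torch.Size([1, 128, 32, 32])
    ```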
    """

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        kernel_size: Union[int, tuple[int, int]],
        padding: Union[int, tuple[int, int], str] = 0,
        bias: bool = False,
        dilation: Union[int, tuple[int, int]] = 1,
    ) -> None:
        super().__init__()
        self.conv = nn.Conv2d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            padding=padding,
            bias=bias,
            dilation=dilation,
        )
        self.batch_norm = nn.BatchNorm2d(out_channels)
        self.activation = nn.ReLU()

    def forward(self, input: torch.Tensor) -> torch.Tensor:
        output = self.conv(input)
        output = self.batch_norm(output)
        output = self.activation(output)
        return output


class UperNetPyramidPoolingBlock(nn.Module):
    def __init__(self, pool_scale: int, in_channels: int, channels: int) -> None:
        super().__init__()
        self.layers = [
            nn.AdaptiveAvgPool2d(pool_scale),
            UperNetConvModule(in_channels, channels, kernel_size=1),
        ]
        for i, layer in enumerate(self.layers):
            self.add_module(str(i), layer)

    def forward(self, input: torch.Tensor) -> torch.Tensor:
        hidden_state = input
        for layer in self.layers:
            hidden_state = layer(hidden_state)
        return hidden_state


class UperNetPyramidPoolingModule(nn.Module):
    """
    Pyramid Pooling Module (PPM) used in PSPNet.

    Args:
        pool_scales (`tuple[int]`):
            Pooling scales used in Pooling Pyramid Module.
        in_channels (`int`):
            Input channels.
        channels (`int`):
            Channels after modules, before conv_seg.
        align_corners (`bool`):
            align_corners argument of F.interpolate.
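
    Example (an illustrative sketch; the sizes below are hypothetical):

    ```python
    >>> import torch

    >>> ppm = UperNetPyramidPoolingModule(pool_scales=(1, 2, 3, 6), in_channels=512, channels=128, align_corners=False)
    >>> _ = ppm.eval()  # batch norm needs more than one value per channel in training mode (the 1x1 pooled map has one)
    >>> features = torch.randn(1, 512, 16, 16)
    >>> outputs = ppm(features)  # one tensor per pooling scale, each upsampled back to the input resolution
    >>> [tuple(out.shape) for out in outputs]
    [(1, 128, 16, 16), (1, 128, 16, 16), (1, 128, 16, 16), (1, 128, 16, 16)]
    ```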
    """

    def __init__(self, pool_scales: tuple[int, ...], in_channels: int, channels: int, align_corners: bool) -> None:
        super().__init__()
        self.pool_scales = pool_scales
        self.align_corners = align_corners
        self.in_channels = in_channels
        self.channels = channels
        self.blocks = []
        for i, pool_scale in enumerate(pool_scales):
            block = UperNetPyramidPoolingBlock(pool_scale=pool_scale, in_channels=in_channels, channels=channels)
            self.blocks.append(block)
            self.add_module(str(i), block)

    def forward(self, x: torch.Tensor) -> list[torch.Tensor]:
        ppm_outs = []
        for ppm in self.blocks:
            ppm_out = ppm(x)
            upsampled_ppm_out = nn.functional.interpolate(
                ppm_out, size=x.size()[2:], mode="bilinear", align_corners=self.align_corners
            )
            ppm_outs.append(upsampled_ppm_out)
        return ppm_outs


class UperNetHead(nn.Module):
    """
    Unified Perceptual Parsing for Scene Understanding. This head is the implementation of
    [UPerNet](https://huggingface.co/papers/1807.10221).
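
    Example (an illustrative sketch; the backbone channel counts and strides below are hypothetical):

    ```python
    >>> import torch

    >>> config = UperNetConfig(hidden_size=128, num_labels=19)
    >>> head = UperNetHead(config, in_channels=[32, 64, 128, 256])
    >>> _ = head.eval()  # batch norm needs more than one value per channel in training mode (the 1x1 pooled map has one)
    >>> # one feature map per backbone stage, e.g. strides 4/8/16/32 for a 224x224 input
    >>> features = [torch.randn(1, c, 224 // s, 224 // s) for c, s in zip([32, 64, 128, 256], [4, 8, 16, 32])]
    >>> head(features).shape  # logits at the resolution of the highest-resolution feature map
    torch.Size([1, 19, 56, 56])
    ```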
    """

    def __init__(self, config, in_channels):
        super().__init__()

        self.config = config
        self.pool_scales = config.pool_scales  # e.g. (1, 2, 3, 6)
        self.in_channels = in_channels
        self.channels = config.hidden_size
        self.align_corners = False
        self.classifier = nn.Conv2d(self.channels, config.num_labels, kernel_size=1)

        # PSP Module
        self.psp_modules = UperNetPyramidPoolingModule(
            self.pool_scales,
            self.in_channels[-1],
            self.channels,
            align_corners=self.align_corners,
        )
        self.bottleneck = UperNetConvModule(
            self.in_channels[-1] + len(self.pool_scales) * self.channels,
            self.channels,
            kernel_size=3,
            padding=1,
        )
        # FPN Module
        self.lateral_convs = nn.ModuleList()
        self.fpn_convs = nn.ModuleList()
        for in_channels in self.in_channels[:-1]:  # skip the top layer
            l_conv = UperNetConvModule(in_channels, self.channels, kernel_size=1)
            fpn_conv = UperNetConvModule(self.channels, self.channels, kernel_size=3, padding=1)
            self.lateral_convs.append(l_conv)
            self.fpn_convs.append(fpn_conv)

        self.fpn_bottleneck = UperNetConvModule(
            len(self.in_channels) * self.channels,
            self.channels,
            kernel_size=3,
            padding=1,
        )

    def psp_forward(self, inputs):
        x = inputs[-1]
        psp_outs = [x]
        psp_outs.extend(self.psp_modules(x))
        psp_outs = torch.cat(psp_outs, dim=1)
        output = self.bottleneck(psp_outs)

        return output

    def forward(self, encoder_hidden_states: torch.Tensor) -> torch.Tensor:
        # build laterals
        laterals = [lateral_conv(encoder_hidden_states[i]) for i, lateral_conv in enumerate(self.lateral_convs)]

        laterals.append(self.psp_forward(encoder_hidden_states))

        # build top-down path
        used_backbone_levels = len(laterals)
        for i in range(used_backbone_levels - 1, 0, -1):
            prev_shape = laterals[i - 1].shape[2:]
            laterals[i - 1] = laterals[i - 1] + nn.functional.interpolate(
                laterals[i], size=prev_shape, mode="bilinear", align_corners=self.align_corners
            )

        # build outputs
        fpn_outs = [self.fpn_convs[i](laterals[i]) for i in range(used_backbone_levels - 1)]
        # append psp feature
        fpn_outs.append(laterals[-1])

        for i in range(used_backbone_levels - 1, 0, -1):
            fpn_outs[i] = nn.functional.interpolate(
                fpn_outs[i], size=fpn_outs[0].shape[2:], mode="bilinear", align_corners=self.align_corners
            )
        fpn_outs = torch.cat(fpn_outs, dim=1)
        output = self.fpn_bottleneck(fpn_outs)
        output = self.classifier(output)

        return output


class UperNetFCNHead(nn.Module):
    """
    Fully Convolution Networks for Semantic Segmentation. This head is the implementation of
    [FCNNet](https://huggingface.co/papers/1411.4038).

    Args:
        config:
            Configuration.
        in_channels (`list[int]`):
            Number of input channels for each backbone feature map.
        kernel_size (int):
            The kernel size for convs in the head. Default: 3.
        dilation (int):
            The dilation rate for convs in the head. Default: 1.
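
    Example (an illustrative sketch; the channel counts below are hypothetical, and `auxiliary_in_channels=None`
    makes the head infer its width from the feature map selected by `in_index`):

    ```python
    >>> import torch

    >>> config = UperNetConfig(auxiliary_in_channels=None, auxiliary_channels=64, auxiliary_num_convs=1, num_labels=19)
    >>> fcn_head = UperNetFCNHead(config, in_channels=[32, 64, 128, 256])  # reads the feature map at `in_index` 2
    >>> features = [torch.randn(1, c, 224 // s, 224 // s) for c, s in zip([32, 64, 128, 256], [4, 8, 16, 32])]
    >>> fcn_head(features).shape  # logits at the resolution of the selected feature map
    torch.Size([1, 19, 14, 14])
    ```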
    """

    def __init__(
        self, config, in_channels, in_index: int = 2, kernel_size: int = 3, dilation: Union[int, tuple[int, int]] = 1
    ) -> None:
        super().__init__()

        self.config = config
        self.in_channels = in_channels[in_index] if config.auxiliary_in_channels is None else config.auxiliary_in_channels
        self.channels = config.auxiliary_channels
        self.num_convs = config.auxiliary_num_convs
        self.concat_input = config.auxiliary_concat_input
        self.in_index = in_index

        conv_padding = (kernel_size // 2) * dilation
        convs = []
        convs.append(
            UperNetConvModule(
                self.in_channels, self.channels, kernel_size=kernel_size, padding=conv_padding, dilation=dilation
            )
        )
        for i in range(self.num_convs - 1):
            convs.append(
                UperNetConvModule(
                    self.channels, self.channels, kernel_size=kernel_size, padding=conv_padding, dilation=dilation
                )
            )
        if self.num_convs == 0:
            self.convs = nn.Identity()
        else:
            self.convs = nn.Sequential(*convs)
        if self.concat_input:
            self.conv_cat = UperNetConvModule(
                self.in_channels + self.channels, self.channels, kernel_size=kernel_size, padding=kernel_size // 2
            )

        self.classifier = nn.Conv2d(self.channels, config.num_labels, kernel_size=1)

    def forward(self, encoder_hidden_states: torch.Tensor) -> torch.Tensor:
        # just take the relevant feature maps
        hidden_states = encoder_hidden_states[self.in_index]
        output = self.convs(hidden_states)
        if self.concat_input:
            output = self.conv_cat(torch.cat([hidden_states, output], dim=1))
        output = self.classifier(output)
        return output


class UperNetPreTrainedModel(PreTrainedModel):
    config: UperNetConfig
    main_input_name = "pixel_values"
    _no_split_modules = []

    def _init_weights(self, module):
        if isinstance(module, nn.Conv2d):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.bias is not None:
                module.bias.data.zero_()
        elif isinstance(module, nn.BatchNorm2d):
            module.weight.data.fill_(1.0)
            module.bias.data.zero_()


@auto_docstring(
    custom_intro="""
    UperNet framework leveraging any vision backbone e.g. for ADE20k, CityScapes.
    """
)
class UperNetForSemanticSegmentation(UperNetPreTrainedModel):
    def __init__(self, config):
        super().__init__(config)

        self.backbone = load_backbone(config)

        # Semantic segmentation head(s)
        self.decode_head = UperNetHead(config, in_channels=self.backbone.channels)
        self.auxiliary_head = UperNetFCNHead(config, in_channels=self.backbone.channels) if config.use_auxiliary_head else None

        # Initialize weights and apply final processing
        self.post_init()

    @auto_docstring
    def forward(
        self,
        pixel_values: Optional[torch.Tensor] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        labels: Optional[torch.Tensor] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[tuple, SemanticSegmenterOutput]:
        r"""
        labels (`torch.LongTensor` of shape `(batch_size, height, width)`, *optional*):
            Ground truth semantic segmentation maps for computing the loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels > 1`, a classification loss is computed (Cross-Entropy).

        Examples:
        ```python
        >>> from transformers import AutoImageProcessor, UperNetForSemanticSegmentation
        >>> from PIL import Image
        >>> from huggingface_hub import hf_hub_download

        >>> image_processor = AutoImageProcessor.from_pretrained("openmmlab/upernet-convnext-tiny")
        >>> model = UperNetForSemanticSegmentation.from_pretrained("openmmlab/upernet-convnext-tiny")

        >>> filepath = hf_hub_download(
        ...     repo_id="hf-internal-testing/fixtures_ade20k", filename="ADE_val_00000001.jpg", repo_type="dataset"
        ... )
        >>> image = Image.open(filepath).convert("RGB")

        >>> inputs = image_processor(images=image, return_tensors="pt")

        >>> outputs = model(**inputs)

        >>> logits = outputs.logits  # shape (batch_size, num_labels, height, width)
        >>> list(logits.shape)
        [1, 150, 512, 512]
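
        >>> # Illustrative extension of the example above: passing `labels` also returns a cross-entropy loss
        >>> # (the all-zeros map below is a dummy target, not real annotations)
        >>> import torch

        >>> labels = torch.zeros(1, 512, 512, dtype=torch.long)
        >>> outputs = model(**inputs, labels=labels)
        >>> outputs.loss is not None
        True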
        ```"""
        if labels is not None and self.config.num_labels == 1:
            raise ValueError("The number of labels should be greater than one")

        return_dict = return_dict if return_dict is not None else self.config.use_return_dict
        output_hidden_states = (
            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
        )
        output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions

        outputs = self.backbone.forward_with_filtered_kwargs(
            pixel_values, output_hidden_states=output_hidden_states, output_attentions=output_attentions
        )
        features = outputs.feature_maps

        logits = self.decode_head(features)
        logits = nn.functional.interpolate(logits, size=pixel_values.shape[2:], mode="bilinear", align_corners=False)

        auxiliary_logits = None
        if self.auxiliary_head is not None:
            auxiliary_logits = self.auxiliary_head(features)
            auxiliary_logits = nn.functional.interpolate(
                auxiliary_logits, size=pixel_values.shape[2:], mode="bilinear", align_corners=False
            )

        loss = None
        if labels is not None:
            # compute weighted loss
            loss_fct = CrossEntropyLoss(ignore_index=self.config.loss_ignore_index)
            loss = loss_fct(logits, labels)
            if auxiliary_logits is not None:
                auxiliary_loss = loss_fct(auxiliary_logits, labels)
                loss += self.config.auxiliary_loss_weight * auxiliary_loss

        if not return_dict:
            if output_hidden_states:
                output = (logits,) + outputs[1:]
            else:
                output = (logits,) + outputs[2:]
            return ((loss,) + output) if loss is not None else output

        return SemanticSegmenterOutput(
            loss=loss,
            logits=logits,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )


__all__ = ["UperNetForSemanticSegmentation", "UperNetPreTrainedModel"]