ó
    <±hŒÙ  ã                   óÜ  • S SK rS SKrS SKJr  S SKJrJrJr  S SK	r
S SKrS SKJs  Jr  S SKJrJr  SSKJr  SSKJrJrJr  SSKJr  SS	KJrJr  SS
KJrJrJr  SSK J!r!  \" 5       (       a  S SK"J#r#  \" 5       (       a  S SK$J%r%  S SK&J'r'  \\" SS9 " S S\5      5       5       r( SLS\R                   S\R                   S\R                   4S jjr)S\S\S\4S jr*S\R                   S\R                   S\R                   4S jr+ " S S\RX                  5      r-S\S\S\.S\4S jr/S\R                   S\R                   S\.S\R                   4S  jr0 " S! S"\RX                  5      r1 " S# S$\RX                  5      r2 " S% S&\RX                  5      r3 SMS'\RX                  S(\R                   S)\R                   S*\R                   S+\\R                      S,\4S-\44S. jjr5 " S/ S0\RX                  5      r6 " S1 S2\RX                  5      r7SNS3\R                   S4\4S5\8S\R                   4S6 jjr9 " S7 S8\RX                  5      r: " S9 S:\RX                  5      r; " S; S<\RX                  5      r< " S= S>\5      r= " S? S@\R|                  5      r? " SA SB\RX                  5      r@ " SC SD\RX                  5      rA " SE SF\RX                  5      rB\ " SG SH\5      5       rC\" SIS9 " SJ SK\C5      5       rDSHSK/rEg)Oé    N)Ú	dataclass)ÚCallableÚOptionalÚUnion)ÚTensorÚnné   )ÚACT2FN)ÚModelOutputÚis_scipy_availableÚrequires_backends)ÚGradientCheckpointingLayer)ÚALL_ATTENTION_FUNCTIONSÚPreTrainedModel)Úauto_docstringÚcan_return_tupleÚis_accelerate_availableé   )Ú
EomtConfig)Úlinear_sum_assignment)ÚPartialState)Úreducea˜  
    Class for outputs of [`EomtForUniversalSegmentationOutput`].

    This output can be directly passed to [`~EomtImageProcessor.post_process_semantic_segmentation`] or
    [`~EomtImageProcessor.post_process_instance_segmentation`] or
    [`~EomtImageProcessor.post_process_panoptic_segmentation`] to compute final segmentation maps. Please, see
    [`~EomtImageProcessor] for details regarding usage.
    )Úcustom_introc                   óD  • \ rS rSr% SrSr\\R                     \	S'   Sr
\\R                     \	S'   Sr\\R                     \	S'   Sr\\R                     \	S'   Sr\\\R                        \	S'   Sr\\\R                        \	S	'   Sr\\\R$                        \	S
'   Srg)Ú"EomtForUniversalSegmentationOutputé0   aß  
loss (`torch.Tensor`, *optional*):
    The computed loss, returned when labels are present.
class_queries_logits (`torch.FloatTensor`):
    A tensor of shape `(batch_size, num_queries, num_labels + 1)` representing the proposed classes for each
    query. Note the `+ 1` is needed because we incorporate the null class.
masks_queries_logits (`torch.FloatTensor`):
    A tensor of shape `(batch_size, num_queries, height, width)` representing the proposed masks for each
    query.
last_hidden_state (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
    Last hidden states (final feature map) of the last layer.
hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
    Tuple of `torch.FloatTensor` (one for the output of the embeddings + one for the output of each stage) of
    shape `(batch_size, sequence_length, hidden_size)`. Hidden-states all layers of the model.
attentions (`tuple(tuple(torch.FloatTensor))`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
    Tuple of `tuple(torch.FloatTensor)` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
    sequence_length)`. Self and Cross Attentions weights from transformer decoder.
patch_offsets (`list[torch.Tensor]`, *optional*):
    list of tuples indicating the image index and start and end positions of patches for semantic segementation.
NÚlossÚclass_queries_logitsÚmasks_queries_logitsÚlast_hidden_stateÚhidden_statesÚ
attentionsÚpatch_offsets© )Ú__name__Ú
__module__Ú__qualname__Ú__firstlineno__Ú__doc__r   r   ÚtorchÚFloatTensorÚ__annotations__r   r   r    r!   Útupler"   r#   Úlistr   Ú__static_attributes__r$   ó    Ú^/var/www/html/shao/venv/lib/python3.13/site-packages/transformers/models/eomt/modeling_eomt.pyr   r   0   s¬   ‡ ñð* )-€Dˆ(5×$Ñ$Ñ
%Ó,Ø8<Ð˜( 5×#4Ñ#4Ñ5Ó<Ø8<Ð˜( 5×#4Ñ#4Ñ5Ó<Ø59Ðx × 1Ñ 1Ñ2Ó9Ø8<€M8˜E %×"3Ñ"3Ñ4Ñ5Ó<Ø59€J˜˜u×0Ñ0Ñ1Ñ2Ó9Ø26€M8˜D §¡Ñ.Ñ/Ö6r0   r   Úinput_featuresÚpoint_coordinatesÚreturnc                 óè   • UR                  5       S:X  a  SnUR                  S5      n[        R                  R                  R
                  " U SU-  S-
  40 UD6nU(       a  UR                  S5      nU$ )að  
A wrapper around `torch.nn.functional.grid_sample` to support 3D point_coordinates tensors.

Args:
    input_features (`torch.Tensor` of shape (batch_size, channels, height, width)):
        A tensor that contains features map on a height * width grid
    point_coordinates (`torch.Tensor` of shape (batch_size, num_points, 2) or (batch_size, grid_height, grid_width,:
    2)):
        A tensor that contains [0, 1] * [0, 1] normalized point coordinates
    add_dim (`bool`):
        boolean value to keep track of added dimension

Returns:
    point_features (`torch.Tensor` of shape (batch_size, channels, num_points) or (batch_size, channels,
    height_grid, width_grid):
        A tensor that contains features for points in `point_coordinates`.
r	   Té   g       @ç      ð?)ÚdimÚ	unsqueezer*   r   Ú
functionalÚgrid_sampleÚsqueeze)r2   r3   Úadd_dimÚkwargsÚpoint_featuress        r1   Úsample_pointr@   [   st   € ð( ×ÑÓ !Ó#ØˆØ-×7Ñ7¸Ó:Ðô —X‘X×(Ñ(×4Ò4°^ÀSÐK\ÑE\Ð_bÑEbÑmÐflÑm€NÞØ'×/Ñ/°Ó2ˆàÐr0   ÚinputsÚlabelsc                 ó  • U R                  5       R                  S5      n S[        R                  " XR                  5      -  nU R                  S5      SS2S4   UR                  S5      SSS24   -   nSUS-   US-   -  -
  nU$ )a   
A pair wise version of the dice loss, see `dice_loss` for usage.

Args:
    inputs (`torch.Tensor`):
        A tensor representing a mask
    labels (`torch.Tensor`):
        A tensor with the same shape as inputs. Stores the binary classification labels for each element in inputs
        (0 for the negative class and 1 for the positive class).

Returns:
    `torch.Tensor`: The computed loss between each pairs.
r   r6   éÿÿÿÿN)ÚsigmoidÚflattenr*   ÚmatmulÚTÚsum)rA   rB   Ú	numeratorÚdenominatorr   s        r1   Úpair_wise_dice_lossrL   {   sz   € ð ^‰^Ó×%Ñ% aÓ(€FØ”E—L’L ¯©Ó2Ñ2€Ià—*‘*˜R“.¢ D Ñ)¨F¯J©J°r«N¸4Â¸7Ñ,CÑC€KØ	˜A‘ +°¡/Ñ2Ñ2€DØ€Kr0   c                 óZ  • U R                   S   n[        R                  " SS9nU" U [        R                  " U 5      5      nU" U [        R
                  " U 5      5      n[        R                  " XB-  UR                  5      n[        R                  " XR-  SU-
  R                  5      nXg-   nU$ )aÂ  
A pair wise version of the cross entropy loss, see `sigmoid_cross_entropy_loss` for usage.

Args:
    inputs (`torch.Tensor`):
        A tensor representing a mask.
    labels (`torch.Tensor`):
        A tensor with the same shape as inputs. Stores the binary classification labels for each element in inputs
        (0 for the negative class and 1 for the positive class).

Returns:
    loss (`torch.Tensor`): The computed loss between each pairs.
r   Únone©Ú	reduction)Úshaper   ÚBCEWithLogitsLossr*   Ú	ones_likeÚ
zeros_likerG   rH   )	rA   rB   Úheight_and_widthÚ	criterionÚcross_entropy_loss_posÚcross_entropy_loss_negÚloss_posÚloss_negr   s	            r1   Ú$pair_wise_sigmoid_cross_entropy_lossr[   ‘   s‘   € ð —|‘| A‘Ðä×$Ò$¨vÑ6€IÙ& v¬u¯ª¸vÓ/FÓGÐÙ& v¬u×/?Ò/?ÀÓ/GÓHÐä|Š|Ð2ÑEÀvÇxÁxÓP€HÜ|Š|Ð2ÑEÈÈFÉ
Ç~Á~ÓV€HØÑ€DØ€Kr0   c                   óè   ^ • \ rS rSrSr SS\S\S\S\4U 4S jjjr\R                  " 5       S\R                  S	\R                  S
\R                  S\R                  S\\\
      4
S j5       rSrU =r$ )ÚEomtHungarianMatcheré­   aa  This class computes an assignment between the labels and the predictions of the network.

For efficiency reasons, the labels don't include the no_object. Because of this, in general, there are more
predictions than labels. In this case, we do a 1-to-1 matching of the best predictions, while the others are
un-matched (and thus treated as non-objects).
Ú
cost_classÚ	cost_maskÚ	cost_diceÚ
num_pointsc                 óŒ   >• [         TU ]  5         US:X  a  US:X  a  US:X  a  [        S5      eX@l        Xl        X l        X0l        g)aè  Creates the matcher

Params:
    cost_class (`float`, *optional*, defaults to 1.0):
        Relative weight of the classification error in the matching cost.
    cost_mask (`float`, *optional*,  defaults to 1.0):
        This is the relative weight of the focal loss of the binary mask in the matching cost.
    cost_dice (`float`, *optional*, defaults to 1.0):
        This is the relative weight of the dice loss of the binary mask in the matching cost.
    num_points (`int`, *optional*, defaults to 12544):
        No. of points to sample on which the mask loss will be calculated. The same set of K points are
        uniformly sampled for all prediction and ground truth masks to construct the cost matrix for bipartite
        matching.
r   zAll costs can't be 0N)ÚsuperÚ__init__Ú
ValueErrorrb   r_   r`   ra   )Úselfr_   r`   ra   rb   Ú	__class__s        €r1   re   ÚEomtHungarianMatcher.__init__µ   sC   ø€ ô" 	‰ÑÔØ˜‹?˜y¨A›~°)¸q³.ÜÐ3Ó4Ð4à$ŒØ$ŒØ"ŒØ"r0   r   r   Úmask_labelsÚclass_labelsr4   c           
      ó’  • / nUR                   S   n[        U5       GHÅ  nX'   R                  S5      nX   n	USS2XG   4   * n
X7   R                  U	5      nUSS2S4   nU	SS2S4   n	[        R
                  " SU R                  SU	R                  S9nUR                  UR                   S   SS5      n[        X½SS9R                  S5      nUR                  U	R                   S   SS5      n[        XžSS9R                  S5      n	[        X›5      n[        X›5      nU R                  U-  U R                  U
-  -   U R                  U-  -   n[        R                   " U[        R"                  " S	5      5      n[        R$                  " U[        R"                  " S
5      5      n[        R&                  " US5      n[)        UR+                  5       5      nUR-                  U5        GMÈ     U VVs/ sHL  u  nn[        R.                  " U[        R0                  S9[        R.                  " U[        R0                  S94PMN     nnnU$ s  snnf )aß  
Params:
    masks_queries_logits (`torch.Tensor`):
        A tensor of dim `batch_size, num_queries, num_labels` with the classification logits.
    class_queries_logits (`torch.Tensor`):
        A tensor of dim `batch_size, num_queries, height, width` with the predicted masks.
    class_labels (`torch.Tensor`):
        A tensor of dim `num_target_boxes` (where num_target_boxes is the number of ground-truth objects in the
        target) containing the class labels.
    mask_labels (`torch.Tensor`):
        A tensor of dim `num_target_boxes, height, width` containing the target masks.

Returns:
    matched_indices (`list[tuple[Tensor]]`): A list of size batch_size, containing tuples of (index_i, index_j)
    where:
        - index_i is the indices of the selected predictions (in order)
        - index_j is the indices of the corresponding selected labels (in order)
    For each batch element, it holds:
        len(index_i) = len(index_j) = min(num_queries, num_target_boxes).
r   rD   Nr   r6   ©ÚdeviceF©Úalign_cornersg    _ Bg    _ Â©Údtype)rQ   ÚrangeÚsoftmaxÚtor*   Úrandrb   rn   Úrepeatr@   r<   r[   rL   r`   r_   ra   ÚminimumÚtensorÚmaximumÚ
nan_to_numr   ÚcpuÚappendÚ	as_tensorÚint64)rg   r   r   rj   rk   ÚindicesÚ
batch_sizeÚiÚ
pred_probsÚ	pred_maskr_   Útarget_maskr3   Útarget_coordinatesÚpred_coordinatesr`   ra   Úcost_matrixÚassigned_indicesÚjÚmatched_indicess                        r1   ÚforwardÚEomtHungarianMatcher.forwardÏ   s  € ð8 *,ˆð *×/Ñ/°Ñ2ˆ
Üz×"ˆAØ-Ñ0×8Ñ8¸Ó<ˆJØ,Ñ/ˆIð %¢Q¨©Ð%7Ñ8Ð8ˆJØ%™.×+Ñ+¨IÓ6ˆKØ%¢a¨ gÑ.ˆKØ!¢! T 'Ñ*ˆIô !&§
¢
¨1¨d¯o©o¸qÈ×IYÑIYÑ ZÐà!2×!9Ñ!9¸+×:KÑ:KÈAÑ:NÐPQÐSTÓ!UÐÜ& {ÐV[Ñ\×dÑdÐefÓgˆKà0×7Ñ7¸	¿¹ÈÑ8JÈAÈqÓQÐÜ$ YÐPUÑV×^Ñ^Ð_`ÓaˆIô =¸YÓTˆIä+¨IÓCˆIàŸ.™.¨9Ñ4°t·±ÈÑ7SÑSÐVZ×VdÑVdÐgpÑVpÑpˆKäŸ-š-¨´U·\²\À$Ó5GÓHˆKÜŸ-š-¨´U·\²\À%Ó5HÓIˆKÜ×*Ò*¨;¸Ó:ˆKä0EÀkÇoÁoÓFWÓ0XÐØN‰NÐ+×,ñ? #ñF hoô
ÙgnÑ_cÐ_`ÐbcŒU_Š_˜Q¤e§k¡kÑ2´E·O²OÀAÌUÏ[É[Ñ4YÓZÑgnð 	ñ 
ð Ðùó
s   Ç,AI©r_   ra   r`   rb   )r7   r7   r7   i 1  )r%   r&   r'   r(   r)   ÚfloatÚintre   r*   Úno_gradr   r.   r-   rŒ   r/   Ú__classcell__©rh   s   @r1   r]   r]   ­   s£   ø† ñð joñ#Øð#Ø27ð#ØJOð#Øcf÷#ð #ð4 ‡]‚]ƒ_ðDà#Ÿl™lðDð $Ÿl™lðDð —\‘\ð	Dð
 —l‘lðDð 
ˆeF‰mÑ	óDó öDr0   r]   Ú	num_masksc                 óø   • U R                  5       R                  S5      nSX1-  R                  S5      -  nUR                  S5      UR                  S5      -   nSUS-   US-   -  -
  nUR                  5       U-  nU$ )aø  
Compute the DICE loss, similar to generalized IOU for masks as follows:

$$ \mathcal{L}_{\text{dice}(x, y) = 1 - \frac{2 * x \cap y }{x \cup y + 1}} $$

In practice, since `labels` is a binary mask, (only 0s and 1s), dice can be computed as follow

$$ \mathcal{L}_{\text{dice}(x, y) = 1 - \frac{2 * x * y }{x + y + 1}} $$

Args:
    inputs (`torch.Tensor`):
        A tensor representing a mask.
    labels (`torch.Tensor`):
        A tensor with the same shape as inputs. Stores the binary classification labels for each element in inputs
        (0 for the negative class and 1 for the positive class).
    num_masks (`int`):
        The number of masks present in the current batch, used for normalization.

Returns:
    `torch.Tensor`: The computed loss.
r   r6   rD   )rE   rF   rI   )rA   rB   r”   ÚprobsrJ   rK   r   s          r1   Ú	dice_lossr—     sx   € ð, N‰NÓ×$Ñ$ QÓ'€EØU‘^×(Ñ(¨Ó,Ñ,€IØ—)‘)˜B“- &§*¡*¨R£.Ñ0€KØ	˜A‘ +°¡/Ñ2Ñ2€DØ8‰8‹:˜	Ñ!€DØ€Kr0   c                 ó‚   • [         R                  " SS9nU" X5      nUR                  S5      R                  5       U-  nU$ )aX  
Args:
    inputs (`torch.Tensor`):
        A float tensor of arbitrary shape.
    labels (`torch.Tensor`):
        A tensor with the same shape as inputs. Stores the binary classification labels for each element in inputs
        (0 for the negative class and 1 for the positive class).

Returns:
    loss (`torch.Tensor`): The computed loss.
rN   rO   r   )r   rR   ÚmeanrI   )rA   rB   r”   rV   Úcross_entropy_lossr   s         r1   Úsigmoid_cross_entropy_lossr›   5  sB   € ô ×$Ò$¨vÑ6€IÙ" 6Ó2Ðà×"Ñ" 1Ó%×)Ñ)Ó+¨iÑ7€DØ€Kr0   c                   óð  ^ • \ rS rSrS\S\\\4   4U 4S jjrS\	\	\
      S\	\
   4S jrS\	\   S\\\4   4S	 jrS
\S\	\   S\\R                      S\\\4   4S jrS\R                  S\	\R                     S\\R                      S\
S\\\R                  4   4
S jrS rS rS\R                  S\R                  4S jrS\R                  S\
S\
S\S\R                  4
S jr SS\R                  S
\R                  S\	\R                     S\	\R                     S\\\\R                  4      S\\\R                  4   4S jjrS\R                  S\R4                  S\R                  4S jrSrU =r$ ) ÚEomtLossiI  ÚconfigÚweight_dictc                 óì  >• [         TU ]  5         [        U S/5        UR                  U l        X l        UR
                  U l        [        R                  " U R                  S-   5      nU R                  US'   U R                  SU5        UR                  U l        UR                  U l        UR                  U l        [        UR                  UR                   UR"                  U R                  S9U l        g)a   
The Eomt Loss. The loss is computed very similar to DETR. The process happens in two steps: 1) we
compute hungarian assignment between ground truth masks and the outputs of the model 2) we supervise each pair
of matched ground-truth / prediction (supervise class and mask)

Args:
    config (`EomtConfig`):
        The configuration for Eomt model also containing loss calculation specific parameters.
    weight_dict (`dict[str, float]`):
        A dictionary of weights to be applied to the different losses.
Úscipyr   rD   Úempty_weightrŽ   N)rd   re   r   Ú
num_labelsrŸ   Úno_object_weightÚeos_coefr*   ÚonesÚregister_bufferÚtrain_num_pointsrb   Úoversample_ratioÚimportance_sample_ratior]   Úclass_weightÚdice_weightÚmask_weightÚmatcher)rg   rž   rŸ   r¢   rh   s       €r1   re   ÚEomtLoss.__init__J  sÌ   ø€ ô 	‰ÑÔÜ˜$  	Ô*Ø ×+Ñ+ˆŒØ&Ôð ×/Ñ/ˆŒÜ—z’z $§/¡/°AÑ"5Ó6ˆØŸ=™=ˆRÑØ×Ñ˜^¨\Ô:ð !×1Ñ1ˆŒØ &× 7Ñ 7ˆÔØ'-×'EÑ'EˆÔ$ä+Ø×*Ñ*Ø×(Ñ(Ø×(Ñ(Ø—‘ñ	
ˆr0   Úsizesr4   c                 óp   • US   nUSS   H'  n[        U5       H  u  pE[        X$   U5      X$'   M     M)     U$ )Nr   r   )Ú	enumerateÚmax)rg   r°   ÚmaxesÚsublistÚindexÚitems         r1   Ú_max_by_axisÚEomtLoss._max_by_axism  sC   € Øa‘ˆØ˜Q˜R“yˆGÜ(¨Ö1‘Ü" 5¡<°Ó6“ó  2ñ !ð ˆr0   Útensorsc                 óR  • U R                  U Vs/ sH  n[        UR                  5      PM     sn5      n[        U5      /U-   nUu  pVpxUS   R                  n	US   R
                  n
[        R                  " XIU
S9n[        R                  " XWU4[        R                  U
S9n[        XU5       Ho  u  p-nUS UR                  S   2S UR                  S   2S UR                  S   24   R                  U5        SUS UR                  S   2S UR                  S   24'   Mq     X¼4$ s  snf )Nr   ©rr   rn   r   r6   F)r¸   r.   rQ   Úlenrr   rn   r*   Úzerosr¦   ÚboolÚzipÚcopy_)rg   rº   ry   Úmax_sizeÚbatch_shaper   Ú_ÚheightÚwidthrr   rn   Úpadded_tensorsÚpadding_masksÚpadded_tensorÚpadding_masks                  r1   Ú_pad_images_to_max_in_batchÚ$EomtLoss._pad_images_to_max_in_batchu  s'  € à×$Ñ$ÁwÓ%OÁw¸V¤d¨6¯<©<Ö&8ÁwÑ%OÓPˆä˜7“|n xÑ/ˆØ'2Ñ$ˆ
vØ˜‘
× Ñ ˆØ˜‘×"Ñ"ˆÜŸš [ÀfÑMˆÜŸ
š
 J¸Ð#>ÄeÇjÁjÐY_Ñ`ˆä36°wÐP]Ö3^Ñ/ˆF <ØÐ+˜FŸL™L¨™OÐ+Ð->¨v¯|©|¸A©Ð->Ð@QÀ&Ç,Á,ÈqÁ/Ð@QÐQÑR×XÑXÐY_Ô`ØAFˆLÐ*˜6Ÿ<™<¨™?Ð*Ð,=¨f¯l©l¸1©oÐ,=Ð=Ó>ñ 4_ð Ð,Ð,ùò &Ps   D$r   rk   r€   c           	      óÀ  • UnUR                   u  pVn[        R                  " U R                  S9nU R	                  U5      n	[
        R                  " [        X#5       V
VVs/ sH  u  n
u  p{X«   PM     snnn
5      n[
        R                  " XV4U R                  [
        R                  UR                  S9nXÍU	'   UR                  SS5      nU" Xí5      nSU0nU$ s  snnn
f )a-  Compute the losses related to the labels using cross entropy.

Args:
    class_queries_logits (`torch.Tensor`):
        A tensor of shape `batch_size, num_queries, num_labels`
    class_labels (`list[torch.Tensor]`):
        List of class labels of shape `(labels)`.
    indices (`tuple[np.array])`:
        The indices computed by the Hungarian matcher.

Returns:
    `dict[str, Tensor]`: A dict of `torch.Tensor` containing the following key:
    - **loss_cross_entropy** -- The loss computed using cross entropy on the predicted and ground truth labels.
)Úweight)Ú
fill_valuerr   rn   r   r6   Úloss_cross_entropy)rQ   r   ÚCrossEntropyLossr¢   Ú$_get_predictions_permutation_indicesr*   ÚcatrÀ   Úfullr£   r   rn   Ú	transpose)rg   r   rk   r€   Úpred_logitsr   Únum_queriesrÄ   rV   ÚidxÚtargetrŠ   Útarget_classes_oÚtarget_classesÚpred_logits_transposedÚloss_ceÚlossess                    r1   Úloss_labelsÚEomtLoss.loss_labels†  s×   € ð" +ˆØ%0×%6Ñ%6Ñ"ˆ
 Ü×'Ò'¨t×/@Ñ/@ÑAˆ	Ø×7Ñ7¸Ó@ˆÜ Ÿ9š9Ü-0°Ô-GÕHÑ-G™>˜6¡6 AˆVŒYÑ-GÓHó
Ðô ŸšØÐ%°$·/±/ÌÏÉÐ]h×]oÑ]oñ
ˆð /sÑà!,×!6Ñ!6°q¸!Ó!<ÐÙÐ2ÓCˆØ&¨Ð0ˆØˆùô Is   Á Cr   rj   r”   c                 ó  ^ • T R                  U5      nT R                  U5      nX   nT R                  U5      u  p‰X†   nUSS2S4   nUSS2S4   n[        R                  " 5          T R                  UU 4S jT R                  T R                  T R                  5      n
[        XŠSS9R                  S5      nSSS5        [        UW
SS9R                  S5      n[        UWU5      [        XËU5      S.nAAU$ ! , (       d  f       NF= f)a$  Compute the losses related to the masks using sigmoid_cross_entropy_loss and dice loss.

Args:
    masks_queries_logits (`torch.Tensor`):
        A tensor of shape `(batch_size, num_queries, height, width)`.
    mask_labels (`torch.Tensor`):
        List of mask labels of shape `(labels, height, width)`.
    indices (`tuple[np.array])`:
        The indices computed by the Hungarian matcher.
    num_masks (`int)`:
        The number of masks, used for normalization.

Returns:
    losses (`dict[str, Tensor]`): A dict of `torch.Tensor` containing two keys:
    - **loss_mask** -- The loss computed using sigmoid cross entropy loss on the predicted and ground truth.
      masks.
    - **loss_dice** -- The loss computed using dice loss on the predicted on the predicted and ground truth,
      masks.
Nc                 ó&   >• TR                  U 5      $ ©N)Úcalculate_uncertainty)Úlogitsrg   s    €r1   Ú<lambda>Ú%EomtLoss.loss_masks.<locals>.<lambda>Ó  s   ø€ ˜t×9Ñ9¸&ÔAr0   Fro   r   )Ú	loss_maskÚ	loss_dice)rÒ   Ú _get_targets_permutation_indicesrË   r*   r‘   Úsample_points_using_uncertaintyrb   r©   rª   r@   r<   r›   r—   )rg   r   rj   r€   r”   Úsrc_idxÚtgt_idxÚ
pred_masksÚtarget_masksrÄ   r3   Úpoint_labelsÚpoint_logitsrÞ   s   `             r1   Ú
loss_masksÚEomtLoss.loss_masks¨  s  ø€ ð4 ×;Ñ;¸GÓDˆØ×7Ñ7¸Ó@ˆà)Ñ2ˆ
ð ×:Ñ:¸;ÓG‰ˆØ#Ñ,ˆð  ¢ 4 Ñ(ˆ
Ø#¢A t GÑ,ˆô ]Š]_Ø $× DÑ DØÜAØ—‘Ø×%Ñ%Ø×,Ñ,ó!Ðô (¨ÐW\Ñ]×eÑeÐfgÓhˆL÷ ô $ JÐ0AÐQVÑW×_Ñ_Ð`aÓbˆô 4°LÀ,ÐPYÓZÜ" <¸yÓIñ
ˆð
 ØØˆ÷) _ús   Á&AC7Ã7
Dc                 ó  • [         R                  " [        U5       VVVs/ sH  u  nu  p4[         R                  " X25      PM      snnn5      n[         R                  " U VVs/ sH  u  p4UPM	     snn5      nXV4$ s  snnnf s  snnf rã   ©r*   rÓ   r²   Ú	full_like)rg   r€   r‚   ÚsrcrÄ   Úbatch_indicesÚpredictions_indicess          r1   rÒ   Ú-EomtLoss._get_predictions_permutation_indicesæ  sh   € äŸ	š	ÌiÐX_ÔN`Õ"aÑN`¹{¸qÁ(À3¤5§?¢?°3Ö#:ÑN`Ó"aÓbˆÜ#Ÿiši¹WÔ(E¹W±°#«¹WÒ(EÓFÐØÐ1Ð1ùô #bùÛ(Eó    $A:Á"B
c                 ó  • [         R                  " [        U5       VVVs/ sH  u  nu  p4[         R                  " XB5      PM      snnn5      n[         R                  " U VVs/ sH  u  p4UPM	     snn5      nXV4$ s  snnnf s  snnf rã   rõ   )rg   r€   r‚   rÄ   Útgtrø   Útarget_indicess          r1   rê   Ú)EomtLoss._get_targets_permutation_indicesì  sg   € äŸ	š	ÌiÐX_ÔN`Õ"aÑN`¹{¸qÁ(À1¤5§?¢?°3Ö#:ÑN`Ó"aÓbˆÜŸš¹Ô#@¹©H¨Q£C¹Ò#@ÓAˆØÐ,Ð,ùô #bùÛ#@rû   rå   c                 ó4   • [         R                  " U5      * nU$ )a2  
In Eomt paper, uncertainty is estimated as L1 distance between 0.0 and the logit prediction in 'logits'
for the foreground class in `classes`.

Args:
    logits (`torch.Tensor`):
    A tensor of shape (R, 1, ...) for class-specific or class-agnostic, where R is the total number of predicted masks in all images and C is:
    the number of foreground classes. The values are logits.

Returns:
    scores (`torch.Tensor`): A tensor of shape (R, 1, ...) that contains uncertainty scores with the most
    uncertain locations having the highest uncertainty score.
)r*   Úabs)rg   rå   Úuncertainty_scoress      r1   rä   ÚEomtLoss.calculate_uncertaintyò  s   € ô  %Ÿyšy¨Ó0Ð1ÐØ!Ð!r0   rb   r©   rª   c           
      óh  • UR                   S   n[        X4-  5      n[        R                  " XgSUR                  S9n[        XSS9n	U" U	5      n
[        XS-  5      nX;-
  n[        R                  " U
SS2SSS24   USS9S   nU[        R                  " U[        R                  UR                  S	9-  nXÞSS2S4   -  nUR                  S
S5      UR                  S
5      SS24   R                  XkS5      nUS:”  a5  [        R                  " U[        R                  " XlSUR                  S9/SS9nU$ )að  
This function is meant for sampling points in [0, 1] * [0, 1] coordinate space based on their uncertainty. The
uncertainty is calculated for each point using the passed `uncertainty function` that takes points logit
prediction as input.

Args:
    logits (`float`):
        Logit predictions for P points.
    uncertainty_function:
        A function that takes logit predictions for P points and returns their uncertainties.
    num_points (`int`):
        The number of points P to sample.
    oversample_ratio (`int`):
        Oversampling parameter.
    importance_sample_ratio (`float`):
        Ratio of points that are sampled via importance sampling.

Returns:
    point_coordinates (`torch.Tensor`):
        Coordinates for P sampled points.
r   r6   rm   Fro   Nr   )Úkr8   r¼   rD   ©r8   )rQ   r   r*   rv   rn   r@   ÚtopkÚarangeÚlongÚviewrÓ   )rg   rå   Úuncertainty_functionrb   r©   rª   Ú	num_boxesÚnum_points_sampledr3   rñ   Úpoint_uncertaintiesÚnum_uncertain_pointsÚnum_random_pointsrØ   Úshifts                  r1   rë   Ú(EomtLoss.sample_points_using_uncertainty  s3  € ð< —L‘L ‘Oˆ	Ü  Ñ!>Ó?Ðô "ŸJšJ yÀaÐPV×P]ÑP]Ñ^Ðä# FÈUÑSˆá2°<Ó@Ðä"Ð#:Ñ#GÓHÐØ&Ñ=ÐäjŠjÐ,ªQ°²1¨WÑ5Ð9MÐSTÑUÐVWÑXˆØ"¤U§\¢\°)Ä5Ç:Á:ÐV\×VcÑVcÑ%dÑdˆØ’Q˜W‰~ÑˆØ-×2Ñ2°2°qÓ9¸#¿(¹(À2»,Ê¸/ÑJ×OÑOÐPYÐqrÓsÐà˜qÓ Ü %§	¢	Ø"¤E§J¢J¨yÈQÐW]×WdÑWdÑ$eÐfØñ!Ðð !Ð r0   Úauxiliary_predictionsc                 ó”  • U R                  XX45      nU R                  XDS   R                  S9n0 U R                  XXg5      EU R	                  X$U5      EnUbj  [        U5       H[  u  pšU
S   nU
S   nU R                  XX45      nUR                  5        VVs0 sH  u  pÍU SU	 3U_M     nnnUR                  U5        M]     U$ s  snnf )aú  
This performs the loss computation.

Args:
    masks_queries_logits (`torch.Tensor`):
        A tensor of shape `(batch_size, num_queries, height, width)`.
    class_queries_logits (`torch.Tensor`):
        A tensor of shape `(batch_size, num_queries, num_labels)`.
    mask_labels (`torch.Tensor`):
        List of mask labels of shape `(labels, height, width)`.
    class_labels (`list[torch.Tensor]`):
        List of class labels of shape `(labels)`.
    auxiliary_predictions (`dict[str, torch.Tensor]`, *optional*):
        if `use_auxiliary_loss` was set to `true` in [`EomtConfig`], then it contains the logits from
        the inner layers of the EomtMaskedAttentionDecoder.

Returns:
    losses (`dict[str, Tensor]`): A dict of `torch.Tensor` containing three keys:
    - **loss_cross_entropy** -- The loss computed using cross entropy on the predicted and ground truth labels.
    - **loss_mask** -- The loss computed using sigmoid cross_entropy loss on the predicted and ground truth
      masks.
    - **loss_dice** -- The loss computed using dice loss on the predicted on the predicted and ground truth
      masks.
    if `use_auxiliary_loss` was set to `true` in [`EomtConfig`], the dictionary contains additional
    losses for each auxiliary predictions.
r   rm   r   r   rÄ   )	r®   Úget_num_masksrn   rò   rß   r²   rŒ   ÚitemsÚupdate)rg   r   r   rj   rk   r  r€   r”   rÞ   rØ   Úaux_outputsÚ	loss_dictÚkeyÚvalues                 r1   rŒ   ÚEomtLoss.forward:  sñ   € ðH —,‘,Ð3È;Óeˆà×&Ñ& |È¹O×<RÑ<RÐ&ÐSˆ	ð%
Øo‰oÐ2ÀÓTð%
à×ÑÐ3À7ÓKð%
ˆð
 !Ñ,Ü$-Ð.CÖ$DÑ Ø'2Ð3IÑ'JÐ$Ø'2Ð3IÑ'JÐ$Ø ŸL™LÐ)=ÐU`Óo	ØENÇ_Á_ÔEVÔWÑEV±z°s ˜u A c U˜^¨UÒ2ÑEV	ÑWØ—‘˜iÖ(ñ %Eð ˆùó Xs   ÂCrn   c                 óV  • [        U Vs/ sH  n[        U5      PM     sn5      n[        R                  " U[        R                  US9nSn[        5       (       a3  [        R                  0 :w  a  [        U5      n[        5       R                  n[        R                  " XE-  SS9nU$ s  snf )z[
Computes the average number of target masks across the batch, for normalization purposes.
r¼   r   )Úmin)rI   r½   r*   r~   r   r   r   Ú_shared_stater   Únum_processesÚclamp)rg   rk   rn   Úclassesr”   Ú
world_sizes         r1   r  ÚEomtLoss.get_num_masksq  s‰   € ô ±\ÓB±\¨'œ˜Wž±\ÑBÓCˆ	Ü—O’O I´U·[±[ÈÑPˆ	Øˆ
Ü"×$Ñ$Ü×)Ñ)¨RÓ/Ü" 9Ó-	Ü)›^×9Ñ9
ä—K’K 	Ñ 6¸AÑ>ˆ	ØÐùò Cs   ŠB&)r¥   rª   r®   r£   rb   r©   rŸ   rã   )r%   r&   r'   r(   r   ÚdictÚstrr   re   r.   r   r¸   r   r-   rË   ÚnpÚarrayrß   r*   rò   rÒ   rê   rä   rë   r   rŒ   rn   r  r/   r’   r“   s   @r1   r   r   I  s  ø† ð!
˜zð !
¸¸SÀ%¸ZÑ8H÷ !
ðF $ t¨C¡y¡/ð °d¸3±iô ð-°4¸±<ð -ÀEÈ&ÐRXÈ.ÑDYô -ð" Ø$*ð Ø:>¸v¹,ð ØQVÐWY×W_ÑW_ÑQ`ð à	ˆc6ˆkÑ	ô ðD<à#Ÿl™lð<ð ˜%Ÿ,™,Ñ'ð<ð r—x‘x‘ð	<ð
 ð<ð 
ˆc5—<‘<ÐÑ	 ô<ò|2ò-ð"¨E¯L©Lð "¸U¿\¹\ô "ð"5!à—‘ð5!ð ð	5!ð
 ð5!ð "'ð5!ð 
‰ô5!ðz DHñ5à#Ÿl™lð5ð $Ÿl™lð5ð ˜%Ÿ,™,Ñ'ð	5ð
 ˜5Ÿ<™<Ñ(ð5ð  (¨¨S°%·,±,Ð->Ñ(?Ñ@ð5ð 
ˆc5—<‘<ÐÑ	 õ5ðn¨%¯,©,ð ÀÇÁð ÐQV×Q]ÑQ]÷ ò r0   r   c                   óf   ^ • \ rS rSrSrU 4S jrS\R                  S\R                  4S jrSr	U =r
$ )ÚEomtPatchEmbeddingsi  zÜ
This class turns `pixel_values` of shape `(batch_size, num_channels, height, width)` into the initial
`hidden_states` (patch embeddings) of shape `(batch_size, seq_length, hidden_size)` to be consumed by a
Transformer.
c                 óÈ  >• [         TU ]  5         UR                  UR                  p2UR                  UR
                  pT[        U[        R                  R                  5      (       a  UOX"4n[        U[        R                  R                  5      (       a  UOX34nUS   US   -  US   US   -  -  nX l        X0l        X@l        X`l
        [        R                  " XEX3S9U l        g )Nr   r   ©Úkernel_sizeÚstride)rd   re   Ú
image_sizeÚ
patch_sizeÚnum_channelsÚhidden_sizeÚ
isinstanceÚcollectionsÚabcÚIterableÚnum_patchesr   ÚConv2dÚ
projection)rg   rž   r/  r0  r1  r2  r7  rh   s          €r1   re   ÚEomtPatchEmbeddings.__init__ˆ  sÃ   ø€ Ü‰ÑÔØ!'×!2Ñ!2°F×4EÑ4EJØ$*×$7Ñ$7¸×9KÑ9Kkä#-¨j¼+¿/¹/×:RÑ:R×#SÑ#S‘ZÐZdÐYqˆ
Ü#-¨j¼+¿/¹/×:RÑ:R×#SÑ#S‘ZÐZdÐYqˆ
Ø! !‘}¨
°1©Ñ5¸*ÀQ¹-È:ÐVWÉ=Ñ:XÑYˆØ$ŒØ$ŒØ(ÔØ&ÔäŸ)š) LÈ:Ñiˆr0   Úpixel_valuesr4   c                 óÚ   • UR                   S   nX R                  :w  a  [        SU R                   SU S35      eU R                  U5      R	                  S5      R                  SS5      nU$ )Nr   zoMake sure that the channel dimension of the pixel values match with the one set in the configuration. Expected z	 but got Ú.r6   )rQ   r1  rf   r9  rF   rÕ   )rg   r;  r1  Ú
embeddingss       r1   rŒ   ÚEomtPatchEmbeddings.forward—  sx   € Ø#×)Ñ)¨!Ñ,ˆØ×,Ñ,Ó,ÜðØ!×.Ñ.Ð/¨y¸¸ÀaðIóð ð —_‘_ \Ó2×:Ñ:¸1Ó=×GÑGÈÈ1ÓMˆ
ØÐr0   )r/  r1  r7  r0  r9  )r%   r&   r'   r(   r)   re   r*   r   rŒ   r/   r’   r“   s   @r1   r*  r*    s.   ø† ñõjð E§L¡Lð °U·\±\÷ ò r0   r*  c                   ór   ^ • \ rS rSrSrS\SS4U 4S jjrS\R                  S\R                  4S jr	S	r
U =r$ )
ÚEomtEmbeddingsi¢  zE
Construct the CLS token, mask token, position and patch embeddings.
rž   r4   Nc                 óÀ  >• [         TU ]  5         Xl        UR                  U l        [        R
                  " [        R                  " SSUR                  5      5      U l	        [        R
                  " [        R                  " SUR                  UR                  5      5      U l        [        U5      U l        U R                  R                  n[        R                   " UR"                  5      U l        SUR                  -   U l        [        R(                  " X!R                  5      U l        U R-                  S[        R.                  " U5      R1                  S5      SS9  g )Nr   Úposition_ids)r   rD   F)Ú
persistent)rd   re   rž   r0  r   Ú	Parameterr*   Úrandnr2  Ú	cls_tokenr¾   Únum_register_tokensÚregister_tokensr*  Úpatch_embeddingsr7  ÚDropoutÚhidden_dropout_probÚdropoutÚnum_prefix_tokensÚ	EmbeddingÚposition_embeddingsr§   r  Úexpand)rg   rž   r7  rh   s      €r1   re   ÚEomtEmbeddings.__init__§  sö   ø€ Ü‰ÑÔàŒØ ×+Ñ+ˆŒäŸš¤e§k¢k°!°Q¸×8JÑ8JÓ&KÓLˆŒÜ!Ÿ|š|¬E¯KªK¸¸6×;UÑ;UÐW]×WiÑWiÓ,jÓkˆÔä 3°FÓ ;ˆÔØ×+Ñ+×7Ñ7ˆÜ—z’z &×"<Ñ"<Ó=ˆŒØ!" V×%?Ñ%?Ñ!?ˆÔÜ#%§<¢<°×=OÑ=OÓ#PˆÔ Ø×Ñ˜^¬U¯\ª\¸+Ó-F×-MÑ-MÈgÓ-VÐchÐÒir0   r;  c                 ó¶  • UR                   u  n    nU R                  R                  R                  R                  nU R                  UR                  US95      nU R                  R                  USS5      nU R                  R                  USS5      nXPR                  U R                  5      -   n[        R                  " XgU/SS9nU R                  U5      nU$ )Nrq   rD   r   r  )rQ   rJ  r9  rÎ   rr   ru   rG  rQ  rI  rP  rC  r*   rÓ   rM  )rg   r;  r   rÄ   Útarget_dtyper>  Ú
cls_tokensrI  s           r1   rŒ   ÚEomtEmbeddings.forward·  sÂ   € Ø*×0Ñ0Ñˆ
Aq˜!Ø×,Ñ,×7Ñ7×>Ñ>×DÑDˆØ×*Ñ*¨<¯?©?À¨?Ð+NÓOˆ
à—^‘^×*Ñ*¨:°r¸2Ó>ˆ
Ø×.Ñ.×5Ñ5°jÀ"ÀbÓIˆà×":Ñ":¸4×;LÑ;LÓ"MÑMˆ
Ü—Y’Y 
¸ZÐHÈaÑPˆ
à—\‘\ *Ó-ˆ
àÐr0   )rG  rž   rM  rN  rJ  r0  rP  rI  )r%   r&   r'   r(   r)   r   re   r*   r   rŒ   r/   r’   r“   s   @r1   rA  rA  ¢  s>   ø† ñðj˜zð j¨d÷ jð  E§L¡Lð °U·\±\÷ ò r0   rA  ÚmoduleÚqueryr  r  Úattention_maskÚscalingrM  c                 ó°  • [         R                  " XR                  SS5      5      U-  nUb  X„-   n[        R                  R                  US[         R                  S9R                  UR                  5      n[        R                  R                  X†U R                  S9n[         R                  " Xƒ5      n	U	R                  SS5      R                  5       n	X˜4$ )NrD   éþÿÿÿ)r8   rr   )ÚpÚtrainingr   r6   )r*   rG   rÕ   r   r:   rt   Úfloat32ru   rr   rM  r^  Ú
contiguous)
rW  rX  r  r  rY  rZ  rM  r>   Úattn_weightsÚattn_outputs
             r1   Úeager_attention_forwardrc  Ç  s°   € ô —<’< §}¡}°R¸Ó'<Ó=ÀÑG€LØÑ!Ø#Ñ4ˆä—=‘=×(Ñ(¨¸2ÄUÇ]Á]Ð(ÐS×VÑVÐW\×WbÑWbÓc€LÜ—=‘=×(Ñ(¨È6Ï?É?Ð(Ð[€Lä—,’,˜|Ó3€KØ×'Ñ'¨¨1Ó-×8Ñ8Ó:€KàÐ$Ð$r0   c            
       ó®   ^ • \ rS rSrSrU 4S jr S	S\R                  S\\R                     S\	\R                  \\R                     4   4S jjr
SrU =r$ )
ÚEomtAttentioniÞ  z=Multi-headed attention from 'Attention Is All You Need' paperc                 ó   >• [         TU ]  5         Xl        UR                  U l        UR
                  U l        U R                  U R                  -  U l        U R                  U R                  -  U R                  :w  a&  [        SU R                   SU R                   S35      eU R                  S-  U l	        UR                  U l        SU l        [        R                  " U R                  U R                  5      U l        [        R                  " U R                  U R                  5      U l        [        R                  " U R                  U R                  5      U l        [        R                  " U R                  U R                  5      U l        g )Nz;embed_dim must be divisible by num_heads (got `embed_dim`: z and `num_heads`: z).g      à¿F)rd   re   rž   r2  Ú	embed_dimÚnum_attention_headsÚ	num_headsÚhead_dimrf   ÚscaleÚattention_dropoutrM  Ú	is_causalr   ÚLinearÚk_projÚv_projÚq_projÚout_proj©rg   rž   rh   s     €r1   re   ÚEomtAttention.__init__á  s  ø€ Ü‰ÑÔØŒØ×+Ñ+ˆŒØ×3Ñ3ˆŒØŸ™¨$¯.©.Ñ8ˆŒØ=‰=˜4Ÿ>™>Ñ)¨T¯^©^Ó;ÜØMÈdÏnÉnÐM]ð ^Ø—N‘NÐ# 2ð'óð ð —]‘] DÑ(ˆŒ
Ø×/Ñ/ˆŒØˆŒä—i’i §¡°·±Ó?ˆŒÜ—i’i §¡°·±Ó?ˆŒÜ—i’i §¡°·±Ó?ˆŒÜŸ	š	 $§.¡.°$·.±.ÓAˆr0   r!   rY  r4   c                 ó2  • UR                   u  pEnU R                  U5      nU R                  U5      nU R                  U5      n	UR	                  XEU R
                  U R                  5      R                  SS5      nUR	                  XEU R
                  U R                  5      R                  SS5      nU	R	                  XEU R
                  U R                  5      R                  SS5      n	[        n
U R                  R                  S:w  a  [        U R                  R                     n
U
" U UUU	UU R                  U R                  U R                  (       d  SOU R                  S9u  p¼UR!                  XEU5      R#                  5       nU R%                  U5      nX¼4$ )z#Input shape: Batch x Time x Channelr   r6   Úeagerç        )rm  rZ  rM  )rQ   rq  ro  rp  r
  ri  rj  rÕ   rc  rž   Ú_attn_implementationr   rm  rk  r^  rM  Úreshaper`  rr  )rg   r!   rY  r>   r   Ú
seq_lengthrg  ÚqueriesÚkeysÚvaluesÚattention_interfacerb  ra  s                r1   rŒ   ÚEomtAttention.forwardõ  sS  € ð -:×,?Ñ,?Ñ)ˆ
 	à—+‘+˜mÓ,ˆØ{‰{˜=Ó)ˆØ—‘˜]Ó+ˆà—,‘,˜z°t·~±~ÀtÇ}Á}ÓU×_Ñ_Ð`aÐcdÓeˆØy‰y˜°·±ÀÇÁÓO×YÑYÐZ[Ð]^Ó_ˆØ—‘˜Z°T·^±^ÀTÇ]Á]ÓS×]Ñ]Ð^_ÐabÓcˆä(?ÐØ;‰;×+Ñ+¨wÓ6Ü"9¸$¿+¹+×:ZÑ:ZÑ"[Ðá$7ØØØØØØ—n‘nØ—J‘JØ#Ÿ}Ÿ}‘C°$·,±,ñ	%
Ñ!ˆð "×)Ñ)¨*À)ÓL×WÑWÓYˆØ—m‘m KÓ0ˆàÐ(Ð(r0   )rž   rM  rg  rj  rm  ro  ri  rr  rq  rk  rp  rã   )r%   r&   r'   r(   r)   re   r*   r   r   r-   rŒ   r/   r’   r“   s   @r1   re  re  Þ  s[   ø† ÙGõBð. 26ñ$)à—|‘|ð$)ð ! §¡Ñ.ð$)ð
 
ˆu|‰|˜X e§l¡lÑ3Ð3Ñ	4÷$)ó $)r0   re  c                   óf   ^ • \ rS rSrSU 4S jjrS\R                  S\R                  4S jrSrU =r	$ )ÚEomtLayerScalei  r4   c                 ó®   >• [         TU ]  5         [        R                  " UR                  [
        R                  " UR                  5      -  5      U l        g rã   )	rd   re   r   rE  Úlayerscale_valuer*   r¦   r2  Úlambda1rs  s     €r1   re   ÚEomtLayerScale.__init__  s8   ø€ Ü‰ÑÔÜ—|’| F×$;Ñ$;¼e¿jºjÈ×I[ÑI[Ó>\Ñ$\Ó]ˆr0   Úhidden_statec                 ó   • XR                   -  $ rã   ©r„  ©rg   r†  s     r1   rŒ   ÚEomtLayerScale.forward!  s   € ØŸl™lÑ*Ð*r0   rˆ  ©r4   N©
r%   r&   r'   r(   re   r*   r   rŒ   r/   r’   r“   s   @r1   r  r    s)   ø† ÷^ð+ E§L¡Lð +°U·\±\÷ +ò +r0   r  ÚinputÚ	drop_probr^  c                 ó  • US:X  d  U(       d  U $ SU-
  nU R                   S   4SU R                  S-
  -  -   nU[        R                  " X@R                  U R
                  S9-   nUR                  5         U R                  U5      U-  nU$ )a*  
Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).

Comment by Ross Wightman: This is the same as the DropConnect impl I created for EfficientNet, etc networks,
however, the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper...
See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for changing the
layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use 'survival rate' as the
argument.
rw  r   r   )r   r¼   )rQ   Úndimr*   rv   rr   rn   Úfloor_Údiv)r  rŽ  r^  Ú	keep_probrQ   Úrandom_tensorÚoutputs          r1   Ú	drop_pathr–  %  s   € ð CÓžxØˆØI‘€IØ[‰[˜‰^Ð ¨¯
©
°Q©Ñ 7Ñ7€EØ¤§
¢
¨5¿¹ÈEÏLÉLÑ YÑY€MØ×ÑÔØY‰YyÓ! MÑ1€FØ€Mr0   c                   óŠ   ^ • \ rS rSrSrSS\\   SS4U 4S jjjrS\R                  S\R                  4S jr
S\4S	 jrS
rU =r$ )ÚEomtDropPathi9  zXDrop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).NrŽ  r4   c                 ó.   >• [         TU ]  5         Xl        g rã   )rd   re   rŽ  )rg   rŽ  rh   s     €r1   re   ÚEomtDropPath.__init__<  s   ø€ Ü‰ÑÔØ"r0   r!   c                 óB   • [        XR                  U R                  5      $ rã   )r–  rŽ  r^  ©rg   r!   s     r1   rŒ   ÚEomtDropPath.forward@  s   € Ü˜¯©¸¿¹ÓFÐFr0   c                 ó    • SU R                    3$ )Nzp=©rŽ  ©rg   s    r1   Ú
extra_reprÚEomtDropPath.extra_reprC  s   € ØD—N‘NÐ#Ð$Ð$r0   rŸ  rã   )r%   r&   r'   r(   r)   r   r   re   r*   r   rŒ   r&  r¡  r/   r’   r“   s   @r1   r˜  r˜  9  sQ   ø† Ùbñ# (¨5¡/ð #¸T÷ #ð #ðG U§\¡\ð G°e·l±lô Gð%˜C÷ %ò %r0   r˜  c                   óf   ^ • \ rS rSrSU 4S jjrS\R                  S\R                  4S jrSrU =r	$ )ÚEomtMLPiG  r4   c                 óz  >• [         TU ]  5         UR                  =p#[        UR                  UR                  -  5      n[
        R                  " X$SS9U l        [        UR                  [        5      (       a  [        UR                     U l        OUR                  U l        [
        R                  " XCSS9U l        g )NT©Úbias)rd   re   r2  r   Ú	mlp_ratior   rn  Úfc1r3  Ú
hidden_actr&  r
   Ú
activationÚfc2©rg   rž   Úin_featuresÚout_featuresÚhidden_featuresrh   s        €r1   re   ÚEomtMLP.__init__H  s   ø€ Ü‰ÑÔØ%+×%7Ñ%7Ð7ˆÜ˜f×0Ñ0°6×3CÑ3CÑCÓDˆÜ—9’9˜[ÀÑEˆŒÜf×'Ñ'¬×-Ñ-Ü$ V×%6Ñ%6Ñ7ˆDOà$×/Ñ/ˆDŒOÜ—9’9˜_ÀÑFˆr0   r†  c                 ól   • U R                  U5      nU R                  U5      nU R                  U5      nU$ rã   )r©  r«  r¬  r‰  s     r1   rŒ   ÚEomtMLP.forwardS  s2   € Ø—x‘x Ó-ˆØ—‘ |Ó4ˆØ—x‘x Ó-ˆØÐr0   )r«  r©  r¬  r‹  rŒ  r“   s   @r1   r¤  r¤  G  s)   ø† ÷	Gð E§L¡Lð °U·\±\÷ ò r0   r¤  c                   óf   ^ • \ rS rSrSU 4S jjrS\R                  S\R                  4S jrSrU =r	$ )ÚEomtSwiGLUFFNiZ  r4   c                 ó$  >• [         TU ]  5         UR                  =p#[        UR                  UR                  -  5      n[        US-  S-  5      S-   S-  S-  n[
        R                  " USU-  SS9U l        [
        R                  " XCSS9U l        g )Nr6   r	   é   é   Tr¦  )	rd   re   r2  r   r¨  r   rn  Ú
weights_inÚweights_outr­  s        €r1   re   ÚEomtSwiGLUFFN.__init__[  s†   ø€ Ü‰ÑÔØ%+×%7Ñ%7Ð7ˆÜ˜f×0Ñ0°6×3CÑ3CÑCÓDˆÜ˜°Ñ2°QÑ6Ó7¸!Ñ;ÀÑAÀAÑEˆäŸ)š) K°°_Ñ1DÈ4ÑPˆŒÜŸ9š9 _ÈÑNˆÕr0   r†  c                 ó®   • U R                  U5      nUR                  SSS9u  p#[        R                  R	                  U5      U-  nU R                  U5      $ )Nr6   rD   r  )r¹  Úchunkr   r:   Úsilurº  )rg   r†  Úx1Úx2Úhiddens        r1   rŒ   ÚEomtSwiGLUFFN.forwardd  sQ   € Ø—‘ |Ó4ˆØ×#Ñ# A¨2Ð#Ð.‰ˆÜ—‘×#Ñ# BÓ'¨"Ñ,ˆØ×Ñ Ó'Ð'r0   )r¹  rº  r‹  rŒ  r“   s   @r1   rµ  rµ  Z  s)   ø† ÷Oð( E§L¡Lð (°U·\±\÷ (ò (r0   rµ  c                   óÞ   ^ • \ rS rSrSrS\SS4U 4S jjr  SS\R                  S\	\R                     S	\
S\\\R                  \R                  4   \\R                     4   4S
 jjrSrU =r$ )Ú	EomtLayerik  zCThis corresponds to the Block class in the original implementation.rž   r4   Nc                 ó  >• [         TU ]  5         [        R                  " UR                  UR
                  S9U l        [        U5      U l        [        U5      U l
        UR                  S:”  a  [        UR                  5      O[        R                  " 5       U l        [        R                  " UR                  UR
                  S9U l        UR                   (       a  [#        U5      U l        O['        U5      U l        [        U5      U l        g )N©Úepsrw  )rd   re   r   Ú	LayerNormr2  Úlayer_norm_epsÚnorm1re  Ú	attentionr  Úlayer_scale1Údrop_path_rater˜  ÚIdentityr–  Únorm2Úuse_swiglu_ffnrµ  Úmlpr¤  Úlayer_scale2rs  s     €r1   re   ÚEomtLayer.__init__n  s¼   ø€ Ü‰ÑÔä—\’\ &×"4Ñ"4¸&×:OÑ:OÑPˆŒ
Ü& vÓ.ˆŒÜ*¨6Ó2ˆÔØ@F×@UÑ@UÐX[Ó@[œ f×&;Ñ&;Ô<Ôac×alÒalÓanˆŒä—\’\ &×"4Ñ"4¸&×:OÑ:OÑPˆŒ
à× × Ü$ VÓ,ˆDHä˜v“ˆDŒHÜ*¨6Ó2ˆÕr0   r!   Ú	head_maskÚoutput_attentionsc                 ó>  • U R                  U R                  U5      UUS9nUS   nU R                  U5      nUSS  nU R                  U5      U-   nU R	                  U5      nU R                  U5      nU R                  U5      nU R                  U5      U-   nU4U-   nU$ )N)rÕ  r   r   )rË  rÊ  rÌ  r–  rÏ  rÑ  rÒ  )rg   r!   rÔ  rÕ  Úself_attention_outputsÚattention_outputÚoutputsÚlayer_outputs           r1   rŒ   ÚEomtLayer.forward~  s¿   € ð "&§¡ØJ‰J}Ó%ØØ/ð "0ð "
Ðð
 2°!Ñ4Ðà×,Ñ,Ð-=Ó>ÐØ(¨¨Ð,ˆð Ÿ™Ð'7Ó8¸=ÑHˆð —z‘z -Ó0ˆØ—x‘x Ó-ˆØ×(Ñ(¨Ó6ˆð —~‘~ lÓ3°mÑCˆà/ GÑ+ˆàˆr0   )rË  r–  rÌ  rÒ  rÑ  rÊ  rÏ  )NF)r%   r&   r'   r(   r)   r   re   r*   r   r   r¿   r   r-   rŒ   r/   r’   r“   s   @r1   rÄ  rÄ  k  sƒ   ø† ÙMð3˜zð 3¨d÷ 3ð& -1Ø"'ñ	à—|‘|ðð ˜EŸL™LÑ)ðð  ð	ð
 
ˆuU—\‘\ 5§<¡<Ð/Ñ0°%¸¿¹Ñ2EÐEÑ	F÷ó r0   rÄ  c                   óf   ^ • \ rS rSrSU 4S jjrS\R                  S\R                  4S jrSrU =r	$ )ÚEomtLayerNorm2diž  c                 ó"   >• [         TU ]  XUS9  g )N)rÇ  Úelementwise_affine)rd   re   )rg   r1  rÇ  Úaffinerh   s       €r1   re   ÚEomtLayerNorm2d.__init__Ÿ  s   ø€ Ü‰Ñ˜À6ÐÒJr0   r†  r4   c                 óØ   • UR                  SSSS5      n[        R                  " XR                  U R                  U R
                  U R                  5      nUR                  SSSS5      nU$ )Nr   r6   r	   r   )ÚpermuteÚFÚ
layer_normÚnormalized_shaperÎ   r§  rÇ  r‰  s     r1   rŒ   ÚEomtLayerNorm2d.forward¢  s`   € Ø#×+Ñ+¨A¨q°!°QÓ7ˆÜ—|’| L×2GÑ2GÈÏÉÐVZ×V_ÑV_Ðae×aiÑaiÓjˆØ#×+Ñ+¨A¨q°!°QÓ7ˆØÐr0   r$   )gíµ ÷Æ°>TrŒ  r“   s   @r1   rÝ  rÝ  ž  s)   ø† ÷Kð E§L¡Lð °U·\±\÷ ò r0   rÝ  c                   ój   ^ • \ rS rSrS\4U 4S jjrS\R                  S\R                  4S jr	Sr
U =r$ )ÚEomtScaleLayeri©  rž   c           	      óü   >• [         TU ]  5         UR                  n[        R                  " X"SSS9U l        [        UR                     U l        [        R                  " UUSSUSS9U l
        [        U5      U l        g )Nr6   r,  r	   r   F)r-  ÚpaddingÚgroupsr§  )rd   re   r2  r   ÚConvTranspose2dÚconv1r
   rª  r«  r8  Úconv2rÝ  Úlayernorm2d©rg   rž   r2  rh   s      €r1   re   ÚEomtScaleLayer.__init__ª  ss   ø€ Ü‰ÑÔØ×(Ñ(ˆÜ×'Ò'¨ÈaÐXYÑZˆŒ
Ü  ×!2Ñ!2Ñ3ˆŒÜ—Y’YØØØØØØñ
ˆŒ
ô +¨;Ó7ˆÕr0   r!   r4   c                 óŽ   • U R                  U5      nU R                  U5      nU R                  U5      nU R                  U5      nU$ rã   )rî  r«  rï  rð  rœ  s     r1   rŒ   ÚEomtScaleLayer.forwardº  sB   € ØŸ
™
 =Ó1ˆØŸ™¨Ó6ˆØŸ
™
 =Ó1ˆØ×(Ñ(¨Ó7ˆØÐr0   )r«  rî  rï  rð  )r%   r&   r'   r(   r   re   r*   ry   r   rŒ   r/   r’   r“   s   @r1   ré  ré  ©  s/   ø† ð8˜z÷ 8ð  U§\¡\ð °e·l±l÷ ò r0   ré  c                   ój   ^ • \ rS rSrS\4U 4S jjrS\R                  S\R                  4S jrSr	U =r
$ )ÚEomtScaleBlockiÂ  rž   c                 óÜ   >• [         TU ]  5         UR                  U l        [        R
                  " [        U R                  5       Vs/ sH  n[        U5      PM     sn5      U l        g s  snf rã   )	rd   re   Únum_upscale_blocksÚ
num_blocksr   Ú
ModuleListrs   ré  Úblock©rg   rž   rÄ   rh   s      €r1   re   ÚEomtScaleBlock.__init__Ã  sM   ø€ Ü‰ÑÔØ ×3Ñ3ˆŒÜ—]’]ÄEÈ$Ï/É/ÔDZÓ#[ÑDZ¸q¤N°6Ö$:ÑDZÑ#[Ó\ˆ
ùÒ#[s   ÁA)r!   r4   c                 ó<   • U R                    H  nU" U5      nM     U$ rã   )rû  )rg   r!   rû  s      r1   rŒ   ÚEomtScaleBlock.forwardÈ  s    € Ø—Z”ZˆEÙ! -Ó0ŠMñ  àÐr0   )rû  rù  ©r%   r&   r'   r(   r   re   r*   r   rŒ   r/   r’   r“   s   @r1   rö  rö  Â  s1   ø† ð]˜z÷ ]ð
 U§\¡\ð °e·l±l÷ ò r0   rö  c                   ój   ^ • \ rS rSrS\4U 4S jjrS\R                  S\R                  4S jrSr	U =r
$ )ÚEomtMaskHeadiÎ  rž   c                 ó  >• [         TU ]  5         UR                  n[        R                  " X"5      U l        [        R                  " X"5      U l        [        R                  " X"5      U l        [        UR                     U l
        g rã   )rd   re   r2  r   rn  r©  r¬  Úfc3r
   rª  r«  rñ  s      €r1   re   ÚEomtMaskHead.__init__Ï  s[   ø€ Ü‰ÑÔà×(Ñ(ˆÜ—9’9˜[Ó6ˆŒÜ—9’9˜[Ó6ˆŒÜ—9’9˜[Ó6ˆŒÜ  ×!2Ñ!2Ñ3ˆr0   r!   r4   c                 ó¨   • U R                  U R                  U5      5      nU R                  U R                  U5      5      nU R                  U5      nU$ rã   ©r«  r©  r¬  r  rœ  s     r1   rŒ   ÚEomtMaskHead.forwardØ  sD   € ØŸ™¨¯©°Ó(?Ó@ˆØŸ™¨¯©°Ó(?Ó@ˆØŸ™ Ó/ˆØÐr0   r  r   r“   s   @r1   r  r  Î  s/   ø† ð4˜z÷ 4ð U§\¡\ð °e·l±l÷ ò r0   r  c                   ód   • \ rS rSr% Sr\\S'   SrSrSr	S/r
SrSrS	\R                  S
S4S jrSrg)ÚEomtPreTrainedModeliß  zz
An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
models.
rž   Úeomtr;  FrÄ  TrW  r4   Nc                 óD  • U R                   R                  n[        U[        R                  [        R
                  [        R                  45      (       aÃ  [        R                  R                  UR                  [        R                  " S5      S9  UR                  by  [        R                  R                  UR                  5      u  p4US:”  a  S[        R                  " U5      -  OSn[        R                  R                  UR                  U* U5        g g [        U[        R                  5      (       aJ  UR                  R                   R#                  S5        UR                  R                   R%                  5         g [        U[        R&                  5      (       ad  UR                  R                   R)                  SSS9  UR*                  b2  UR                  R                   UR*                     R%                  5         g g [        U[,        5      (       aL  [/        US5      (       a:  UR0                  R                   R#                  U R                   R2                  5        g g [        U[4        5      (       a§  [        R                  R7                  UR8                  R                   R;                  [<        R>                  5      SUS9R;                  UR8                  R@                  5      UR8                  l        URB                  R                   R%                  5         g g )	Né   )Úar   r   r7   rw  )r™   Ústdr„  )"rž   Úinitializer_ranger3  r   rn  r8  rí  ÚinitÚkaiming_uniform_rÎ   ÚmathÚsqrtr§  Ú_calculate_fan_in_and_fan_outÚuniform_rÈ  ÚdataÚfill_Úzero_rO  Únormal_Úpadding_idxr  Úhasattrr„  rƒ  rA  Útrunc_normal_rG  ru   r*   r_  rr   rI  )rg   rW  r  Úfan_inrÄ   Úbounds         r1   Ú_init_weightsÚ!EomtPreTrainedModel._init_weightsî  s!  € Øk‰k×+Ñ+ˆÜfœrŸy™y¬"¯)©)´R×5GÑ5GÐH×IÑIÜG‰G×$Ñ$ V§]¡]´d·i²iÀ³lÐ$ÑCØ{‰{Ñ&ÜŸG™G×AÑAÀ&Ç-Á-ÓP‘	Ø17¸!³˜œDŸIšI fÓ-Ò-ÀÜ—‘× Ñ  §¡¨u¨f°eÕ<ð 'ô ˜¤§¡×-Ñ-ØM‰M×Ñ×$Ñ$ SÔ)ØK‰K×Ñ×"Ñ"Õ$Ü˜¤§¡×-Ñ-ØM‰M×Ñ×&Ñ&¨C°QÐ&Ñ7Ø×!Ñ!Ñ-Ø—‘×"Ñ" 6×#5Ñ#5Ñ6×<Ñ<Õ>ð .ä˜¤×/Ñ/Üv˜y×)Ñ)Ø—‘×#Ñ#×)Ñ)¨$¯+©+×*FÑ*FÕGð *ä˜¤×/Ñ/Ü$&§G¡G×$9Ñ$9Ø× Ñ ×%Ñ%×(Ñ(¬¯©Ó7¸cÀsð %:ð %ç‰b×!Ñ!×'Ñ'Ó(ð ×ÑÔ!ð ×"Ñ"×'Ñ'×-Ñ-Õ/ð	 0r0   r$   )r%   r&   r'   r(   r)   r   r,   Úbase_model_prefixÚmain_input_nameÚsupports_gradient_checkpointingÚ_no_split_modulesÚ_supports_sdpaÚ_supports_flash_attnr   ÚModuler   r/   r$   r0   r1   r
  r
  ß  sJ   ‡ ñð
 ÓØÐØ$€OØ&+Ð#Ø$˜ÐØ€NØÐð0 B§I¡Ið 0°$÷ 0r0   r
  zV
    The EoMT Model with head on top for instance/semantic/panoptic segmentation.
    c                   ó:  ^ • \ rS rSrSrS\SS4U 4S jjrS\S\S	\S
\S\\	\4   S\\	\4   4S jr
S\\	\4   S\4S jr\\     SS\S	\\\      S
\\\      S\\   S\\   S\\\      S\4S jj5       5       rS rS\R                  4S jr\S 5       rSrU =r$ )ÚEomtForUniversalSegmentationi  r;  rž   r4   Nc                 óÜ  >• [         TU ]  U5        Xl        UR                  U l        [	        U5      U l        [        R                  " UR                  UR                  S9U l
        [        R                  " UR                  UR                  5      U l        [        R                  " [        UR                  5       Vs/ sH  n[!        U5      PM     sn5      U l        [%        U5      U l        [)        U5      U l        [        R,                  " UR                  UR.                  S-   5      U l        UR2                  UR4                  -  UR2                  UR4                  -  4U l        UR8                  UR:                  UR<                  S.U l        [A        XR>                  S9U l!        U RE                  S[F        RH                  " URJ                  5      5        U RM                  5         g s  snf )NrÆ  r   )rÐ   rè   ré   )rž   rŸ   Úattn_mask_probs)'rd   re   rž   Únum_hidden_layersrA  r>  r   rÈ  r2  rÉ  Ú	layernormrO  r×   rX  rú  rs   rÄ  Úlayersrö  Úupscale_blockr  Ú	mask_headrn  r£   Úclass_predictorr/  r0  Ú	grid_sizer«   r­   r¬   rŸ   r   rV   r§   r*   r¦   rù  Ú	post_initrü  s      €r1   re   Ú%EomtForUniversalSegmentation.__init__  sr  ø€ Ü‰Ñ˜Ô ØŒØ!'×!9Ñ!9ˆÔÜ(¨Ó0ˆŒÜŸš f×&8Ñ&8¸f×>SÑ>SÑTˆŒä—\’\ &×"4Ñ"4°f×6HÑ6HÓIˆŒ
Ü—m’mÄÀf×F^ÑF^Ô@_Ó$`Ñ@_¸1¤Y¨vÖ%6Ñ@_Ñ$`ÓaˆŒä+¨FÓ3ˆÔÜ% fÓ-ˆŒä!Ÿyšy¨×);Ñ);¸V×=NÑ=NÐQRÑ=RÓSˆÔà ×+Ñ+¨v×/@Ñ/@Ñ@À&×BSÑBSÐW]×WhÑWhÑBhÐiˆŒà"(×"5Ñ"5Ø×+Ñ+Ø×+Ñ+ñ.
ˆÔô "¨×=MÑ=MÑNˆŒà×ÑÐ.´·
²
¸6×;LÑ;LÓ0MÔNà‰Õùò% %as   Â=G)r   r   rj   rk   r  c                 óº   • U R                  UUUUUS9nU R                  R                  5        H)  u  pxUR                  5        H  u  pšXy;   d  M  X¨-  n
M     M+     U$ )N©r   r   rj   rk   r  )rV   rŸ   r  )rg   r   r   rj   rk   r  r  r  rÎ   Úloss_keyr   s              r1   Úget_loss_dictÚ*EomtForUniversalSegmentation.get_loss_dict+  sj   € ð (,§~¡~Ø!5Ø!5Ø#Ø%Ø"7ð (6ð (
ˆ	ð  ×+Ñ+×1Ñ1Ö3‰KˆCØ"+§/¡/Ö"3‘Ø•?Ø‘N’Dó #4ñ 4ð
 Ðr0   r  c                 ó4   • [        UR                  5       5      $ rã   )rI   r}  )rg   r  s     r1   Úget_lossÚ%EomtForUniversalSegmentation.get_lossC  s   € Ü9×#Ñ#Ó%Ó&Ð&r0   Úoutput_hidden_statesrÕ  r#   c           
      ó  • Ub  UOU R                   R                  nUb  UOU R                   R                  nU(       a  SOSnU(       a  SOSnSu  pšSnUc  [        S5      eU R	                  U5      n[        U R                  5       GHì  u  pÞU(       a  X|4-  nXÐR                  U R                   R                  -
  :X  am  U R                  R                  SSS2SS24   R                  UR                  S   SS5      R                  UR                  5      n[        R                   " Xü4SS9nXÐR                  U R                   R                  -
  :¼  Gaÿ  U R"                  (       d7  U R$                  XÐR                  -
  U R                   R                  -      S:”  Ga·  U R'                  U5      nU R)                  U5      u  nnU	U4-  n	U
U4-  n
[        R*                  " UR                  S   UR                  S   UR                  S   UR                  [        R,                  S	9n[.        R0                  " UU R2                  S
S9nUR5                  UR7                  S5      UR7                  S5      S5      nU R                   R8                  nUU R                  R:                  -   nUS:„  USS2SU2US24'   U R=                  UU R$                  XÐR                  -
  U R                   R                  -      UUUR                  S9nUSS2SS4   R                  SU R                   R>                  SS5      nURA                  5       RC                  U) S5      nU" XËU5      nUS   nU(       d  GMã  UUS   4-  nGMï     U R'                  U5      nU(       a  UU4-  nU R)                  U5      u  nnU	U4-  n	U
U4-  n
SnUbA  Ub>  Sn[E        Xš5       H-  u  nnU RG                  UUUUSS9nUU RI                  U5      -  nM/     [K        UUUUUUUS9$ )a(  
mask_labels (`list[torch.Tensor]`, *optional*):
    list of mask labels of shape `(num_labels, height, width)` to be fed to a model
class_labels (`list[torch.LongTensor]`, *optional*):
    list of target class labels of shape `(num_labels, height, width)` to be fed to a model. They identify the
    labels of `mask_labels`, e.g. the label of `mask_labels[i][j]` if `class_labels[i][j]`.
patch_offsets (`list[torch.Tensor]`, *optional*):
    list of tuples indicating the image index and start and end positions of patches for semantic segementation.
Nr$   )r$   r$   z You have to specify pixel_valuesr   rD   r   r  )rn   rr   Úbilinear)ÚsizeÚmode)ÚprobÚnum_query_tokensÚencoder_start_tokensrn   .g    eÍÍÁrw  r7  )r   r   r   r    r!   r"   r#   )&rž   r>  rÕ  rf   r>  r²   r/  r-  rù  rX  rÎ   rQ  rQ   ru   rn   r*   rÓ   r^  r,  r.  Úpredictr¦   r¿   rä  Úinterpolater3  r
  rA  r×   rN  Ú_disable_attention_maskrh  r   Úmasked_fillrÀ   r9  r<  r   )rg   r;  rj   rk   r>  rÕ  r#   Úall_hidden_statesÚall_attentionsÚmasks_queries_logits_per_layerÚclass_queries_logits_per_layerrY  r!   rØ   Úlayer_modulerX  Únorm_hidden_statesr   r   Úinterpolated_logitsrD  rE  Úlayer_outputsÚsequence_outputr   r  s                             r1   rŒ   Ú$EomtForUniversalSegmentation.forwardF  s9  € ð* %9Ñ$DÑ È$Ï+É+×JjÑJjð 	ð 2CÑ1NÑ-ÐTX×T_ÑT_×TqÑTqÐæ"6™B¸DÐÞ0™°dˆàIOÑFÐ&ØˆàÑÜÐ?Ó@Ð@àŸ™¨Ó5ˆä!*¨4¯;©;×!7ÑˆCÞ#Ø!Ð%5Ñ5Ð!à×,Ñ,¨t¯{©{×/EÑ/EÑEÓEØŸ
™
×)Ñ)¨$²²1¨*Ñ5×<Ñ<¸]×=PÑ=PÐQRÑ=SÐUWÐY[Ó\×_Ñ_Ð`m×`tÑ`tÓuÜ %§	¢	¨5Ð*@ÀaÑ Hà×,Ñ,¨t¯{©{×/EÑ/EÑEÔEØ—— ×!5Ñ!5°c×<RÑ<RÑ6RÐUY×U`ÑU`×UkÑUkÑ6kÑ!lÐopÔ!pà%)§^¡^°MÓ%BÐ"Ø=A¿\¹\ÐJ\Ó=]Ñ:Ð$Ð&:à.Ð3GÐ2IÑIÐ.Ø.Ð3GÐ2IÑIÐ.ä!&§¢Ø!×'Ñ'¨Ñ*Ø!×'Ñ'¨Ñ*Ø!×'Ñ'¨Ñ*Ø(×/Ñ/ÜŸ*™*ñ"ô '(§m¢mÐ4HÈtÏ~É~ÐdnÑ&oÐ#Ø&9×&>Ñ&>Ø'×,Ñ,¨QÓ/Ð1D×1IÑ1IÈ!Ó1LÈbó'Ð#ð $(§;¡;×#:Ñ#:Ð Ø'7¸$¿/¹/×:[Ñ:[Ñ'[Ð$ð ObÐdeÑNešqÐ"3Ð#3Ð"3Ð5IÑ5JÐJÑKð "&×!=Ñ!=Ø"Ø×-Ñ-¨c×4JÑ4JÑ.JÈTÏ[É[×McÑMcÑ.cÑdØ%5Ø)=Ø)×0Ñ0ð ">ð "ð "0²°4¸°Ñ!=×!DÑ!DÀRÈÏÉ×IhÑIhÐjlÐnpÓ!qØ!/×!5Ñ!5Ó!7×!CÑ!CÀ^ÀOÐUYÓ!Zá(¨ÐHYÓZˆMØ)¨!Ñ,ˆMç Ñ Ø =°Ñ#3Ð"5Ñ5“ñk "8ðn Ÿ.™.¨Ó7ˆÞØ /Ð!3Ñ3Ðà59·\±\À/Ó5RÑ2ÐÐ2Ø&Ð+?Ð*AÑAÐ&Ø&Ð+?Ð*AÑAÐ&àˆØÑ" |Ñ'?ØˆDÜ>AØ.ö?Ñ:Ð$Ð&:ð !×.Ñ.Ø)=Ø)=Ø +Ø!-Ø*.ð /ð 	ð ˜Ÿ™ iÓ0Ñ0’ñ?ô 2ØØ!5Ø!5Ø-Ø+Ø%Ø'ñ
ð 	
r0   c                 ó.   • U R                   R                  $ rã   )r>  rJ  r   s    r1   Úget_input_embeddingsÚ1EomtForUniversalSegmentation.get_input_embeddingsÂ  s   € Ø‰×/Ñ/Ð/r0   rå   c                 óÌ  • US S 2S U R                   R                  2S S 24   nU R                  U5      nUS S 2U R                   R                  U R                  R                  -   S 2S S 24   nUR                  SS5      nUR                  " UR                  S   S/U R                  Q76 nU R                  U5      nU R                  U5      n[        R                  " SX$5      nXS4$ )Nr   r6   r   rD   zbqc, bchw -> bqhw)rž   r×   r2  r>  rN  rÕ   ry  rQ   r3  r1  r0  r*   Úeinsum)rg   rå   Úquery_tokensÚclass_logitsÚprefix_tokensÚmask_logitss         r1   rF  Ú$EomtForUniversalSegmentation.predictÅ  sÑ   € ØšaÐ!: 4§;¡;×#:Ñ#:Ð!:ºAÐ=Ñ>ˆØ×+Ñ+¨LÓ9ˆàšq $§+¡+×"9Ñ"9¸D¿O¹O×<]Ñ<]Ñ"]Ñ"_ÒabÐbÑcˆØ%×/Ñ/°°1Ó5ˆà%×-Ò-¨m×.AÑ.AÀ!Ñ.DÀbÐZÈ4Ï>É>ÒZˆà—~‘~ lÓ3ˆØ×*Ñ*¨=Ó9ˆä—l’lÐ#6¸ÓTˆàÐ(Ð(r0   c                 ó~   • US:  a6  [         R                  " U R                  S   X$S9U:„  nSU S S 2S U2US 24   U'   U $ )Nr   r   rm   )r*   rv   rQ   )Ú	attn_maskrC  rD  rE  rn   Úrandom_queriess         r1   rH  Ú4EomtForUniversalSegmentation._disable_attention_maskÕ  sT   € à!‹8ä"ŸZšZ¨	¯©¸Ñ(:Ð<LÑ\Ð_cÑcˆNð VWˆI’aÐ*Ð*Ð*Ð,@Ñ,AÐAÑBÀ>ÑRàÐr0   )r2  rž   rV   r>  r3  r.  r/  r1  r-  rX  r0  rŸ   )NNNNN)r%   r&   r'   r(   r#  r   re   r   r%  r&  r9  r<  r   r   r   r.   r¿   r   rŒ   rU  r*   rF  ÚstaticmethodrH  r/   r’   r“   s   @r1   r*  r*    sQ  ø† ð %€Oð˜zð ¨d÷ ð8à$ðð %ðð ð	ð
 ðð  $ C¨ KÑ0ðð 
ˆc6ˆkÑ	ôð0' $ s¨F {Ñ"3ð '¸ô 'ð Øð /3Ø/3Ø/3Ø,0Ø04ñx
àðx
ð ˜d 6™lÑ+ðx
ð ˜t F™|Ñ,ð	x
ð
 ' t™nðx
ð $ D™>ðx
ð    V¡Ñ-ðx
ð 
,ôx
ó ó ðx
òt0ð)˜eŸl™lô )ð  ñó ör0   r*  )F)rw  )rw  F)FÚcollections.abcr4  r  Údataclassesr   Útypingr   r   r   Únumpyr'  r*   Útorch.nn.functionalr   r:   rä  r   Úactivationsr
   Ú
file_utilsr   r   r   Úmodeling_layersr   Úmodeling_utilsr   r   Úutilsr   r   r   Úconfiguration_eomtr   Úscipy.optimizer   Ú
accelerater   Úaccelerate.utilsr   r   r@   rL   r[   r(  r]   r   r—   r›   r   r*  rA  r   rc  re  r  r¿   r–  r˜  r¤  rµ  rÄ  rÈ  rÝ  ré  rö  r  r
  r*  Ú__all__r$   r0   r1   Ú<module>rr     s  ðó, Û Ý !ß ,Ñ ,ã Û ß Ð ß å !ß LÑ LÝ 9ß Fß NÑ NÝ *ñ ×ÑÝ4á×ÑÝ'Ý'ð Ùðñ	ô7¨ó 7ó	ó ð7ðB LQñØ—L‘LðØ5:·\±\ðà
‡\\õð@ ð °ð ¸6ô ð,°·±ð ÀuÇ|Á|ð ÐX]×XdÑXdô ô8g˜2Ÿ9™9ô gðTfð  fð ¸ð Àô ð< u§|¡|ð ¸U¿\¹\ð ÐVYð Ð^c×^jÑ^jô ô(uˆry‰yô uôp	˜"Ÿ)™)ô ôB"R—Y‘Yô "ðX ñ%ØI‰Ið%à<‰<ð%ð 
‰ð%ð <‰<ð	%ð
 ˜UŸ\™\Ñ*ð%ð ð%ð õ%ô.;)B—I‘Iô ;)ô|+R—Y‘Yô +ñU—\‘\ð ¨eð ÀTð ÐV[×VbÑVbõ ô(%2—9‘9ô %ôˆbi‰iô ô&(B—I‘Iô (ô"0Ð*ô 0ôfb—l‘lô ôR—Y‘Yô ô2	R—Y‘Yô 	ô2—9‘9ô ð" ô$0˜/ó $0ó ð$0ñN ðñô
RÐ#6ó Róð
Rðj !Ð"@Ð
Ar0   