
    <ha|              
          S r SSKrSSKrSSKrSSKJrJr  SSKJr  SSK	J
r
Jr  SSKJrJrJrJrJr  SSKJr  SS	KJrJr  \\\\\4      \\\\\\4      \\\\\4         \\\\\\4         4   r " S
 S\SS9r " S S\SS9r " S S\SS9r " S S\5      rS\\\\\4   S\S\\\4   4S jr S\S\S\4S jr!S r"S r#S r$SS jr%S/r&g) zProcessor class for KOSMOS-2.    N)OptionalUnion   )BatchFeature)
ImageInput
is_batched)ImagesKwargsProcessingKwargsProcessorMixin
TextKwargsUnpack)
AddedToken)BatchEncoding	TextInputc                   L    \ rS rSr% \\\      \S'   \\   \S'   \\   \S'   Sr	g)Kosmos2ImagesKwargs%   bboxesnum_image_tokensfirst_image_token_id N)
__name__
__module____qualname____firstlineno__r   listfloat__annotations__int__static_attributes__r       f/var/www/html/shao/venv/lib/python3.13/site-packages/transformers/models/kosmos2/processing_kosmos2.pyr   r   %   s%    T%[!!sm#"3-'r!   r   F)totalc                   &    \ rS rSr% \\   \S'   Srg)Kosmos2TextKwargs+   add_eos_tokenr   N)r   r   r   r   r   boolr   r    r   r!   r"   r%   r%   +   s    D>!r!   r%   c            
       L    \ rS rSr% \\S'   \\S'   SSSSSSSSSS.	SS	0S
.rSrg)Kosmos2ProcessorKwargs/   text_kwargsimages_kwargsTFr   )	add_special_tokenspaddingstridereturn_overflowing_tokensreturn_special_tokens_maskreturn_offsets_mappingreturn_token_type_idsverboser'   r   @   )r,   r-   r   N)	r   r   r   r   r%   r   r   	_defaultsr    r   r!   r"   r*   r*   /   sC    ""&& #').*/&+%*"

 
Ir!   r*   c                   |  ^  \ rS rSrSrSS/rSrSrSU 4S jjr    SS\	S	\
\\\   4   S
\\   S\4S jjrS rS r   SS\
\\\   4   S\	S\S\\   S\
\\\   4   4
S jjrS rS rSS jrSS jr\S 5       rS	\S\
\\\      \\\      4   S\4S jr S\
\\\4   \\\\\4   4   S\\\4   4S jr!Sr"U =r#$ ) Kosmos2ProcessorD   a  
Constructs an KOSMOS-2 processor which wraps a KOSMOS-2 image processor and a KOSMOS-2 tokenizer into a single
processor.

[`Kosmos2Processor`] offers all the functionalities of [`CLIPImageProcessor`] and some functionalities of
[`XLMRobertaTokenizerFast`]. See the docstring of [`~Kosmos2Processor.__call__`] and [`~Kosmos2Processor.decode`]
for more information.

Args:
    image_processor (`CLIPImageProcessor`):
        An instance of [`CLIPImageProcessor`]. The image processor is a required input.
    tokenizer (`XLMRobertaTokenizerFast`):
        An instance of ['XLMRobertaTokenizerFast`]. The tokenizer is a required input.
    num_patch_index_tokens (`int`, *optional*, defaults to 1024):
        The number of tokens that represent patch indices.
image_processor	tokenizer)CLIPImageProcessorCLIPImageProcessorFastAutoTokenizerc                   > SUl         SU l        SU l        SU l        SU l        SU l        SU l        SU l        S	U l        S
U l	        SU l
        SU l        U R                  U R                  U R                  U R                  U R
                  U R                  U R                  U R                  U R                  U R                  U R                  /U l        X0l        [        U R                  5       Vs/ sH!  nS[        U5      R!                  S5       S3PM#     nn/ nU R                  U-    H  nUR#                  [%        USSSS95        M      UR'                  U5        [(        T	U ]U  X5        g s  snf )NFz</doc>z<image>z</image>z</chunk>z</line>z<phrase>z	</phrase>z<object>z	</object></delimiter_of_multi_objects/>z<grounding><patch_index_   >T)lstriprstrip
normalized)r4   	eod_token	boi_token	eoi_token	eoc_token	eol_token	bop_token	eop_token	boo_token	eoo_token	dom_token	grd_token
tag_tokensnum_patch_index_tokensrangestrzfillappendr   
add_tokenssuper__init__)
selfr;   r<   rT   kwargsxpatch_index_tokenstokens_to_addtoken	__class__s
            r"   r[   Kosmos2Processor.__init__Z   sE   */	'!"##"#$#$9& NNNNNNNNNNNNNNNNNNNNNN
 '=#JOPTPkPkJlmJlQc!fll1o->a@Jlm__'99E  E$uY^!_` :]+4 ns   3'E0imagestextr]   returnc           
      
   Uc  Uc  [        S5      eU R                  " [        4SU R                  R                  0UD6nUS   R                  SS5      nUS   R                  SS5      nUS   R                  SS5      n	US	   R                  S
S5      n
US	   S   nUS	   S   nUS	   R                  SS5      n[        5       nUb'  U R                  " U40 US   D6nUR                  U5        Ub  U R                  X!XxS9nU(       au  U
(       dn  [        U[        5      (       a  U R                  R                   U 3nO>[        U[        5      (       a)  U Vs/ sH  nU R                  R                   U 3PM     nnUS	   S   =(       a    U
US	   S'   Uc  UOSUS	   S'   Uc  UOSUS	   S'   U R                  " SSU0US	   D6nUR                  U5        XS	   S'   XS	   S'   XS	   S'   UGbI  UGbE  U	c  U R                  R                  S-   n	Un[!        U5      S-   n[        [#        XU-   5      5      nS/S/U-  -   S/-   n/ n/ nUS   n[        U[        5      (       a  U/nUS   /US'   U Hw  nUSU U-   UUU-   S -   nUR%                  U5        [&        R&                  " U5      nU(       a  S/U-   nUS/[)        U5      [)        U5      -
  -  -  nUR%                  U5        My     [        U[        5      (       Ga  [+        [-        WR.                  5       VVs/ sH  u  nnU[)        U5      4PM     snnS S9nUS   u  nnUS   u  nnUS	   S   =(       a    U
US	   S'   SUS	   S'   U R                  " SSUU   /0US	   D6n[)        UR.                  S   5      n UU :w  GaC  U R                  R0                  S:X  a  U Vs/ sH,  nUU R                  R2                  /U [)        U5      -
  -  -   PM.     nnU Vs/ sH  nUS/U [)        U5      -
  -  -   PM     nnUS    Vs/ sH  nUS/U [)        U5      -
  -  -   PM     snUS'   OU R                  R0                  S:X  a  U Vs/ sH,  nU R                  R2                  /U [)        U5      -
  -  U-   PM.     nnU Vs/ sH  nS/U [)        U5      -
  -  U-   PM     nnUS    Vs/ sH  nS/U [)        U5      -
  -  U-   PM     snUS'   [        U[        5      (       a  Uc  US   nUS   S   US'   US   nUR                  [5        UUS   US.US95        U$ s  snf s  snnf s  snf s  snf s  snf s  snf s  snf s  snf )a  
This method uses [`CLIPImageProcessor.__call__`] method to prepare image(s) for the model, and
[`XLMRobertaTokenizerFast.__call__`] to prepare text for the model.

Please refer to the docstring of the above two methods for more information.

The rest of this documentation shows the arguments specific to `Kosmos2Processor`.

Args:
    bboxes (`Union[list[tuple[int]], list[tuple[float]], list[list[tuple[int]]], list[list[tuple[float]]]]`, *optional*):
        The bounding bboxes associated to `texts`.
    num_image_tokens (`int`, *optional* defaults to 64):
        The number of (consecutive) places that are used to mark the placeholders to store image information.
        This should be the same as `latent_query_num` in the instance of `Kosmos2Config` you are using.
    first_image_token_id (`int`, *optional*):
        The token id that will be used for the first place of the subsequence that is reserved to store image
        information. If unset, will default to `self.tokenizer.unk_token_id + 1`.
    add_eos_token (`bool`, defaults to `False`):
        Whether or not to include `EOS` token id in the encoding when `add_special_tokens=True`.
Nz*You have to specify either images or text.tokenizer_init_kwargsr-   r   r   r6   r   r,   r'   Fr.   r/   return_tensors)r   re      r   	input_idsattention_maskc                     U S   $ Nr   )r^   s    r"   <lambda>+Kosmos2Processor.__call__.<locals>.<lambda>   s    defhdir!   )keyro   rightleft)rk   rl   image_embeds_position_mask)datatensor_typer   )
ValueError_merge_kwargsr*   r<   init_kwargspop
setdefaultr   r;   updatepreprocess_examples
isinstancerV   	bos_tokenr   unk_token_idr   rU   rX   copylensorted	enumeraterk   padding_sidepad_token_idr   )!r\   rd   re   audiovideosr]   output_kwargsr   r   r   r'   r.   r/   ri   encodingimage_encodingstext_encodingwith_bosstart_indeximage_token_idsbase_image_embeds_position_maskrk   ru   all_input_idstext_idsmaskidxr^   sorted_length_min_len_not_paddedmax_len_paddeds!                                    r"   __call__Kosmos2Processor.__call__   sh   8 >dlIJJ**"
"&.."<"<
 
 /33HdC(9==>PRTU,_=AABXZ^_%m488%P*=9:NO.y9&}5@@AQSWX>!11&[M/<Z[NOON+++D&+dD!-dC(("nn667v>Dd++FJKdt~~778<dDKm,-ABT} -()=> BHgUZM-(3OU~^cgM-()9: NNUUm8TUMOOM*=Om$%9:29m$Y/9Gm$%56 2#+'+~~'B'BQ'F$ *H h-!+K #5)=Vf?f#ghO/0cQC:J4J.JaS.P+ I)+&$[1M$$$!..67G.H-I)*)#L[1OCh{]mOmOoFpp  *yy!@A3:Ds8}s4y899*11$7 * $%% &1:=;R;R1ST1SvsAc3q6]1STZi! )6a(8%%&r*Q!-01EFX= m,-AB BFm,-=> $ `T#YK `=Q^C_ `!$]%<%<Q%?!@%7~~22g=lu$vlughQ$..*E*E)F.[^_`[aJa)b%blu	$vIc6IcAA~A'> ??Ic 3 6 JRRbIc6IcAA~A'> ??Ic6!12 44>lu$vlughdnn&A&A%BnWZ[\W]F]%^ab%blu	$vIc6IcAQC>CF#:;a?Ic 3 6 JRRbIc6IcAQC>CF#:;a?Ic6!12
 $$$)?%aL	-56F-G-J)*-G-J* OO%.*23C*D6P
 !/	 K Lj U %w66 %w66s0   #T8T=
2U<U#U&2UUUc                    Uc  g[        U[        5      (       d  [        S5      eU H  nUc  M  [        U[        5      (       d  U/nU Hq  n[        U[        5      (       aP  [	        U5      S:X  a  [        S U 5       5      (       a  M@  [	        U5      S:X  a  [        S U 5       5      (       a  Mh  [        S5      e   M     g)a  
Check `bboxes` for a single text example. It could be
    - `None`: no bounding box associated to a text.
    - A list with each element being the bounding boxes associated to one `<phrase> ... </phrase>` pair found
      in a text. This could be:
          - `None`: no bounding box associated to a `<phrase> ... </phrase>` pair.
          - A tuple of 2 integers: A single bounding box specified by patch indices.
          - A tuple of 4 float point number: A single bounding box specified by (normalized) coordinates.
          - A list containing the above 2 tuple types: Multiple bounding boxes for a
           `<phrase> ... </phrase>` pair.
Nz@`bboxes` (for a single text example) should be `None` or a list.   c              3   @   #    U H  n[        U[        5      v   M     g 7fN)r   r   .0r^   s     r"   	<genexpr>AKosmos2Processor._check_bboxes_for_single_text.<locals>.<genexpr>B  s     .S7az!S/A/A7   rC   c              3   @   #    U H  n[        U[        5      v   M     g 7fr   )r   r   r   s     r"   r   r   C  s     1XPW1*Q2F2FPWr   a'  Each element in `bboxes` (for a single text example) should be either `None`, a tuple containing 2 integers or 4 float point numbers, or a list containing such tuples. Also make sure the arguments `texts` and `bboxes` passed to `preprocess_text` are both in batches or both for a single example.)r   r   rx   tupler   all)r\   r   bboxelements       r"   _check_bboxes_for_single_text.Kosmos2Processor._check_bboxes_for_single_text)  s     >FD))_`` D|d++v!'511\Q&3.S7.S+S+SG)c1XPW1X.X.X$@    r!   c                 \    UR                  5       nUb  U SU 3nU R                  X5      nU$ )N )strip_insert_patch_index_tokens)r\   re   imager   img_info_tokenss        r"   _preprocess_single_example+Kosmos2Processor._preprocess_single_exampleL  s;    zz|%&av.D ..t<r!   textsr   r   c                 P   U R                   /U-  nSR                  U R                   /U-   U R                  /-   5      nSn[        U[        5      (       a  SnU/nUc  S/[        U5      -  nO[        U5      (       d  U/n[        U5      [        U5      :w  a$  [        S[        U5       S[        U5       S35      eU(       d  U R                  U5        U/nOMUb;  [        U[        5      (       d  [        S5      eU H  nU R                  U5        M     OS/[        U5      -  n[        U5      [        U5      :w  a$  [        S	[        U5       S[        U5       S35      e[        XU5       V	V
Vs/ sH  u  pnU R                  XX5      PM     nn
n	nU(       d  US
   nU$ s  snn
n	f )a  Add image and bounding box information to `texts` as image and patch index tokens.

Args:
    texts (`Union[TextInput, list[TextInput]]`): The texts to be processed.
    images (`ImageInput`, *optional*): The images associated to `texts`.
    bboxes (`Union[list[tuple[int]], list[tuple[float]], list[list[tuple[int]]], list[list[tuple[float]]]]`, *optional*):
        The bounding bboxes associated to `texts`.
    num_image_tokens (`int`, *optional*, defaults to 64):
        The number of image tokens (used as latent queries). This should corresponds to the `latent_query_num`
        attribute in `Kosmos2Config`.

Returns:
    `Union[TextInput, list[TextInput]]`: The processed texts with image and patch index tokens.
r   TFNzGThe number of examples in `texts` and `images` should be the same. Got  v.s. 	 instead.zS`bboxes` should be `None` or a list (as a batch) when `texts` is passed as a batch.zGThe number of examples in `texts` and `bboxes` should be the same. Got r   )rI   joinrJ   r   rV   r   r   rx   r   r   zipr   )r\   r   rd   r   r   
img_tokensr   batchedr^   re   r   r   results                r"   r~   $Kosmos2Processor.preprocess_examplesV  s   , nn%(88
((DNN#3j#@DNNCS#ST eS!!GGE>Vc%j(FF##XFu:V$YZ]^cZdYeeklopvlwkx  yB  C  ..v6XFfd++ !vww2215  Vc%j(Fv;#e*$YZ]^cZdYeeklopvlwkx  yB  C  &)%?
%?!T ++DO%? 	 

 AYF
s   1F!c                 :    U R                   R                  " U0 UD6$ )z
This method forwards all its arguments to PreTrainedTokenizer's [`~PreTrainedTokenizer.batch_decode`]. Please
refer to the docstring of this method for more information.
)r<   batch_decoder\   argsr]   s      r"   r   Kosmos2Processor.batch_decode  s    
 ~~**D;F;;r!   c                 :    U R                   R                  " U0 UD6$ )z
This method forwards all its arguments to PreTrainedTokenizer's [`~PreTrainedTokenizer.decode`]. Please refer to
the docstring of this method for more information.
)r<   decoder   s      r"   r   Kosmos2Processor.decode  s    
 ~~$$d5f55r!   c                 f    UR                  U R                  5      S   nU(       a  [        U5      $ U$ rn   )splitrJ   +clean_text_and_extract_entities_with_bboxes)r\   re   cleanup_and_extractcaptions       r"   post_process_generation(Kosmos2Processor.post_process_generation  s,    **T^^,R0>wGGr!   c                 t    U R                   " U4SU0UD6nU Vs/ sH  oPR                  USS9PM     sn$ s  snf )ah  
Post-process the output of the model to decode the text.

Args:
    generated_outputs (`torch.Tensor` or `np.ndarray`):
        The output of the model `generate` function. The output is expected to be a tensor of shape `(batch_size, sequence_length)`
        or `(sequence_length,)`.
    skip_special_tokens (`bool`, *optional*, defaults to `True`):
        Whether or not to remove special tokens in the output. Argument passed to the tokenizer's `batch_decode` method.
    **kwargs:
        Additional arguments to be passed to the tokenizer's `batch_decode method`.

Returns:
    `list[str]`: The decoded text.
skip_special_tokensF)r   )r   r   )r\   generated_outputsr   r]   generated_textsre   s         r"   post_process_image_text_to_text0Kosmos2Processor.post_process_image_text_to_text  sJ      ++,=qSfqjpqZijZiRV,,Tu,MZijjjs   5c                     U R                   R                  nU R                  R                  n[        [        R                  X-   5      5      $ r   )r<   model_input_namesr;   r   dictfromkeys)r\   tokenizer_input_namesimage_processor_input_namess      r"   r   "Kosmos2Processor.model_input_names  s>     !% @ @&*&:&:&L&L#DMM"7"UVWWr!   c                    Ub  [        U5      S:X  a  U$ [        [        R                  " SUS95      n[        U5      [        U5      :w  a$  [	        S[        U5       S[        U5       S35      eSn/ n[        X25       H  u  pgUR                  5       u  pUR                  XU	 5        U	nUc  M1  [        U[        5      (       a  U/n/ n
[        S U 5       5      (       d  [	        S5      eU H,  nU R                  U5      u  pU
R                  U S	U 35        M.     [        U
5      S:X  a  M  S
R                  U
5      nUR                  SU S35        M     U[        U5      :  a  UR                  XS  5        SR                  U5      nU$ )Nr   z<phrase>.+?</phrase>)stringzuThe number of elements in `bboxes` should be the same as the number of `<phrase> ... </phrase>` pairs in `text`. Got r   r   c              3   &   #    U H  oS Lv   M
     g 7fr   r   )r   boxs     r"   r   >Kosmos2Processor._insert_patch_index_tokens.<locals>.<genexpr>  s     7$3$$s   zTThe multiple bounding boxes for a single phrase should not contain any `None` value.r   z  </delimiter_of_multi_objects/> z	<object> z
 </object> )r   r   refinditerrx   r   spanrX   r   r   r   #_convert_bbox_to_patch_index_tokensr   )r\   re   r   matched_phrasescurr_posbuffermatchedr   r   endpatch_index_stringsr   patch_index_1patch_index_2position_strs                  r"   r   +Kosmos2Processor._insert_patch_index_tokens  s   >S[A-Kr{{+B4PQ3v;. H  IL  M\  I]  H^  ^d  eh  io  ep  dq  qz  {   9MG\\^FAMM$,-H|$&&v"$7$777 j  /3/W/WX[/\,#**m_Am_+MN  &'1,=BBCVWLMMIl^:>?/ :2 c$iMM$y/*wwvr!   r   c                    [        U5      S:X  a  Uu  p#O6[        [        R                  " U R                  5      5      n[        X5      u  p#S[        U5      R                  S5       S3nS[        U5      R                  S5       S3nXV4$ )Nr   rB   rC   rD   )r   r   mathsqrtrT   coordinate_to_patch_indexrV   rW   )r\   r   idx_1idx_2num_patches_per_sidetoken_1token_2s          r"   r   4Kosmos2Processor._convert_bbox_to_patch_index_tokens  s     t9>LE5 $'tyy1L1L'M#N 4TPLE!#e*"2"21"5!6a8!#e*"2"21"5!6a8r!   )rI   rO   rM   rQ   rK   rH   rJ   rL   rP   rN   rR   rT   rS   )i   )NNNN)NNr6   )T)$r   r   r   r   __doc__
attributesimage_processor_classtokenizer_classr[   r   r   r   r   r   r*   r   r   r   r   	BboxInputr   r   rV   r~   r   r   r   r   propertyr   r   r   r   r   r    __classcell__)rb   s   @r"   r9   r9   D   s   " $[1JL%O+5^ "26`` ItI./` /0` 
`D!F " *,@YY/0@ @ 	@
 #3-@ 
sDI~	@F<6k& X X
+s +E$uSzBRTXY^_dYeTfBf<g +lo +Z %S/5ue1K+LLM 	sCx   r!   r9   r   r   rf   c                 $   U u  p#pEXB:  a  XS:  d  [        S5      e[        R                  " X!-  5      n[        R                  " X1-  5      n[        R                  " XA-  S-
  5      n[        R                  " XQ-  S-
  5      n	Xq-  U-   n
X-  U-   nX4$ )a  Convert a bounding box to a pair of patch indices.

Args:
    bbox (`tuple[float, float, float, float]`):
        The 4 coordinates of the bounding box, with the format being (x1, y1, x2, y2) specifying the upper-left and
        lower-right corners of the box. It should have x2 > x1 and y2 > y1.
    num_patches_per_side (`int`): the number of patches along each side.

Returns:
    `tuple[int, int]`: A pair of patch indices representing the upper-left patch and lower-right patch.
zTThe coordinates in `bbox` should be `(x1, y1, x2, y2)` with `x2 > x1` and `y2 > y1`.rj   )rx   r   floorceil)r   r   x1y1x2y2ul_xul_ylr_xlr_yul_idxlr_idxs               r"   r   r     s     RRGopp::b/0D::b/0D99R.23D99R.23D(4/F(4/F>r!   r  r  c                     SU-  nX-  nX-  nX-  nX-  nX:X  a  XC-  nXS-  n	Xc-  U-   n
Xs-  U-   nOIXF:X  d  XW:X  a  XC-  nXS-  n	Xc-  U-   n
Xs-  U-   nO(XC-  US-  -   nXS-  US-  -   n	Xc-  US-  -   n
Xs-  US-  -   nXX4$ )ak  
Given a grid of length `num_patches_per_side` and the indices of the upper-left and lower-right corners of a
bounding box, returns the normalized coordinates of the bounding box, in the form (x1, y1, x2, y2).

Args:
    ul_idx (`int`): the index of the grid cell that corresponds to the upper-left corner of the bounding box.
    lr_idx (`int`): the index of the grid cell that corresponds to the lower-right corner of the bounding box.
    num_patches_per_side (`int`): the number of patches along each side.

Returns:
    `tuple[float]`: the normalized coordinates of the bounding box, in the form (x1, y1, x2, y2).
g      ?r   r   )r  r  r   	cell_sizer  r  r	  r
  r  r  r  r  s               r"   patch_index_to_coordinater  &  s     **I (D)D(D)D 	)	)		)	)	A-	A-	A-	A-2>r!   c           
      h   Sn[         R                  " X5      n/ nU GH  nUR                  S5      nUR                  5       u  pgnU(       d*  SnUR                  S5      S   UR                  S5      S   4nUR	                  S5      n	/ n
U	 H  n[         R
                  " SU5      n[         R
                  " SUSS 5      nU(       d  M=  U(       d  MF  U(       aE  U
R                  [        UR                  S5      5      [        UR                  S5      5      45        M  U
R                  [        UR                  S5      5      [        UR                  S5      5      45        M     U(       a  UR                  XuU
45        GMd  U
 H&  nSUS    S	US    S
3nUR                  XU/45        M(     GM     U$ )a  Extract entities contained in `text`. The bounding bboxes is given in the form of patch indices.

This functioin is only intended to be used within `clean_text_and_extract_entities_with_bboxes` where further
processing happens, including converting to normalized coordinates and whitespace character cleaning up.

Examples:

```python
>>> text = "<grounding> An image of<phrase> a snowman</phrase><object><patch_index_0044><patch_index_0863></object> warming himself by<phrase> a fire</phrase><object><patch_index_0005><patch_index_0911></object>."
>>> entities = extract_entities_with_patch_indices(text)
>>> entities
[(' a snowman', (31, 41), [(44, 863)]), (' a fire', (130, 137), [(5, 911)])]
```z(?:(<phrase>([^<]+)</phrase>))?<object>((?:<patch_index_\d+><patch_index_\d+></delimiter_of_multi_objects/>)*<patch_index_\d+><patch_index_\d+>)</object>r   Nr   rA   z<patch_index_(\d+)>rj   rB   z><patch_index_rD   )	r   r   r   groupsr   searchrX   r   group)re   patternmatchesentities_with_patch_indicesmatchr   
phrase_tagphrasematch_contentpatch_index_pairsentity_bboxespairr^   yr   entitys                   r"   #extract_entities_with_patch_indicesr   S  s~    kG kk'(G #%zz!},1LLN)
MFJJqM!$ejjmA&67D *//0PQ%D		0$7A		0$qr(;AqQQ!((#aggaj/3qwwqz?)KL!((#aggaj/3qwwqz?)KL & '..m/LM%(a	QyJ+22F4&3IJ &7 @ '&r!   c           	          U u  nu  p4[        [        R                  " SSUSU 5      5      n[        [        R                  " SSUSU 5      5      nX%U44nU$ )zfAdjust the positions of the entities in `text` to be relative to the text with special fields removed.<.*?>r   N)r   r   sub)r  re   entity_namestartr   adjusted_startadjusted_endadjusted_entitys           r"   adjust_entity_positionsr)    s[     &K%T&5\:;Nrvvgr4:67L"\$BCOr!   c                    U R                  5       n[        U 5      [        U R                  5       5      -
  n/ nU H  u  nu  pgn[        U5      [        UR                  5       5      -
  n	[        U5      [        UR                  5       5      -
  n
Xc-
  U	-   nXs-
  U
-
  nUR                  5       nUR	                  XVU4U45        M     X$4$ )z9Remove the spaces around the text and the entities in it.)r   r   rE   rF   rX   )re   entitiesnew_textleading_spacesnew_entitiesr$  r%  r   r   entity_name_leading_spacesentity_name_trailing_spacess              r"   _cleanup_spacesr1    s    zz|HYT[[]!33NL-5)\e6%(%5K<N<N<P8Q%Q"&)+&6[=O=O=Q9R&R#&)CC"%@@!'')[#,?@ .6 !!r!   c           	         [         R                  " SSU 5      n[        U 5      n/ nU HM  nUSS US   pv[        X`5      nU V	s/ sH  n	[	        U	S   U	S   U5      PM     n
n	UR                  X4-   5        MO     [        X$5      $ s  sn	f )ay  Remove the tag tokens from `text`, extract entities in it with some cleaning up of white characters.

Examples:

```python
>>> text = "<grounding> An image of<phrase> a snowman</phrase><object><patch_index_0044><patch_index_0863></object> warming himself by<phrase> a fire</phrase><object><patch_index_0005><patch_index_0911></object>."
>>> clean_text, entities = clean_text_and_extract_entities_with_bboxes(text)
>>> clean_text
'An image of a snowman warming himself by a fire.'

>>> entities
[('a snowman', (12, 21), [(0.390625, 0.046875, 0.984375, 0.828125)]), ('a fire', (41, 47), [(0.171875, 0.015625, 0.484375, 0.890625)])]
```r"  r   r   r   rj   )r   r#  r   r)  r  rX   r1  )re   r   processed_textr  r+  itemr  r   r(  r   bboxes_in_coordss              r"   r   r     s     VVGR.N"Ed"KH+aDG1&?jpqjpbf5d1gtAwH\]jpq*==> , >44	 rs   B)    )'r   r   r   r   typingr   r   image_processing_utilsr   image_utilsr   r   processing_utilsr	   r
   r   r   r   tokenization_utilsr   tokenization_utils_baser   r   r   r   r   r   r   r   r%   r*   r9   r   r  r   r)  r1  r   __all__r   r!   r"   <module>r>     s7   $   	 " 2 1 b b , ? sCxueUE)	*+eCHo	eE5%'(	)*,	(,e ("
% "-U *@ ~ @ FE%u*D$E ]` ejknpskset >(c (3 (c (Z7't"*5: 
r!   