
    <hg2                        S r SSKJr  SSKJrJrJr  SSKJr  SSK	J
r
JrJr  SSKJrJrJrJr  SSKJrJr  SS	KJr  \(       a  SS
KJr  \R0                  " \5      rS\4S jrS r " S S\SS9r " S S\SS9r " S S\5      r S/r!g)z
Processor class for IDEFICS2.
    )
accumulate)TYPE_CHECKINGOptionalUnion   )BatchFeature)
ImageInputis_valid_image
load_image)ImagesKwargsProcessingKwargsProcessorMixinUnpack)
AddedToken	TextInput)logging)PreTokenizedInputreturnc                 R    [        U [        5      =(       a    U R                  S5      $ )Nhttp)
isinstancestr
startswith)vals    h/var/www/html/shao/venv/lib/python3.13/site-packages/transformers/models/idefics2/processing_idefics2.pyis_urlr   )   s    c3:CNN6$::    c                 <    [        U 5      =(       d    [        U 5      $ N)r   r
   )elems    r   is_image_or_image_urlr!   -   s    $</>$//r   c                   &    \ rS rSr% \\   \S'   Srg)Idefics2ImagesKwargs1   image_seq_len N)__name__
__module____qualname____firstlineno__r   int__annotations____static_attributes__r&   r   r   r#   r#   1   s    C= r   r#   F)totalc                   2    \ rS rSr% \\S'   SSSS.0 S.rSrg)	Idefics2ProcessorKwargs5   images_kwargsTF)add_special_tokenspaddingis_split_into_words)text_kwargsr2   r&   N)r'   r(   r)   r*   r#   r,   	_defaultsr-   r&   r   r   r0   r0   5   s$    '' #'#(

 Ir   r0   c            
          ^  \ rS rSrSrSS/rSrSr SS\S\	\
   4U 4S	 jjjrS
 r    SS\\\\   \\\      4   S\\S\\   \S   4   S\\   S\4S jjrS rS r\S 5       rSrU =r$ )Idefics2ProcessorB   a  
Constructs a IDEFICS2 processor which wraps a LLama tokenizer and IDEFICS2 image processor into a single processor.

[`IdeficsProcessor`] offers all the functionalities of [`Idefics2ImageProcessor`] and [`LlamaTokenizerFast`]. See
the docstring of [`~IdeficsProcessor.__call__`] and [`~IdeficsProcessor.decode`] for more information.

Args:
    image_processor (`Idefics2ImageProcessor`):
        An instance of [`Idefics2ImageProcessor`]. The image processor is a required input.
    tokenizer (`PreTrainedTokenizerBase`, *optional*):
        An instance of [`PreTrainedTokenizerBase`]. This should correspond with the model's text model. The tokenizer is a required input.
    image_seq_len (`int`, *optional*, defaults to 64):
        The length of the image sequence i.e. the number of <image> tokens per image in the input.
        This parameter is used to build the string from the input prompt and image tokens and should match the
        config.perceiver_config.resampler_n_latents value for the model used.
    chat_template (`str`, *optional*): A Jinja template which will be used to convert lists of messages
        in a chat into a tokenizable string.
image_processor	tokenizerIdefics2ImageProcessorAutoTokenizerr%   chat_templatec                 J  > Uc  [        S5      eUc  [        S5      e[        US5      (       d  [        SSSS9R                  U l        [        SSSS9R                  U l        S	U R                  U R
                  /0nUR                  U5        UR                  U R
                  5      U l        O3UR                  U l        UR
                  U l        UR                  U l        [        S
SSS9U l
        UR                  S	U R                  /05        X0l        [        TU ]5  XUS9  g )Nz)You need to specify an `image_processor`.z"You need to specify a `tokenizer`.image_tokenz<fake_token_around_image>FT)
normalizedspecialz<image>additional_special_tokensz<end_of_utterance>)r?   )
ValueErrorhasattrr   contentfake_image_tokenrA   r3   convert_tokens_to_idsimage_token_idimage_boundary_tokenend_of_utterance_tokenr%   super__init__)selfr;   r<   r%   r?   kwargstokens_to_add	__class__s          r   rN   Idefics2Processor.__init__Z   s    "HIIABBy-00$./JW\fj$k$s$sD!))tT\\D84;P;PRVRbRb:cdM((7"+"A"A$BRBR"SD$-$B$BD!(44D"+":":D&01ERWae&f#$$&ADD_D_C`%ab*=Qr   c                     / nU Hn  n/ nU HR  n[        U5      (       a  UR                  U5        M&  [        U5      (       d  M8  UR                  [        U5      5        MT     UR                  U5        Mp     U$ r   )r
   appendr   r   )rO   promptsprompt_imagespromptimagesr    s         r   _extract_images_from_prompts.Idefics2Processor._extract_images_from_promptss   si    FF!$''MM$'D\\MM*T"23	 
   (  r   rY   textr   rP   r   c                    Uc  Uc  [        S5      eU R                  " [        4SU R                  R                  0UD6nUS   R                  SS5      nUb  UOU R                  nUS   R                  SS5      n/ n	0 n
UGb5  [        U[        5      (       a  U/nO8[        U[        5      (       d#  [        US   [        5      (       d  [        S	5      eU R                  nU R                  nU X-   U 3nU R                  R                  (       a
  US
-  nUS
-  n/ nU H\  nU	R                  UR                  U5      5        UR!                  X5      nUR!                  U U 3U 5      nUR                  U5        M^     U R                  " U40 US   D6nU R#                  UUS/S9  U
R%                  U5        UGb  ['        U5      (       a  U//nGO[        U[        [(        45      (       a  ['        US   5      (       a  Ub  [+        U	5      [-        U5      :w  a*  [        SW S[+        U	5       SU S[-        U5       S3	5      eS/[        [/        U	5      5      -   n[1        [-        U	5      5       Vs/ sH  nUUU   UUS-       PM     nnO^U/nOZ[        U[        [(        45      (       d?  [        US   [        [(        45      (       d!  ['        US   S   5      (       d  [        S5      eU Vs/ sH  n[-        U5      PM     nnUb  UU	:X  d  [        SU	 SU S35      eU VVs/ sH  o Vs/ sH  n[3        U5      PM     snPM     nnnU R                  " U40 US   D6nU
R%                  U5        [5        XS9$ s  snf s  snf s  snf s  snnf )a  
Processes the input prompts and returns a BatchEncoding.

Example:

```python
>>> import requests
>>> from transformers import Idefics2Processor
>>> from transformers.image_utils import load_image

>>> processor = Idefics2Processor.from_pretrained("HuggingFaceM4/idefics2-8b", image_seq_len=2)
>>> processor.image_processor.do_image_splitting = False  # Force as False to simplify the example

>>> url1 = "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
>>> url2 = "https://cdn.britannica.com/59/94459-050-DBA42467/Skyline-Chicago.jpg"

>>> image1, image2 = load_image(url1), load_image(url2)
>>> images = [[image1], [image2]]

>>> text = [
...     "<image>In this image, we see",
...     "bla bla bla<image>",
... ]
>>> outputs = processor(images=images, text=text, return_tensors="pt", padding=True)
>>> input_ids = outputs.input_ids
>>> input_tokens = processor.tokenizer.batch_decode(input_ids)
>>> print(input_tokens)
['<s><fake_token_around_image><image><image><fake_token_around_image> In this image, we see', '<s> bla bla bla<fake_token_around_image><image><image><fake_token_around_image>']
```

Args:
    images (`PIL.Image.Image`, `np.ndarray`, `torch.Tensor`, `list[PIL.Image.Image]`, `list[np.ndarray]`, `list[torch.Tensor]`, *optional*):
        The image or batch of images to be prepared. Each image can be a PIL image, NumPy array or PyTorch
        tensor. If is of type `list[ImageInput]`, it's assumed that this is for a single prompt i.e. of batch size 1.
    text (`Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]]`, *optional*):
        The sequence or batch of sequences to be encoded. Each sequence can be a string or a list of strings
        (pretokenized string). If the sequences are provided as list of strings (pretokenized), you must set
        `is_split_into_words=True` (to lift the ambiguity with a batch of sequences).

        Wherever an image token, `<image>` is encountered it is expanded to
        `<fake_token_around_image>` + `<image>` * `image_seq_len` * <fake_token_around_image>`.
    return_tensors (`Union[str, TensorType]`, *optional*):
        If set, will return tensors of a particular framework. See [`PreTrainedTokenizerFast.__call__`] for more
        information.

Nz+You must provide either `text` or `images`.tokenizer_init_kwargsr2   r%   r6   return_tensorsr   zAInvalid input text. Please provide a string, or a list of strings   image)
modalitieszThe total number of zP tokens in the prompts should be the same as the number of images passed. Found  z tokens and z images.   zdInvalid input images. Please provide a single image or a list of images or a list of list of images.z!The number of images in the text z and images  z should be the same.)tensor_type)rE   _merge_kwargsr0   r<   init_kwargspopr%   r   r   listrH   rA   r;   do_image_splittingrU   countreplace_check_special_mm_tokensupdater!   tuplesumlenr   ranger   r   )rO   rY   r\   audiovideosrP   output_kwargsr%   r_   n_images_in_textinputsrH   rA   	image_strprompt_stringssampletext_inputscumsum_images_in_textin_images_in_imagesimimage_inputss                         r   __call__Idefics2Processor.__call__   s   l <FNJKK**#
"&.."<"<
 

 &o6::?DQ)6)BHZHZ&}599:JDQ$$$vd++JtAw4L4L !dee  $44**K+,[-H,IJZI[\I##66%M	"N ''[(AB?+;*<=M<N(OTdSeg%%f-  ..X=;WXK)).+SZR[)\MM+&$V,,!(FT5M227LVTUY7W7W#+,F;(2;- @&&)*:&;%<Ak],WZ[aWbVcckm 
 ./C$zBR7S2T,T) "'s+;'<!=!=A 4Q7:OPQTUPU:VW!=  F
 %XF ve}55"6!9tUm<<-fQil;; z  =C!CF&#f+F!C(:>N(N 78H7IWiVjj~ 
 GMMfF7"z"~7fFM//Y-:XYLMM,'F??7  "D 8Ms$   2M:.M?$N	,N N	N	c                 :    U R                   R                  " U0 UD6$ )z
This method forwards all its arguments to LlamaTokenizerFast's [`~PreTrainedTokenizer.batch_decode`]. Please
refer to the docstring of this method for more information.
)r<   batch_decoderO   argsrP   s      r   r   Idefics2Processor.batch_decode	  s    
 ~~**D;F;;r   c                 :    U R                   R                  " U0 UD6$ )z
This method forwards all its arguments to LlamaTokenizerFast's [`~PreTrainedTokenizer.decode`]. Please refer to
the docstring of this method for more information.
)r<   decoder   s      r   r   Idefics2Processor.decode  s    
 ~~$$d5f55r   c                     U R                   R                  nU R                  R                  n[        [        R                  X-   5      5      $ r   )r<   model_input_namesr;   ri   dictfromkeys)rO   tokenizer_input_namesimage_processor_input_namess      r   r   #Idefics2Processor.model_input_names  s<     $ @ @&*&:&:&L&L#DMM"7"UVWWr   )rL   rH   r%   rA   rJ   )N@   N)NNNN)r'   r(   r)   r*   __doc__
attributesimage_processor_classtokenizer_classr+   r   r   rN   rZ   r   r	   ri   r   r   r0   r   r   r   r   propertyr   r-   __classcell__)rR   s   @r   r9   r9   B   s    & $[1J4%O hlR>ARW_`cWdR R2
 OSbfH@j$z"2Dj9I4JJKH@ I2DOTJ]E^^_H@ 01H@ 
H@T<6 X Xr   r9   N)"r   	itertoolsr   typingr   r   r   feature_extraction_utilsr   image_utilsr	   r
   r   processing_utilsr   r   r   r   tokenization_utils_baser   r   utilsr   r   
get_loggerr'   loggerboolr   r!   r#   r0   r9   __all__r&   r   r   <module>r      s    ! 1 1 4 A A  =  < 
		H	%;4 ;0!<u !
.e 
YX YXx 
r   