    Ph"                         d Z ddlZddlmZmZ ddlmZ ddlmZ ddl	m
Z
mZmZ ddlmZmZmZ dd	lmZ d
dlmZ  ej*                  e      Z G d de
d      Z G d de      ZdgZy)zq
Processor class for InstructBLIP. Largely copy of Blip2Processor with addition of a tokenizer for the Q-Former.
    N)OptionalUnion   )BatchFeature)
ImageInput)ProcessingKwargsProcessorMixinUnpack)
AddedTokenPreTokenizedInput	TextInput)logging   )AutoTokenizerc            
       *    e Zd Zdddddddddd	i dZy)InstructBlipProcessorKwargsTFr   )	add_special_tokenspaddingstridereturn_overflowing_tokensreturn_special_tokens_maskreturn_offsets_mappingreturn_token_type_idsreturn_lengthverbose)text_kwargsimages_kwargsN)__name__
__module____qualname__	_defaults     r/var/www/html/saasai/venv/lib/python3.12/site-packages/transformers/models/instructblip/processing_instructblip.pyr   r   !   s0     #').*/&+%*"

 Ir#   r   F)totalc            
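

# Illustrative note (the call below is hypothetical): `ProcessorMixin._merge_kwargs` resolves the
# defaults above against the tokenizer's init kwargs and then against call-time arguments, in that
# order of increasing priority. For example,
#
#     processor(images=image, text="hi", padding="max_length", max_length=32)
#
# ends up with output_kwargs["text_kwargs"]["padding"] == "max_length", while entries the caller
# leaves untouched (such as "stride": 0) keep the default values declared above.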
            e Zd ZdZg dZdZdZdZd fd	Z	 	 	 	 dde	e
   deeeee   ee   f   dee   d	efd
Zed        Z fdZe fd       Z xZS )InstructBlipProcessora  
    Constructs an InstructBLIP processor which wraps a BLIP image processor and a LLaMa/T5 tokenizer into a single
    processor.

    [`InstructBlipProcessor`] offers all the functionalities of [`BlipImageProcessor`] and [`AutoTokenizer`]. See the
    docstring of [`~BlipProcessor.__call__`] and [`~BlipProcessor.decode`] for more information.

    Args:
        image_processor (`BlipImageProcessor`):
            An instance of [`BlipImageProcessor`]. The image processor is a required input.
        tokenizer (`AutoTokenizer`):
            An instance of [`PreTrainedTokenizer`]. The tokenizer is a required input.
        qformer_tokenizer (`AutoTokenizer`):
            An instance of [`PreTrainedTokenizer`]. The Q-Former tokenizer is a required input.
        num_query_tokens (`int`, *optional*):
            Number of tokens used by the Q-Former as queries; should be the same as in the model's config.
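
    Example, a minimal usage sketch (the checkpoint name is illustrative, and running the actual
    model is omitted; the expected keys simply mirror what `__call__` assembles):

    ```python
    >>> from transformers import InstructBlipProcessor
    >>> from PIL import Image
    >>> import requests

    >>> processor = InstructBlipProcessor.from_pretrained("Salesforce/instructblip-vicuna-7b")

    >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
    >>> image = Image.open(requests.get(url, stream=True).raw)
    >>> inputs = processor(images=image, text="What is shown in this picture?", return_tensors="pt")
    >>> sorted(inputs.keys())
    ['attention_mask', 'input_ids', 'pixel_values', 'qformer_attention_mask', 'qformer_input_ids']
    ```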
    """

    attributes = ["image_processor", "tokenizer", "qformer_tokenizer"]
    image_processor_class = ("BlipImageProcessor", "BlipImageProcessorFast")
    tokenizer_class = "AutoTokenizer"
    qformer_tokenizer_class = "AutoTokenizer"

    def __init__(self, image_processor, tokenizer, qformer_tokenizer, num_query_tokens=None, **kwargs):
        if not hasattr(tokenizer, "image_token"):
            # register the "<image>" placeholder as a special token on the language tokenizer
            self.image_token = AddedToken("<image>", normalized=False, special=True)
            tokenizer.add_tokens([self.image_token], special_tokens=True)
        else:
            self.image_token = tokenizer.image_token
        self.num_query_tokens = num_query_tokens
        super().__init__(image_processor, tokenizer, qformer_tokenizer)

    def __call__(
        self,
        images: Optional[ImageInput] = None,
        text: Optional[Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]]] = None,
        audio=None,
        videos=None,
        **kwargs: Unpack[InstructBlipProcessorKwargs],
    ) -> BatchFeature:
        """
        This method uses the [`BlipImageProcessor.__call__`] method to prepare image(s) for the model, and
        [`BertTokenizerFast.__call__`] to prepare text for the model.

        Please refer to the docstring of the above two methods for more information.
        Args:
            images (`ImageInput`):
                The image or batch of images to be prepared. Each image can be a PIL image, NumPy array or PyTorch
                tensor. Both channels-first and channels-last formats are supported.
            text (`TextInput`, `PreTokenizedInput`, `list[TextInput]`, `list[PreTokenizedInput]`):
                The sequence or batch of sequences to be encoded. Each sequence can be a string or a list of strings
                (pretokenized string). If the sequences are provided as list of strings (pretokenized), you must set
                `is_split_into_words=True` (to lift the ambiguity with a batch of sequences).
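
        Returns:
            [`BatchFeature`]: Holds the model inputs assembled below: `input_ids`, `attention_mask`,
            `qformer_input_ids` and `qformer_attention_mask` when `text` is passed, plus the image
            processor's outputs (typically `pixel_values`) when `images` is passed.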
        """
        if images is None and text is None:
            raise ValueError("You have to specify at least images or text.")

        output_kwargs = self._merge_kwargs(
            InstructBlipProcessorKwargs,
            tokenizer_init_kwargs=self.tokenizer.init_kwargs,
            **kwargs,
        )
        return_tensors = output_kwargs["text_kwargs"].pop("return_tensors", None)
        encoding = {}
        if text is not None:
            if isinstance(text, str):
                text = [text]
            elif not isinstance(text, list) and not isinstance(text[0], str):
                raise ValueError("Invalid input text. Please provide a string, or a list of strings")

            # the Q-Former receives its own copy of the prompt, tokenized with its own tokenizer
            qformer_text_encoding = self.qformer_tokenizer(text, **output_kwargs["text_kwargs"])
            encoding["qformer_input_ids"] = qformer_text_encoding.pop("input_ids")
            encoding["qformer_attention_mask"] = qformer_text_encoding.pop("attention_mask")

            # the image (query) tokens prepended below count towards `max_length`
            if output_kwargs["text_kwargs"].get("max_length") is not None:
                output_kwargs["text_kwargs"]["max_length"] += self.num_query_tokens
            text_encoding = self.tokenizer(text, **output_kwargs["text_kwargs"])

            # if we know how many query tokens, expand the text inside the processor. We need this hacky
            # manipulation because BLIP expects image tokens to be at the beginning, even before the BOS token
            if images is not None:
                image_tokens = self.image_token.content * self.num_query_tokens
                output_kwargs["text_kwargs"]["add_special_tokens"] = False
                output_kwargs["text_kwargs"]["padding"] = False
                output_kwargs["text_kwargs"]["truncation"] = False
                image_text_encoding = self.tokenizer(image_tokens, **output_kwargs["text_kwargs"])
                for k in text_encoding:
                    text_encoding[k] = [image_text_encoding[k] + sample for sample in text_encoding[k]]
            encoding.update(text_encoding)

        if images is not None:
            image_encoding = self.image_processor(images, **output_kwargs["images_kwargs"])
            encoding.update(image_encoding)

        encoding = BatchFeature(encoding, tensor_type=return_tensors)
        return encoding

    @property
    def model_input_names(self):
        tokenizer_input_names = self.tokenizer.model_input_names
        image_processor_input_names = self.image_processor.model_input_names
        qformer_input_names = ["qformer_input_ids", "qformer_attention_mask"]
        return tokenizer_input_names + image_processor_input_names + qformer_input_names

    def save_pretrained(self, save_directory, **kwargs):
        if os.path.isfile(save_directory):
            raise ValueError(f"Provided path ({save_directory}) should be a directory, not a file")
        os.makedirs(save_directory, exist_ok=True)
        qformer_tokenizer_path = os.path.join(save_directory, "qformer_tokenizer")
        self.qformer_tokenizer.save_pretrained(qformer_tokenizer_path)

        # temporarily drop the Q-Former tokenizer from `attributes` so the base implementation
        # does not try to save it a second time in the top-level folder
        qformer_present = "qformer_tokenizer" in self.attributes
        if qformer_present:
            self.attributes.remove("qformer_tokenizer")

        outputs = super().save_pretrained(save_directory, **kwargs)

        if qformer_present:
            self.attributes += ["qformer_tokenizer"]
        return outputs

    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
        processor = super().from_pretrained(pretrained_model_name_or_path, **kwargs)

        # if `return_unused_kwargs` was passed, a (processor, unused_kwargs) tuple is returned
        if isinstance(processor, tuple):
            processor = processor[0]
        qformer_tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path, subfolder="qformer_tokenizer")
        processor.qformer_tokenizer = qformer_tokenizer
        return processor


__all__ = ["InstructBlipProcessor"]
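

# Round-trip sketch (the local path is illustrative): `save_pretrained` writes the Q-Former
# tokenizer to a dedicated "qformer_tokenizer" subfolder, and `from_pretrained` restores it from
# that same subfolder before attaching it back onto the processor:
#
#     processor.save_pretrained("./instructblip-processor")
#     reloaded = InstructBlipProcessor.from_pretrained("./instructblip-processor")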