
    <h                          S SK JrJr  S SKrSSKJr  SSKJrJ	r	  SSK
JrJrJrJrJr  SSKJrJr   " S S	\5      r " S
 S\SS9r " S S\5      rS/rg)    )OptionalUnionN   )BatchFeature)
ImageInputmake_nested_list_of_images)AudioKwargsImagesKwargsProcessingKwargsProcessorMixinUnpack)PreTokenizedInput	TextInputc                   f    \ rS rSr% \\   \S'   \\   \S'   \\   \S'   \\   \S'   \\   \S'   Sr	g)	Gemma3nImagesKwargs   do_pan_and_scanpan_and_scan_min_crop_sizepan_and_scan_max_num_crops"pan_and_scan_min_ratio_to_activatedo_convert_rgb N)
__name__
__module____qualname____firstlineno__r   bool__annotations__intfloat__static_attributes__r       f/var/www/html/shao/venv/lib/python3.13/site-packages/transformers/models/gemma3n/processing_gemma3n.pyr   r      s4    d^# (- (-(07TN"r"   r   c                   6    \ rS rSr% \\S'   \\S'   SSS00rSrg)	Gemma3nProcessorKwargs"   audio_kwargsimages_kwargstext_kwargspaddingFr   N)	r   r   r   r   r	   r   r   	_defaultsr!   r   r"   r#   r%   r%   "   s"    &&u
Ir"   r%   F)totalc                     ^  \ rS rSrSr/ SQrSrSrSr   SS\	S\	4U 4S	 jjjr
    SS
\S\\\\\   \\   4   S\\\R$                  \\   \\R$                     \\\      4      S\\   S\4
S jjrS rS r\S 5       rSrU =r$ )Gemma3nProcessor,   a,  
A processor for Gemma 3n, wrapping the full capabilities of a feature extractor, image processor, and tokenizer
into a single processor.

Args:
    feature_extractor (`Gemma3nAudioFeatureExtractor`):
        Feature extractor that converts raw audio waveforms into MEL spectrograms for the audio encoder. This
        should return a `BatchFeature` with `input_features` and `input_features_mask` features.
    image_processor (`SiglipImageProcessorFast`):
        Image processor that prepares batches of images for the vision encoder. This should return a `BatchFeature`
        with a `pixel_values` feature.
    tokenizer (`GemmaTokenizerFast`):
        The text tokenizer for the model.
    chat_template (`string`, *optional*):
        A Jinja template for generating text prompts from a set of messages.
    audio_seq_length (int, *optional*, defaults to 188):
        The number of audio soft tokens that will be added to the text prompt
    image_seq_length (int, *optional*, defaults to 256):
        The number of image soft tokens that should be added to
)feature_extractorimage_processor	tokenizerAutoFeatureExtractorAutoImageProcessorAutoTokenizeraudio_seq_lengthimage_seq_lengthc                   > XPl         UR                  U l        UR                  U l        UR                  U l        SR	                  UR                  /U-  5      nSUR                   U UR
                   S3U l        X`l        UR                  U l        UR                  U l	        UR                  U l
        SR	                  UR                  /U-  5      n	SUR                   U	 UR                   S3U l        [        T
U ]8  " SUUUUS.UD6  g )N z

)r0   r1   r2   chat_templater   )r6   audio_token_id	boa_tokenaudio_tokenjoin	eoa_tokenfull_audio_sequencer7   image_token_id	boi_tokenimage_token	eoi_tokenfull_image_sequencesuper__init__)selfr0   r1   r2   r:   r6   r7   kwargsaudio_tokens_expandedimage_tokens_expanded	__class__s             r#   rG   Gemma3nProcessor.__init__G   s    !1'66",,$00 ")>)>(?BR(R S%))*=*=)>?T>UV_ViViUjjn#o  0'66",,$00 ")>)>(?BR(R S%))*=*=)>?T>UV_ViViUjjn#o  	
/+'		

 	
r"   imagestextaudiorI   returnc           	         Uc  Uc  Uc  [        S5      eU R                  " [        4SU R                  R                  0UD6n[        U[        5      (       a  U/nO8[        U[        5      (       d#  [        US   [        5      (       d  [        S5      eUbl  U R                  " U40 US   D6nU(       d  U Vs/ sH  oR                  PM     nnU V	s/ sH(  oR                  U R                  U R                  5      PM*     nn	O0 nUb  [        U5      n
U R                  " U
40 US   D6nU(       d7  U
 Vs/ sH+  nSR                  U R                  /[!        U5      -  5      PM-     nn[!        U
5      [!        U5      :w  a$  [        S[!        U
5       S	[!        U5       S
35      eU V	s/ sH(  oR                  U R                  U R"                  5      PM*     nn	O0 nUS   R%                  SS 5      nU R                  " SSU0US   DSS0D6nU R'                  X-S/S9  US   n[(        R*                  " U5      nSXU R,                  :H  '   SXU R.                  :H  '   UR1                  5        VVs0 sH  u  nnUUR3                  5       _M     nnnUR3                  5       US'   [5        0 UEUEUEUS9$ s  snf s  sn	f s  snf s  sn	f s  snnf )Nz5Provide at least one of `text`, `images`, or `audio`.tokenizer_init_kwargsr   zAInvalid input text. Please provide a string, or a list of stringsr'   r(    z1Received inconsistently sized batches of images (z) and text (z).r)   return_tensorsrO   npimage)
modalities	input_ids   r   token_type_ids)datatensor_typer   )
ValueError_merge_kwargsr%   r2   init_kwargs
isinstancestrlistr0   r=   replacer@   r   r1   r>   rC   lenrE   pop_check_special_mm_tokensrV   
zeros_likerA   r;   itemstolistr   )rH   rN   rO   rP   videosrI   output_kwargsaudio_inputs_promptbatched_imagesimage_inputsrU   text_inputs	array_idsr[   kvs                     r#   __call__Gemma3nProcessor.__call__g   s    <FNu}TUU**"
"&.."<"<
 
 dC  6DD$''
47C0H0H`aa11%Y=;XYL278%Q((%8 ^bb]aSYNN4#3#3T5M5MN]aDbDL7?N//a-P_B`aL Q_`Q_v$"2"2!3c&k!ABQ_`>"c$i/ GNH[G\\hilmqirhssuv 
 ^bb]aSYNN4#3#3T5M5MN]aDbDL&}599:JDQnnd$d-2Nd_cd%%dWI%N  ,	y1;<D$7$778;<D$7$7781<1B1B1DE1DAq!((*}1DE(6(=(=(?$%!PK!P<!P<!P^lmmI 9 c a c Fs   7J-.J231J7&.J<*Kc                 :    U R                   R                  " U0 UD6$ )z
This method forwards all its arguments to GemmaTokenizerFast's [`~PreTrainedTokenizer.batch_decode`]. Please
refer to the docstring of this method for more information.
)r2   batch_decoderH   argsrI   s      r#   ry   Gemma3nProcessor.batch_decode   s    
 ~~**D;F;;r"   c                 :    U R                   R                  " U0 UD6$ )z
This method forwards all its arguments to GemmaTokenizerFast's [`~PreTrainedTokenizer.decode`]. Please refer to
the docstring of this method for more information.
)r2   decoderz   s      r#   r~   Gemma3nProcessor.decode   s    
 ~~$$d5f55r"   c                     U R                   R                  S/-   nU R                  R                  nU R                  R                  n[	        [
        R                  X-   U-   5      5      $ )Nr[   )r2   model_input_namesr1   r0   rc   dictfromkeys)rH   tokenizer_input_namesimage_processor_input_namesfeature_extactor_input_namess       r#   r   "Gemma3nProcessor.model_input_names   s]     $ @ @DTCU U&*&:&:&L&L#'+'='='O'O$DMM"7"UXt"tuvvr"   )
r6   r=   r;   r<   rB   r@   rE   r7   rC   rA   )N      )NNNN)r   r   r   r   __doc__
attributesfeature_extractor_classimage_processor_classtokenizer_classr   rG   r   r   r   r   rc   r   rV   ndarrayr    r   r%   r   rv   ry   r~   propertyr   r!   __classcell__)rL   s   @r#   r.   r.   ,   s   * GJ40%O  # #
 
 
 
D "^b_c>n>n I0$y/4HYCZZ[>n bjj$u+tBJJ7GdSXkIZZ[\	>n /0>n 
>nB<6 w wr"   r.   )typingr   r   numpyrV   feature_extraction_utilsr   image_utilsr   r   processing_utilsr	   r
   r   r   r   tokenization_utils_baser   r   r   r%   r.   __all__r   r"   r#   <module>r      sR     #  4 A c c C#, #-U Pw~ Pwf 
r"   