ó
    <±h\!  ã                   óœ   • S r SSKrSSKJr  SSKJrJr  SSKJrJ	r	J
r
  SSKJrJrJr  SS	KJr  SS
KJr   " S S\SS9r " S S\	5      rS/rg)z%
Speech processor class for Wav2Vec2
é    N)Úcontextmanager)ÚOptionalÚUnioné   )ÚProcessingKwargsÚProcessorMixinÚUnpack)Ú
AudioInputÚPreTokenizedInputÚ	TextInputé   )ÚWav2Vec2FeatureExtractor)ÚWav2Vec2CTCTokenizerc                   ó   • \ rS rSr0 rSrg)ÚWav2Vec2ProcessorKwargsé   © N)Ú__name__Ú
__module__Ú__qualname__Ú__firstlineno__Ú	_defaultsÚ__static_attributes__r   ó    Úh/var/www/html/shao/venv/lib/python3.13/site-packages/transformers/models/wav2vec2/processing_wav2vec2.pyr   r      s   † ØƒIr   r   F)Útotalc            
       ó®   ^ • \ rS rSrSrSrSrU 4S jr\U 4S j5       r	    SS\
S\\\\\   \\4      S	\\   4S
 jjrS rS rS r\S 5       rSrU =r$ )ÚWav2Vec2Processoré!   aw  
Constructs a Wav2Vec2 processor which wraps a Wav2Vec2 feature extractor and a Wav2Vec2 CTC tokenizer into a single
processor.

[`Wav2Vec2Processor`] offers all the functionalities of [`Wav2Vec2FeatureExtractor`] and [`PreTrainedTokenizer`].
See the docstring of [`~Wav2Vec2Processor.__call__`] and [`~Wav2Vec2Processor.decode`] for more information.

Args:
    feature_extractor (`Wav2Vec2FeatureExtractor`):
        An instance of [`Wav2Vec2FeatureExtractor`]. The feature extractor is a required input.
    tokenizer ([`PreTrainedTokenizer`]):
        An instance of [`PreTrainedTokenizer`]. The tokenizer is a required input.
r   ÚAutoTokenizerc                 óT   >• [         TU ]  X5        U R                  U l        SU l        g )NF)ÚsuperÚ__init__Úfeature_extractorÚcurrent_processorÚ_in_target_context_manager)Úselfr$   Ú	tokenizerÚ	__class__s      €r   r#   ÚWav2Vec2Processor.__init__3   s'   ø€ Ü‰ÑÐ*Ô6Ø!%×!7Ñ!7ˆÔØ*/ˆÕ'r   c                 ó  >•  [         TU ]  " U40 UD6$ ! [        [        4 a`    [        R
                  " SU R                   S3[        5        [        R                  " U40 UD6n[        R                  " U40 UD6nU " X4S9s $ f = f)NzLoading a tokenizer inside a   from a config that does not include a `tokenizer_class` attribute is deprecated and will be removed in v5. Please add `'tokenizer_class': 'Wav2Vec2CTCTokenizer'` attribute to either your `config.json` or `tokenizer_config.json` file to suppress this warning: )r$   r(   )
r"   Úfrom_pretrainedÚOSErrorÚ
ValueErrorÚwarningsÚwarnr   ÚFutureWarningr   r   )ÚclsÚpretrained_model_name_or_pathÚkwargsr$   r(   r)   s        €r   r,   Ú!Wav2Vec2Processor.from_pretrained8   s˜   ø€ ð	QÜ‘7Ò*Ð+HÑSÈFÑSÐSøÜœÐ$ó 	QÜMŠMØ-¨c¯l©l¨^ð <2ð 2ô
 ôô !9× HÒ HÐIfÑ qÐjpÑ qÐÜ,×<Ò<Ð=ZÑeÐ^dÑeˆIáÐ):ÑPÒPð	Qús   ƒ ”A-BÂBÚaudioÚtextr4   c                 óÆ  • SU;   a'  [         R                  " S5        UR                  S5      nUc  Uc  [        S5      eU R                  " [
        4SU R                  R                  0UD6nU R                  (       a   U R                  " U40 US   DUS   DUS   D6$ Ub  U R                  " U40 US   D6nUb  U R                  " U40 US   D6nUc  W$ Uc  W$ WS   WS	'   U$ )
aÚ  
This method forwards all arguments to [`Wav2Vec2FeatureExtractor.__call__`] and/or
[`PreTrainedTokenizer.__call__`] depending on the input modality and returns their outputs. If both modalities are passed, [`Wav2Vec2FeatureExtractor.__call__`] and [`PreTrainedTokenizer.__call__`] are called.

Args:
    audio (`np.ndarray`, `torch.Tensor`, `List[np.ndarray]`, `List[torch.Tensor]`, *optional*):
        An audio input is passed to [`Wav2Vec2FeatureExtractor.__call__`].
    text (`str`, `List[str]`, *optional*):
        A text input is passed to [`PreTrainedTokenizer.__call__`].


Returns:
    This method returns the results of each `call` method. If both are used, the output is a dictionary containing the results of both.
Ú
raw_speechzLUsing `raw_speech` as a keyword argument is deprecated. Use `audio` instead.zAYou need to specify either an `audio` or `text` input to process.Útokenizer_init_kwargsÚaudio_kwargsÚtext_kwargsÚcommon_kwargsÚ	input_idsÚlabels)r/   r0   Úpopr.   Ú_merge_kwargsr   r(   Úinit_kwargsr&   r%   r$   )	r'   r6   r7   ÚimagesÚvideosr4   Úoutput_kwargsÚinputsÚ	encodingss	            r   Ú__call__ÚWav2Vec2Processor.__call__K   s   € ð, ˜6Ó!ÜMŠMÐhÔiØ—J‘J˜|Ó,ˆEà‰=˜T™\ÜÐ`ÓaÐaà×*Ò*Ü#ñ
à"&§.¡.×"<Ñ"<ð
ð ñ
ˆð ×*×*Ø×)Ò)Øñà Ñ/ðð   Ñ.ðð   Ñ0ñ	ð ð ÑØ×+Ò+¨EÑS°]À>Ñ5RÑSˆFØÑØŸš tÑL¨}¸]Ñ/KÑLˆIà‰<ØˆMØ‰]ØÐà(¨Ñ5ˆF8ÑØˆMr   c                 ó„  • U R                   (       a  U R                  R                  " U0 UD6$ UR                  SS5      nUR                  SS5      n[	        U5      S:”  a
  US   nUSS nUb   U R
                  R                  " U/UQ70 UD6nUb  U R                  R                  " U40 UD6nUc  U$ Uc  U$ US   US'   U$ )a  
This method operates on batches of extracted features and/or tokenized text. It forwards all arguments to
[`Wav2Vec2FeatureExtractor.pad`] and/or [`PreTrainedTokenizer.pad`] depending on the input modality and returns their outputs. If both modalities are passed, [`Wav2Vec2FeatureExtractor.pad`] and [`PreTrainedTokenizer.pad`] are called.

Args:
    input_features:
        When the first argument is a dictionary containing a batch of tensors, or the `input_features` argument is present, it is passed to [`Wav2Vec2FeatureExtractor.pad`].
    labels:
        When the `label` argument is present, it is passed to [`PreTrainedTokenizer.pad`].

Returns:
    This method returns the results of each `pad` method. If both are used, the output is a dictionary containing the results of both.
Úinput_featuresNr?   r   r   r>   )r&   r%   Úpadr@   Úlenr$   r(   )r'   Úargsr4   rK   r?   s        r   rL   ÚWav2Vec2Processor.padƒ   sÚ   € ð ×*×*Ø×)Ñ)×-Ò-¨tÐ>°vÑ>Ð>àŸ™Ð$4°dÓ;ˆØ—‘˜H dÓ+ˆÜˆt‹9q‹=Ø! !™WˆNØ˜˜8ˆDàÑ%Ø!×3Ñ3×7Ò7¸ÐXÈÒXÐQWÑXˆNØÑØ—^‘^×'Ò'¨Ñ9°&Ñ9ˆFà‰>Ø!Ð!ØÑ#ØˆMà'-¨kÑ':ˆN˜8Ñ$Ø!Ð!r   c                 ó:   • U R                   R                  " U0 UD6$ )z«
This method forwards all its arguments to PreTrainedTokenizer's [`~PreTrainedTokenizer.batch_decode`]. Please
refer to the docstring of this method for more information.
)r(   Úbatch_decode©r'   rN   r4   s      r   rQ   ÚWav2Vec2Processor.batch_decode¨   s   € ð
 ~‰~×*Ò*¨DÐ;°FÑ;Ð;r   c                 ó:   • U R                   R                  " U0 UD6$ )z¥
This method forwards all its arguments to PreTrainedTokenizer's [`~PreTrainedTokenizer.decode`]. Please refer
to the docstring of this method for more information.
)r(   ÚdecoderR   s      r   rU   ÚWav2Vec2Processor.decode¯   s   € ð
 ~‰~×$Ò$ dÐ5¨fÑ5Ð5r   c              #   ó    #   • [         R                  " S5        SU l        U R                  U l        Sv •  U R
                  U l        SU l        g7f)zt
Temporarily sets the tokenizer for processing the input. Useful for encoding the labels when fine-tuning
Wav2Vec2.
zî`as_target_processor` is deprecated and will be removed in v5 of Transformers. You can process your labels by using the argument `text` of the regular `__call__` method (either in the same call as your audio inputs, or in a separate call.TNF)r/   r0   r&   r(   r%   r$   )r'   s    r   Úas_target_processorÚ%Wav2Vec2Processor.as_target_processor¶   sG   é € ô 	Šð8ô	
ð
 +/ˆÔ'Ø!%§¡ˆÔÛØ!%×!7Ñ!7ˆÔØ*/ˆÕ'ùs   ‚AA)r&   r%   )NNNN)r   r   r   r   Ú__doc__Úfeature_extractor_classÚtokenizer_classr#   Úclassmethodr,   r
   r   r   ÚstrÚlistr   r   r	   r   rH   rL   rQ   rU   r   rX   r   Ú__classcell__)r)   s   @r   r   r   !   s¢   ø† ñð 9ÐØ%€Oõ0ð
 ôQó ðQð( !ØNRØØñ6àð6ð u˜S $ s¡)¨YÐ8IÐIÑJÑKð6ð Ð0Ñ1õ6òp#"òJ<ò6ð ñ0ó ö0r   r   )rZ   r/   Ú
contextlibr   Útypingr   r   Úprocessing_utilsr   r   r	   Útokenization_utils_baser
   r   r   Úfeature_extraction_wav2vec2r   Útokenization_wav2vec2r   r   r   Ú__all__r   r   r   Ú<module>rh      sO   ðñó Ý %ß "ç HÑ Hß OÑ OÝ AÝ 7ôÐ.°eò ôd0˜ô d0ðN Ð
r   