
    <h\!                         S r SSKrSSKJr  SSKJrJr  SSKJrJ	r	J
r
  SSKJrJrJr  SS	KJr  SS
KJr   " S S\SS9r " S S\	5      rS/rg)z%
Speech processor class for Wav2Vec2
    N)contextmanager)OptionalUnion   )ProcessingKwargsProcessorMixinUnpack)
AudioInputPreTokenizedInput	TextInput   )Wav2Vec2FeatureExtractor)Wav2Vec2CTCTokenizerc                       \ rS rSr0 rSrg)Wav2Vec2ProcessorKwargs    N)__name__
__module____qualname____firstlineno__	_defaults__static_attributes__r       h/var/www/html/shao/venv/lib/python3.13/site-packages/transformers/models/wav2vec2/processing_wav2vec2.pyr   r      s    Ir   r   F)totalc            
          ^  \ rS rSrSrSrSrU 4S jr\U 4S j5       r	    SS\
S\\\\\   \\4      S	\\   4S
 jjrS rS rS r\S 5       rSrU =r$ )Wav2Vec2Processor!   aw  
Constructs a Wav2Vec2 processor which wraps a Wav2Vec2 feature extractor and a Wav2Vec2 CTC tokenizer into a single
processor.

[`Wav2Vec2Processor`] offers all the functionalities of [`Wav2Vec2FeatureExtractor`] and [`PreTrainedTokenizer`].
See the docstring of [`~Wav2Vec2Processor.__call__`] and [`~Wav2Vec2Processor.decode`] for more information.

Args:
    feature_extractor (`Wav2Vec2FeatureExtractor`):
        An instance of [`Wav2Vec2FeatureExtractor`]. The feature extractor is a required input.
    tokenizer ([`PreTrainedTokenizer`]):
        An instance of [`PreTrainedTokenizer`]. The tokenizer is a required input.
r   AutoTokenizerc                 T   > [         TU ]  X5        U R                  U l        SU l        g )NF)super__init__feature_extractorcurrent_processor_in_target_context_manager)selfr$   	tokenizer	__class__s      r   r#   Wav2Vec2Processor.__init__3   s'    *6!%!7!7*/'r   c                   >  [         TU ]  " U40 UD6$ ! [        [        4 a`    [        R
                  " SU R                   S3[        5        [        R                  " U40 UD6n[        R                  " U40 UD6nU " X4S9s $ f = f)NzLoading a tokenizer inside a   from a config that does not include a `tokenizer_class` attribute is deprecated and will be removed in v5. Please add `'tokenizer_class': 'Wav2Vec2CTCTokenizer'` attribute to either your `config.json` or `tokenizer_config.json` file to suppress this warning: )r$   r(   )
r"   from_pretrainedOSError
ValueErrorwarningswarnr   FutureWarningr   r   )clspretrained_model_name_or_pathkwargsr$   r(   r)   s        r   r,   !Wav2Vec2Processor.from_pretrained8   s    	Q7*+HSFSS$ 	QMM-cll^ <2 2
  !9 H HIf qjp q,<<=Ze^deI):PP	Qs    A-BBaudiotextr4   c                    SU;   a'  [         R                  " S5        UR                  S5      nUc  Uc  [        S5      eU R                  " [
        4SU R                  R                  0UD6nU R                  (       a   U R                  " U40 US   DUS   DUS   D6$ Ub  U R                  " U40 US   D6nUb  U R                  " U40 US   D6nUc  W$ Uc  W$ WS   WS	'   U$ )
a  
This method forwards all arguments to [`Wav2Vec2FeatureExtractor.__call__`] and/or
[`PreTrainedTokenizer.__call__`] depending on the input modality and returns their outputs. If both modalities are passed, [`Wav2Vec2FeatureExtractor.__call__`] and [`PreTrainedTokenizer.__call__`] are called.

Args:
    audio (`np.ndarray`, `torch.Tensor`, `List[np.ndarray]`, `List[torch.Tensor]`, *optional*):
        An audio input is passed to [`Wav2Vec2FeatureExtractor.__call__`].
    text (`str`, `List[str]`, *optional*):
        A text input is passed to [`PreTrainedTokenizer.__call__`].


Returns:
    This method returns the results of each `call` method. If both are used, the output is a dictionary containing the results of both.

raw_speechzLUsing `raw_speech` as a keyword argument is deprecated. Use `audio` instead.zAYou need to specify either an `audio` or `text` input to process.tokenizer_init_kwargsaudio_kwargstext_kwargscommon_kwargs	input_idslabels)r/   r0   popr.   _merge_kwargsr   r(   init_kwargsr&   r%   r$   )	r'   r6   r7   imagesvideosr4   output_kwargsinputs	encodingss	            r   __call__Wav2Vec2Processor.__call__K   s    , 6!MMhiJJ|,E=T\`aa**#
"&.."<"<
 
 **))/  .  0	  ++ES]>5RSFtL}]/KLI<M](5F8Mr   c                    U R                   (       a  U R                  R                  " U0 UD6$ UR                  SS5      nUR                  SS5      n[	        U5      S:  a
  US   nUSS nUb   U R
                  R                  " U/UQ70 UD6nUb  U R                  R                  " U40 UD6nUc  U$ Uc  U$ US   US'   U$ )a  
This method operates on batches of extracted features and/or tokenized text. It forwards all arguments to
[`Wav2Vec2FeatureExtractor.pad`] and/or [`PreTrainedTokenizer.pad`] depending on the input modality and returns their outputs. If both modalities are passed, [`Wav2Vec2FeatureExtractor.pad`] and [`PreTrainedTokenizer.pad`] are called.

Args:
    input_features:
        When the first argument is a dictionary containing a batch of tensors, or the `input_features` argument is present, it is passed to [`Wav2Vec2FeatureExtractor.pad`].
    labels:
        When the `label` argument is present, it is passed to [`PreTrainedTokenizer.pad`].

Returns:
    This method returns the results of each `pad` method. If both are used, the output is a dictionary containing the results of both.
input_featuresNr?   r   r   r>   )r&   r%   padr@   lenr$   r(   )r'   argsr4   rK   r?   s        r   rL   Wav2Vec2Processor.pad   s     **))--t>v>>$4d;Hd+t9q=!!WN8D%!3377XXQWXN^^''9&9F>!!#M'-k':N8$!!r   c                 :    U R                   R                  " U0 UD6$ )z
This method forwards all its arguments to PreTrainedTokenizer's [`~PreTrainedTokenizer.batch_decode`]. Please
refer to the docstring of this method for more information.
)r(   batch_decoder'   rN   r4   s      r   rQ   Wav2Vec2Processor.batch_decode   s    
 ~~**D;F;;r   c                 :    U R                   R                  " U0 UD6$ )z
This method forwards all its arguments to PreTrainedTokenizer's [`~PreTrainedTokenizer.decode`]. Please refer
to the docstring of this method for more information.
)r(   decoderR   s      r   rU   Wav2Vec2Processor.decode   s    
 ~~$$d5f55r   c              #      #    [         R                  " S5        SU l        U R                  U l        Sv   U R
                  U l        SU l        g7f)zt
Temporarily sets the tokenizer for processing the input. Useful for encoding the labels when fine-tuning
Wav2Vec2.
z`as_target_processor` is deprecated and will be removed in v5 of Transformers. You can process your labels by using the argument `text` of the regular `__call__` method (either in the same call as your audio inputs, or in a separate call.TNF)r/   r0   r&   r(   r%   r$   )r'   s    r   as_target_processor%Wav2Vec2Processor.as_target_processor   sG      	8	

 +/'!%!%!7!7*/'s   AA)r&   r%   )NNNN)r   r   r   r   __doc__feature_extractor_classtokenizer_classr#   classmethodr,   r
   r   r   strlistr   r   r	   r   rH   rL   rQ   rU   r   rX   r   __classcell__)r)   s   @r   r   r   !   s     9%O0
 Q Q( !NR66 uS$s)Y8IIJK6 016p#"J<6 0 0r   r   )rZ   r/   
contextlibr   typingr   r   processing_utilsr   r   r	   tokenization_utils_baser
   r   r   feature_extraction_wav2vec2r   tokenization_wav2vec2r   r   r   __all__r   r   r   <module>rh      sO     % " H H O O A 7.e d0 d0N 
r   