o
    
sh+                     @   s   d dl Z d dlmZmZ d dlZd dlZddlmZm	Z	m
Z
mZmZ ddlmZmZ e	 r3ddlmZ eeZded	ed
ejfddZeeddG dd deZdS )    N)AnyUnion   )add_end_docstringsis_torch_availableis_torchaudio_availableis_torchcodec_availablelogging   )Pipelinebuild_pipeline_init_args),MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING_NAMESbpayloadsampling_ratereturnc           
      C   s   | }d}d}dddd|d|d|d	d
ddg}zt j|t jt jd}W n ty-   tdw || }|d }t|tj}	|	j	d dkrItd|	S )z?
    Helper function to read an audio file through ffmpeg.
    1f32leffmpegz-izpipe:0z-acz-arz-fz-hide_bannerz	-loglevelquietzpipe:1)stdinstdoutzFffmpeg was not found but is required to load audio files from filenamer   zMalformed soundfile)

subprocessPopenPIPEFileNotFoundError
ValueErrorcommunicatenp
frombufferfloat32shape)
r   r   aracformat_for_conversionffmpeg_commandffmpeg_processoutput_stream	out_bytesaudio r)   i/var/www/html/alpaca_bot/venv/lib/python3.10/site-packages/transformers/pipelines/audio_classification.pyffmpeg_read   s8   
r+   T)has_feature_extractorc                	       s   e Zd ZdZdZdZdZdZ fddZde	e
jeeef dedeeeef  f fd	d
ZdddZdd Zdd ZdddZ  ZS )AudioClassificationPipelinea  
    Audio classification pipeline using any `AutoModelForAudioClassification`. This pipeline predicts the class of a
    raw waveform or an audio file. In case of an audio file, ffmpeg should be installed to support multiple audio
    formats.

    Example:

    ```python
    >>> from transformers import pipeline

    >>> classifier = pipeline(model="superb/wav2vec2-base-superb-ks")
    >>> classifier("https://huggingface.co/datasets/Narsil/asr_dummy/resolve/main/1.flac")
    [{'score': 0.997, 'label': '_unknown_'}, {'score': 0.002, 'label': 'left'}, {'score': 0.0, 'label': 'yes'}, {'score': 0.0, 'label': 'down'}, {'score': 0.0, 'label': 'stop'}]
    ```

    Learn more about the basics of using a pipeline in the [pipeline tutorial](../pipeline_tutorial)


    This pipeline can currently be loaded from [`pipeline`] using the following task identifier:
    `"audio-classification"`.

    See the list of available models on
    [huggingface.co/models](https://huggingface.co/models?filter=audio-classification).
    FTc                    sj   d|v r|d d u rd |d< nd|vrd|d< t  j|i | | jdkr.td| j d| t d S )Ntop_k   ptzThe z is only available in PyTorch.)super__init__	frameworkr   	__class__check_model_typer   )selfargskwargsr4   r)   r*   r2   b   s   

z$AudioClassificationPipeline.__init__inputsr8   r   c                    s   t  j|fi |S )a  
        Classify the sequence(s) given as inputs. See the [`AutomaticSpeechRecognitionPipeline`] documentation for more
        information.

        Args:
            inputs (`np.ndarray` or `bytes` or `str` or `dict`):
                The inputs is either :
                    - `str` that is the filename of the audio file, the file will be read at the correct sampling rate
                      to get the waveform using *ffmpeg*. This requires *ffmpeg* to be installed on the system.
                    - `bytes` it is supposed to be the content of an audio file and is interpreted by *ffmpeg* in the
                      same way.
                    - (`np.ndarray` of shape (n, ) of type `np.float32` or `np.float64`)
                        Raw audio at the correct sampling rate (no further check will be done)
                    - `dict` form can be used to pass raw audio sampled at arbitrary `sampling_rate` and let this
                      pipeline do the resampling. The dict must be either be in the format `{"sampling_rate": int,
                      "raw": np.array}`, or `{"sampling_rate": int, "array": np.array}`, where the key `"raw"` or
                      `"array"` is used to denote the raw audio waveform.
            top_k (`int`, *optional*, defaults to None):
                The number of top labels that will be returned by the pipeline. If the provided number is `None` or
                higher than the number of labels available in the model configuration, it will default to the number of
                labels.
            function_to_apply(`str`, *optional*, defaults to "softmax"):
                The function to apply to the model output. By default, the pipeline will apply the softmax function to
                the output of the model. Valid options: ["softmax", "sigmoid", "none"]. Note that passing Python's
                built-in `None` will default to "softmax", so you need to pass the string "none" to disable any
                post-processing.

        Return:
            A list of `dict` with the following keys:

            - **label** (`str`) -- The label predicted.
            - **score** (`float`) -- The corresponding probability.
        )r1   __call__)r6   r:   r8   r9   r)   r*   r;   o   s   "z$AudioClassificationPipeline.__call__Nc                 K   sx   i }|d u r| j jj|d< n|| j jjkr| j jj}||d< |d ur3|dvr.td| d||d< nd|d< i i |fS )Nr.   )softmaxsigmoidnonez'Invalid value for `function_to_apply`: z2. Valid options are ['softmax', 'sigmoid', 'none']function_to_applyr<   )modelconfig
num_labelsr   )r6   r.   r?   r8   postprocess_paramsr)   r)   r*   _sanitize_parameters   s   



z0AudioClassificationPipeline._sanitize_parametersc                 C   s  t |tr/|ds|drt|j}nt|d}| }W d    n1 s*w   Y  t |tr;t	|| j
j}t rNdd l}t ||jrN|  }t rmdd l}dd l}t ||jjrm| }|j}||jd}t |tr| }d|v rd|v sd|v std	|dd }|d u r|d
d  |dd }|d}|}|| j
jkrdd l}t rddlm}	 ntd|	 t |t!j"r|#|n||| j
j }t |t!j"st$dt%|j&dkrtd| j
|| j
jdd}
| j'd ur|
j(| j'd}
|
S )Nzhttp://zhttps://rbr   )arrayr   r   rawrF   zWhen passing a dictionary to AudioClassificationPipeline, the dict needs to contain a "raw" key containing the numpy array or torch tensor representing the audio and a "sampling_rate" key, containing the sampling_rate associated with that arraypath)
functionalztorchaudio is required to resample audio samples in AudioClassificationPipeline. The torchaudio package can be installed through: `pip install torchaudio`.z2We expect a numpy ndarray or torch tensor as inputr
   zFWe expect a single channel audio input for AudioClassificationPipeliner0   )r   return_tensors)dtype))
isinstancestr
startswithrequestsgetcontentopenreadbytesr+   feature_extractorr   r   torchTensorcpunumpyr   
torchcodecdecodersAudioDecoderget_all_samplesdatasample_ratedictcopyr   popr   
torchaudiorI   ImportErrorresampler   ndarray
from_numpy	TypeErrorlenr    rK   to)r6   r:   frV   rZ   _audio_samples_array_inputsin_sampling_rateF	processedr)   r)   r*   
preprocess   sn   






z&AudioClassificationPipeline.preprocessc                 C   s   | j di |}|S )Nr)   )r@   )r6   model_inputsmodel_outputsr)   r)   r*   _forward   s   z$AudioClassificationPipeline._forwardr/   r<   c                    sv   |dkr|j d d}n|dkr|j d  }n|j d }||\}}| }| } fddt||D }|S )Nr<   r   r=   c                    s$   g | ]\}}| j jj| d qS ))scorelabel)r@   rA   id2label).0rw   _idr6   r)   r*   
<listcomp>  s   $ z;AudioClassificationPipeline.postprocess.<locals>.<listcomp>)logitsr<   r=   topktolistzip)r6   rt   r.   r?   probsscoresidslabelsr)   r|   r*   postprocess   s   
z'AudioClassificationPipeline.postprocess)NN)r/   r<   )__name__
__module____qualname____doc___load_processor_load_image_processor_load_feature_extractor_load_tokenizerr2   r   r   rf   rT   rM   r`   r   listr;   rD   rr   ru   r   __classcell__r)   r)   r9   r*   r-   B   s    4
$Kr-   )r   typingr   r   rY   r   rO   utilsr   r   r   r   r	   baser   r   models.auto.modeling_autor   
get_loggerr   loggerrT   intrf   r+   r-   r)   r)   r)   r*   <module>   s   
$