from typing import TYPE_CHECKING, Any, Union, overload

from ..utils import (
    add_end_docstrings,
    is_torch_available,
    is_vision_available,
    logging,
    requires_backends,
)
from .base import Pipeline, build_pipeline_init_args


if is_vision_available():
    from ..image_utils import load_image

if is_torch_available():
    import torch

    from ..models.auto.modeling_auto import (
        MODEL_FOR_OBJECT_DETECTION_MAPPING_NAMES,
        MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING_NAMES,
    )

if TYPE_CHECKING:
    from PIL import Image

logger = logging.get_logger(__name__)


@add_end_docstrings(build_pipeline_init_args(has_image_processor=True))
class ObjectDetectionPipeline(Pipeline):
    """
    Object detection pipeline using any `AutoModelForObjectDetection`. This pipeline predicts bounding boxes of objects
    and their classes.

    Example:

    ```python
    >>> from transformers import pipeline

    >>> detector = pipeline(model="facebook/detr-resnet-50")
    >>> detector("https://huggingface.co/datasets/Narsil/image_dummy/raw/main/parrots.png")
    [{'score': 0.997, 'label': 'bird', 'box': {'xmin': 69, 'ymin': 171, 'xmax': 396, 'ymax': 507}}, {'score': 0.999, 'label': 'bird', 'box': {'xmin': 398, 'ymin': 105, 'xmax': 767, 'ymax': 507}}]

    >>> # x, y are expressed relative to the top-left corner.
    ```
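
    Detections are filtered by `threshold` (0.5 by default); raising it keeps only high-confidence boxes. With the
    example image above, both birds score above 0.9, so they also survive a stricter threshold:

    ```python
    >>> detector("https://huggingface.co/datasets/Narsil/image_dummy/raw/main/parrots.png", threshold=0.9)
    [{'score': 0.997, 'label': 'bird', 'box': {'xmin': 69, 'ymin': 171, 'xmax': 396, 'ymax': 507}}, {'score': 0.999, 'label': 'bird', 'box': {'xmin': 398, 'ymin': 105, 'xmax': 767, 'ymax': 507}}]
    ```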

    Learn more about the basics of using a pipeline in the [pipeline tutorial](../pipeline_tutorial).

    This object detection pipeline can currently be loaded from [`pipeline`] using the following task identifier:
    `"object-detection"`.

    See the list of available models on [huggingface.co/models](https://huggingface.co/models?filter=object-detection).
    """

    _load_processor = False
    _load_image_processor = True
    _load_feature_extractor = False
    _load_tokenizer = None

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        if self.framework == "tf":
            raise ValueError(f"The {self.__class__} is only available in PyTorch.")

        requires_backends(self, "vision")
        # Object-detection checkpoints are the primary target, but LayoutLM-style token-classification
        # checkpoints (words classified against OCR boxes) are accepted as well.
        mapping = MODEL_FOR_OBJECT_DETECTION_MAPPING_NAMES.copy()
        mapping.update(MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING_NAMES)
        self.check_model_type(mapping)

    def _sanitize_parameters(self, **kwargs):
        preprocess_params = {}
        if "timeout" in kwargs:
            preprocess_params["timeout"] = kwargs["timeout"]
        postprocess_kwargs = {}
        if "threshold" in kwargs:
            postprocess_kwargs["threshold"] = kwargs["threshold"]
        return preprocess_params, {}, postprocess_kwargs

    @overload
    def __call__(self, image: Union[str, "Image.Image"], *args: Any, **kwargs: Any) -> list[dict[str, Any]]: ...

    @overload
    def __call__(
        self, image: Union[list[str], list["Image.Image"]], *args: Any, **kwargs: Any
    ) -> list[list[dict[str, Any]]]: ...

    def __call__(self, *args, **kwargs) -> Union[list[dict[str, Any]], list[list[dict[str, Any]]]]:
        """
        Detect objects (bounding boxes & classes) in the image(s) passed as inputs.

        Args:
            inputs (`str`, `list[str]`, `PIL.Image` or `list[PIL.Image]`):
                The pipeline handles three types of images:

                - A string containing an HTTP(S) link pointing to an image
                - A string containing a local path to an image
                - An image loaded in PIL directly

                The pipeline accepts either a single image or a batch of images. Images in a batch must all be in the
                same format: all as HTTP(S) links, all as local paths, or all as PIL images.
            threshold (`float`, *optional*, defaults to 0.5):
                The probability necessary to make a prediction.
            timeout (`float`, *optional*, defaults to None):
                The maximum time in seconds to wait for fetching images from the web. If None, no timeout is set and
                the call may block forever.

        Return:
            A list of dictionaries or a list of list of dictionaries containing the result. If the input is a single
            image, will return a list of dictionaries, if the input is a list of several images, will return a list of
            list of dictionaries corresponding to each image.

            The dictionaries contain the following keys:

            - **label** (`str`) -- The class label identified by the model.
            - **score** (`float`) -- The score attributed by the model for that label.
            - **box** (`list[dict[str, int]]`) -- The bounding box of detected object in image's original size.
        """
        # `images` was the historical keyword argument; map it to `inputs` for backward compatibility.
        if "images" in kwargs and "inputs" not in kwargs:
            kwargs["inputs"] = kwargs.pop("images")
        return super().__call__(*args, **kwargs)

    def preprocess(self, image, timeout=None):
        image = load_image(image, timeout=timeout)
        # Keep the original (height, width) around so boxes can be rescaled in `postprocess`.
        target_size = torch.IntTensor([[image.height, image.width]])
        inputs = self.image_processor(images=[image], return_tensors="pt")
        if self.framework == "pt":
            inputs = inputs.to(self.dtype)
        if self.tokenizer is not None:
            # LayoutLM-style checkpoints: the image processor ran OCR and returned words + boxes.
            inputs = self.tokenizer(text=inputs["words"], boxes=inputs["boxes"], return_tensors="pt")
        inputs["target_size"] = target_size
        return inputs

    def _forward(self, model_inputs):
        target_size = model_inputs.pop("target_size")
        outputs = self.model(**model_inputs)
        model_outputs = outputs.__class__({"target_size": target_size, **outputs})
        if self.tokenizer is not None:
            model_outputs["bbox"] = model_inputs["bbox"]
        return model_outputs

    def postprocess(self, model_outputs, threshold=0.5):
        target_size = model_outputs["target_size"]
        if self.tokenizer is not None:
            # This is a LayoutLMForTokenClassification variant.
            # The OCR got the boxes and the model classified the words.
            height, width = target_size[0].tolist()

            def unnormalize(bbox):
                # LayoutLM boxes are normalized to a 0-1000 grid; scale back to pixel coordinates.
                return self._get_bounding_box(
                    torch.Tensor(
                        [
                            (width * bbox[0] / 1000),
                            (height * bbox[1] / 1000),
                            (width * bbox[2] / 1000),
                            (height * bbox[3] / 1000),
                        ]
                    )
                )

            scores, classes = model_outputs["logits"].squeeze(0).softmax(dim=-1).max(dim=-1)
            labels = [self.model.config.id2label[prediction] for prediction in classes.tolist()]
            boxes = [unnormalize(bbox) for bbox in model_outputs["bbox"].squeeze(0)]
            keys = ["score", "label", "box"]
            annotation = [dict(zip(keys, vals)) for vals in zip(scores.tolist(), labels, boxes) if vals[0] > threshold]
        else:
            # This is a regular ForObjectDetection model.
            raw_annotations = self.image_processor.post_process_object_detection(model_outputs, threshold, target_size)
            raw_annotation = raw_annotations[0]
            scores = raw_annotation["scores"]
            labels = raw_annotation["labels"]
            boxes = raw_annotation["boxes"]

            raw_annotation["scores"] = scores.tolist()
            raw_annotation["labels"] = [self.model.config.id2label[label.item()] for label in labels]
            raw_annotation["boxes"] = [self._get_bounding_box(box) for box in boxes]

            # {"scores": [...], "labels": [...], "boxes": [...]} --> [{"score": x, "label": y, "box": z}, ...]
            keys = ["score", "label", "box"]
            annotation = [
                dict(zip(keys, vals))
                for vals in zip(raw_annotation["scores"], raw_annotation["labels"], raw_annotation["boxes"])
            ]

        return annotation

    def _get_bounding_box(self, box: "torch.Tensor") -> dict[str, int]:
        """
        Turns list [xmin, ymin, xmax, ymax] into dict { "xmin": xmin, ... }

        Args:
            box (`torch.Tensor`): Tensor containing the coordinates in corners format.

        Returns:
            bbox (`dict[str, int]`): Dict containing the coordinates in corners format.
        """
        if self.framework != "pt":
            raise ValueError("The ObjectDetectionPipeline is only available in PyTorch.")
        xmin, ymin, xmax, ymax = box.int().tolist()
        bbox = {
            "xmin": xmin,
            "ymin": ymin,
            "xmax": xmax,
            "ymax": ymax,
        }
        return bbox
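
# A minimal usage sketch (illustrative, not part of the upstream module). It mirrors the class docstring above;
# the checkpoint is the one shown there, and `cats.png` stands in for any local test image:
#
#     from transformers import pipeline
#
#     detector = pipeline(task="object-detection", model="facebook/detr-resnet-50")
#     for detection in detector("cats.png", threshold=0.7):
#         box = detection["box"]
#         print(f"{detection['label']} ({detection['score']:.2f}): "
#               f"({box['xmin']}, {box['ymin']}) -> ({box['xmax']}, {box['ymax']})")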