o
    sh                     @   sN   d dl mZmZ d dlZd dlZd dlmZ d dlmZ G dd dejZ	dS )    )DictUnionN)Image)nnc                       s   e Zd Zddeddf fddZdefddZd	eeejf deeejf fd
dZ	dde
eef deeejf fddZedejfddZdeddfddZededd fddZ  ZS )	CLIPModelopenai/clip-vit-base-patch32N
model_namereturnc                    s:   t t|   |d u r|}tj|| _tj|| _d S N)superr   __init__transformersfrom_pretrainedmodelCLIPProcessor	processor)selfr   processor_name	__class__ d/var/www/html/alpaca_bot/venv/lib/python3.10/site-packages/sentence_transformers/models/CLIPModel.pyr   
   s
   zCLIPModel.__init__c                 C   s   dS )NzCLIPModel()r   r   r   r   r   __repr__   s   zCLIPModel.__repr__featuresc              	   C   s   g }g }d|v r| j j|d d}| j |d }d|v rC| j j|d|dd |dd |dd |dd d	}| j |d }g }t|}t|}t|d
 D ]\}	}
|
dkrc|t	| qS|t	| qSt
| |d< |S )Npixel_values)r      	input_idsattention_maskposition_idsoutput_attentionsoutput_hidden_states)r   r   r   r    r!   image_text_infor   sentence_embedding)r   vision_modelvisual_projection
text_modelgettext_projectioniter	enumerateappendnexttorchstackfloat)r   r   image_embedstext_embedsvision_outputstext_outputsr#   image_featurestext_featuresidx
input_typer   r   r   forward   s.   



zCLIPModel.forwardTpaddingc           
      C   s   g }g }g }t |D ]\}}t|tjr|| |d q
|| |d q
i }t|r9| jj|d|d}t|rJ| jj|dd}	|	j|d< ||d< t	|S )Nr   r   pt)return_tensorsr9   )r;   r   r"   )
r*   
isinstancer   r+   lenr   	tokenizerimage_processorr   dict)
r   textsr9   imagestexts_valuesr"   r6   dataencodingr4   r   r   r   tokenize6   s"   


zCLIPModel.tokenizec                 C   s   | j S r
   )r   r   r   r   r   r>   N   s   zCLIPModel.tokenizeroutput_pathc                 C   s   | j | | j| d S r
   )r   save_pretrainedr   )r   rG   r   r   r   saveR   s   zCLIPModel.save
input_pathc                 C   s
   t | dS )N)r   )r   )rJ   r   r   r   loadV   s   
zCLIPModel.load)r   N)T)__name__
__module____qualname__strr   r   r   r-   Tensorr8   r   boolrF   propertyr   r   r>   rI   staticmethodrK   __classcell__r   r   r   r   r   	   s    	&& r   )
typingr   r   r-   r   PILr   r   Moduler   r   r   r   r   <module>   s    