o
    sÒhŠ  ã                   @   sR   d dl mZmZ d dlmZmZ d dlmZ ddlmZm	Z	 G dd„ dej
ƒZdS )	é    )ÚDictÚIterable)ÚTensorÚnn)ÚSentenceTransformeré   )ÚBatchHardTripletLossÚ$BatchHardTripletLossDistanceFunctionc                       s|   e Zd Zejdfdededdf‡ fdd„Zdee	e
ef  d	edefd
d„Zd	ededefdd„Zede
fdd„ƒZ‡  ZS )ÚBatchAllTripletLossé   ÚmodelÚmarginÚreturnNc                    s$   t t| ƒ ¡  || _|| _|| _dS )aÇ  
        BatchAllTripletLoss takes a batch with (sentence, label) pairs and computes the loss for all possible, valid
        triplets, i.e., anchor and positive must have the same label, anchor and negative a different label. The labels
        must be integers, with same label indicating sentences from the same class. Your train dataset
        must contain at least 2 examples per label class.

        Args:
            model: SentenceTransformer model
            distance_metric: Function that returns a distance between
                two embeddings. The class SiameseDistanceMetric contains
                pre-defined metrics that can be used.
            margin: Negative samples should be at least margin further
                apart from the anchor than the positive.

        References:
            * Source: https://github.com/NegatioN/OnlineMiningTripletLoss/blob/master/online_triplet_loss/losses.py
            * Paper: In Defense of the Triplet Loss for Person Re-Identification, https://arxiv.org/abs/1703.07737
            * Blog post: https://omoindrot.github.io/triplet-loss

        Requirements:
            1. Each sentence must be labeled with a class.
            2. Your dataset must contain at least 2 examples per labels class.

        Relations:
            * :class:`BatchHardTripletLoss` uses only the hardest positive and negative samples, rather than all possible, valid triplets.
            * :class:`BatchHardSoftMarginTripletLoss` uses only the hardest positive and negative samples, rather than all possible, valid triplets.
              Also, it does not require setting a margin.
            * :class:`BatchSemiHardTripletLoss` uses only semi-hard triplets, valid triplets, rather than all possible, valid triplets.

        Inputs:
            +------------------+--------+
            | Texts            | Labels |
            +==================+========+
            | single sentences | class  |
            +------------------+--------+

        Example:
            ::

                from sentence_transformers import SentenceTransformer, SentenceTransformerTrainer, losses
                from datasets import Dataset

                model = SentenceTransformer("microsoft/mpnet-base")
                # E.g. 0: sports, 1: economy, 2: politics
                train_dataset = Dataset.from_dict({
                    "sentence": [
                        "He played a great game.",
                        "The stock is up 20%",
                        "They won 2-1.",
                        "The last goal was amazing.",
                        "They all voted against the bill.",
                    ],
                    "label": [0, 1, 0, 0, 2],
                })
                loss = losses.BatchAllTripletLoss(model)

                trainer = SentenceTransformerTrainer(
                    model=model,
                    train_dataset=train_dataset,
                    loss=loss,
                )
                trainer.train()

        N)Úsuperr
   Ú__init__Úsentence_embedderÚtriplet_marginÚdistance_metric)Úselfr   r   r   ©Ú	__class__© ún/var/www/html/alpaca_bot/venv/lib/python3.10/site-packages/sentence_transformers/losses/BatchAllTripletLoss.pyr      s   F
zBatchAllTripletLoss.__init__Úsentence_featuresÚlabelsc                 C   s   |   |d ¡d }|  ||¡S )Nr   Úsentence_embedding)r   Úbatch_all_triplet_loss)r   r   r   Úrepr   r   r   ÚforwardV   s   zBatchAllTripletLoss.forwardÚ
embeddingsc           
      C   sx   |   |¡}| d¡}| d¡}|| | j }t |¡}| ¡ | }d||dk < ||dk }| d¡}	| ¡ |	d  }|S )a]  Build the triplet loss over a batch of embeddings.
        We generate all the valid triplets and average the loss over the positive ones.
        Args:
            labels: labels of the batch, of size (batch_size,)
            embeddings: tensor of shape (batch_size, embed_dim)
            margin: margin for triplet loss
            squared: Boolean. If true, output is the pairwise squared euclidean distance matrix.
                     If false, output is the pairwise euclidean distance matrix.
        Returns:
            Label_Sentence_Triplet: scalar tensor containing the triplet loss
        é   r   r   g¼‰Ø—²Òœ<)r   Ú	unsqueezer   r   Úget_triplet_maskÚfloatÚsizeÚsum)
r   r   r   Úpairwise_distÚanchor_positive_distÚanchor_negative_distÚtriplet_lossÚmaskÚvalid_tripletsÚnum_positive_tripletsr   r   r   r   Z   s   




z*BatchAllTripletLoss.batch_all_triplet_lossc                 C   s   dS )Na  
@misc{hermans2017defense,
    title={In Defense of the Triplet Loss for Person Re-Identification}, 
    author={Alexander Hermans and Lucas Beyer and Bastian Leibe},
    year={2017},
    eprint={1703.07737},
    archivePrefix={arXiv},
    primaryClass={cs.CV}
}
r   )r   r   r   r   Úcitation…   s   zBatchAllTripletLoss.citation)Ú__name__Ú
__module__Ú__qualname__r	   Úeucledian_distancer   r#   r   r   r   Ústrr   r   r   Úpropertyr-   Ú__classcell__r   r   r   r   r
   
   s    üþüû"K+r
   N)Útypingr   r   Útorchr   r   Ú)sentence_transformers.SentenceTransformerr   r   r	   ÚModuler
   r   r   r   r   Ú<module>   s
    