from __future__ import annotations

from collections.abc import Iterable

import torch
from torch import Tensor

from sentence_transformers.SentenceTransformer import SentenceTransformer

from .BatchHardTripletLoss import BatchHardTripletLoss, BatchHardTripletLossDistanceFunction


class BatchHardSoftMarginTripletLoss(BatchHardTripletLoss):
    def __init__(
        self,
        model: SentenceTransformer,
        distance_metric=BatchHardTripletLossDistanceFunction.eucledian_distance,
    ) -> None:
        """
        BatchHardSoftMarginTripletLoss takes a batch with (sentence, label) pairs and computes the loss for all possible, valid
        triplets, i.e., anchor and positive must have the same label, anchor and negative a different label. The labels
        must be integers, with same label indicating sentences from the same class. Your train dataset
        must contain at least 2 examples per label class. This soft-margin variant does not require setting a margin:
        instead of the hinge ``max(0, d(a, p) - d(a, n) + margin)``, it minimizes the smooth relaxation
        ``log(1 + exp(d(a, p) - d(a, n)))`` over the hardest positive and hardest negative in the batch.

        Args:
            model: SentenceTransformer model
            distance_metric: Function that returns a distance between
                two embeddings. The class ``BatchHardTripletLossDistanceFunction`` contains
                pre-defined metrics that can be used.

        Definitions:
            :Easy triplets: Triplets which have a loss of 0 because
                ``distance(anchor, positive) + margin < distance(anchor, negative)``.
            :Hard triplets: Triplets where the negative is closer to the anchor than the positive, i.e.,
                ``distance(anchor, negative) < distance(anchor, positive)``.
            :Semi-hard triplets: Triplets where the negative is not closer to the anchor than the positive, but which
                still have a positive loss, i.e., ``distance(anchor, positive) < distance(anchor, negative) + margin``.

        References:
            * Source: https://github.com/NegatioN/OnlineMiningTripletLoss/blob/master/online_triplet_loss/losses.py
            * Paper: In Defense of the Triplet Loss for Person Re-Identification, https://arxiv.org/abs/1703.07737
            * Blog post: https://omoindrot.github.io/triplet-loss

        Requirements:
            1. Each sentence must be labeled with a class.
            2. Your dataset must contain at least 2 examples per label class.
            3. Your dataset should contain hard positives and negatives.

        Inputs:
            +------------------+--------+
            | Texts            | Labels |
            +==================+========+
            | single sentences | class  |
            +------------------+--------+

        Recommendations:
            - Use ``BatchSamplers.GROUP_BY_LABEL`` (:class:`docs <sentence_transformers.training_args.BatchSamplers>`) to
              ensure that each batch contains 2+ examples per label class.

        Relations:
            * :class:`BatchHardTripletLoss` uses a user-specified margin, while this loss does not require setting a margin.

        Example:
            ::

                from sentence_transformers import SentenceTransformer, SentenceTransformerTrainer, losses
                from datasets import Dataset

                model = SentenceTransformer("microsoft/mpnet-base")
                # E.g. 0: sports, 1: economy, 2: politics
                train_dataset = Dataset.from_dict({
                    "sentence": [
                        "He played a great game.",
                        "The stock is up 20%",
                        "They won 2-1.",
                        "The last goal was amazing.",
                        "They all voted against the bill.",
                    ],
                    "label": [0, 1, 0, 0, 2],
                })
                loss = losses.BatchHardSoftMarginTripletLoss(model)

                trainer = SentenceTransformerTrainer(
                    model=model,
                    train_dataset=train_dataset,
                    loss=loss,
                )
                trainer.train()
N)super__init__sentence_embedderdistance_metric)selfmodelr   	__class__s      s/var/www/html/shao/venv/lib/python3.13/site-packages/sentence_transformers/losses/BatchHardSoftMarginTripletLoss.pyr   'BatchHardSoftMarginTripletLoss.__init__   s    T 	!&.    c                R    U R                  US   5      S   nU R                  X#5      $ )Nr   sentence_embedding)r   #batch_hard_triplet_soft_margin_loss)r   sentence_featureslabelsreps       r   forward&BatchHardSoftMarginTripletLoss.forward\   s/    $$%6q%9:;OP77DDr   c                   U R                  U5      n[        R                  " U5      R                  5       nXC-  nUR	                  SSS9u  pg[        R
                  " U5      R                  5       nUR	                  SSS9u  pX9SU-
  -  -   n
U
R                  SSS9u  p[        R                  " [        R                  " Xk-
  5      5      nUR                  5       nU$ )a  Build the triplet loss over a batch of embeddings.
        For each anchor, we get the hardest positive and hardest negative to form a triplet.

        Args:
            labels: labels of the batch, of size (batch_size,)
            embeddings: tensor of shape (batch_size, embed_dim)

        Returns:
            Scalar tensor containing the triplet loss.
        """
embeddingspairwise_distmask_anchor_positiveanchor_positive_disthardest_positive_dist_mask_anchor_negativemax_anchor_negative_distanchor_negative_disthardest_negative_disttltriplet_losss                 r   r   BBatchHardSoftMarginTripletLoss.batch_hard_triplet_soft_margin_lossb   s     ,,Z8  4TTU[\bbd  4C $8#;#;At#;#L   4TTU[\bbd '4&7&74&7&H# ,3QeKe/ff $8#;#;At#;#L 
 [[#8#PQRwwyr   c                    g)Na  
@misc{hermans2017defense,
    title={In Defense of the Triplet Loss for Person Re-Identification},
    author={Alexander Hermans and Lucas Beyer and Bastian Leibe},
    year={2017},
    eprint={1703.07737},
    archivePrefix={arXiv},
    primaryClass={cs.CV}
}
"""
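
if __name__ == "__main__":
    # Minimal numeric sanity check of the soft-margin math above. This is an illustrative
    # sketch with made-up 2-D embeddings and labels, not part of the library API; it
    # re-derives the same quantities as batch_hard_triplet_soft_margin_loss without
    # loading a SentenceTransformer model. Because this module uses relative imports,
    # run it as a module:
    #   python -m sentence_transformers.losses.BatchHardSoftMarginTripletLoss
    embeddings = torch.tensor(
        [
            [0.0, 0.0],  # class 0
            [0.1, 0.0],  # class 0, a close positive
            [1.0, 1.0],  # class 1
            [1.1, 0.9],  # class 1
        ]
    )
    labels = torch.tensor([0, 0, 1, 1])

    # Pairwise distances and validity masks, exactly as in the method above
    pairwise = BatchHardTripletLossDistanceFunction.eucledian_distance(embeddings)
    pos_mask = BatchHardTripletLoss.get_anchor_positive_triplet_mask(labels).float()
    neg_mask = BatchHardTripletLoss.get_anchor_negative_triplet_mask(labels).float()

    # Hardest positive and hardest negative per anchor
    d_ap, _ = (pos_mask * pairwise).max(1, keepdim=True)
    d_an, _ = (pairwise + pairwise.max(1, keepdim=True)[0] * (1.0 - neg_mask)).min(1, keepdim=True)

    # Positives are much closer than negatives here, so log(1 + exp(d_ap - d_an)) is near zero
    print(f"soft-margin triplet loss: {torch.log1p(torch.exp(d_ap - d_an)).mean().item():.4f}")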