from __future__ import annotations

from collections.abc import Iterable

import torch
import torch.nn.functional as F
from torch import Tensor, nn

from sentence_transformers import SentenceTransformer, util


class MegaBatchMarginLoss(nn.Module):
    def __init__(
        self,
        model: SentenceTransformer,
        positive_margin: float = 0.8,
        negative_margin: float = 0.3,
        use_mini_batched_version: bool = True,
        mini_batch_size: int = 50,
    ) -> None:
        """
Given a large batch (like 500 or more examples) of (anchor_i, positive_i) pairs, find for each pair in the batch
the hardest negative, i.e. find j != i such that cos_sim(anchor_i, positive_j) is maximal. Then create from this a
triplet (anchor_i, positive_i, positive_j) where positive_j serves as the negative for this triplet.

Then train as with the triplet loss.
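
For illustration, the hard-negative selection step on its own (a minimal sketch, with random
vectors standing in for real sentence embeddings)::

    import torch
    from sentence_transformers import util

    anchor_emb = torch.randn(8, 384)    # stand-ins for encoded anchors
    positive_emb = torch.randn(8, 384)  # stand-ins for encoded positives

    cos_scores = util.pytorch_cos_sim(anchor_emb, positive_emb)  # (8, 8) similarity matrix
    cos_scores = cos_scores - 2 * torch.eye(8)  # push cos_sim(anchor_i, positive_i) below -1
    hardest_negative_ids = cos_scores.argmax(dim=1)  # for each i, the j != i maximizing cos_sim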

Args:
    model: SentenceTransformer model
    positive_margin: Positive margin, cos(anchor, positive)
        should be > positive_margin
    negative_margin: Negative margin, cos(anchor, negative)
        should be < negative_margin
    use_mini_batched_version: As large batch sizes require a lot
        of memory, we can use a mini-batched version. We break
        down the large batch into smaller batches with fewer
        examples.
    mini_batch_size: Size for the mini-batches. Should be a
        divisor for the batch size in your data loader.

References:
    - This loss function was inspired by the ParaNMT paper: https://www.aclweb.org/anthology/P18-1042/

Requirements:
    1. (anchor, positive) pairs
    2. Large batches (500 or more examples)

Inputs:
    +---------------------------------------+--------+
    | Texts                                 | Labels |
    +=======================================+========+
    | (anchor, positive) pairs              | none   |
    +---------------------------------------+--------+

Recommendations:
    - Use ``BatchSamplers.NO_DUPLICATES`` (:class:`docs <sentence_transformers.training_args.BatchSamplers>`) to
      ensure that no in-batch negatives are duplicates of the anchor or positive samples.

Example:
    ::

        from sentence_transformers import SentenceTransformer, SentenceTransformerTrainingArguments, SentenceTransformerTrainer, losses
        from datasets import Dataset

        train_batch_size = 250
        train_mini_batch_size = 32

        model = SentenceTransformer('all-MiniLM-L6-v2')
        train_dataset = Dataset.from_dict({
            "anchor": [f"This is sentence number {i}" for i in range(500)],
            "positive": [f"This is sentence number {i}" for i in range(1, 501)],
        })
        loss = losses.MegaBatchMarginLoss(model=model, mini_batch_size=train_mini_batch_size)

        args = SentenceTransformerTrainingArguments(
            output_dir="output",
            per_device_train_batch_size=train_batch_size,
        )
        trainer = SentenceTransformerTrainer(
            model=model,
            args=args,
            train_dataset=train_dataset,
            loss=loss,
        )
        trainer.train()
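
Note:
    With ``use_mini_batched_version=True``, every mini-batch except the last calls
    ``backward()`` inside the loss itself; only the loss of the final mini-batch is
    returned, and the surrounding training loop backpropagates it. Peak memory thus
    grows with ``mini_batch_size`` rather than with the full batch size, while hard
    negatives are still mined across the entire batch.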
        """
        super().__init__()
        self.model = model
        self.positive_margin = positive_margin
        self.negative_margin = negative_margin
        self.mini_batch_size = mini_batch_size
        self.forward = self.forward_mini_batched if use_mini_batched_version else self.forward_non_mini_batched

    def forward_mini_batched(self, sentence_features: Iterable[dict[str, Tensor]], labels: Tensor) -> Tensor:
        anchor, positive = sentence_features
        feature_names = list(anchor.keys())

        # Embed the full batch of positives once, without gradients; they serve as
        # the candidate pool for hard-negative mining
        with torch.no_grad():
            self.model.eval()
            all_positive_emb = self.model(positive)["sentence_embedding"].detach()
            self.model.train()

        diagonal_matrix = torch.eye(len(all_positive_emb), len(all_positive_emb), device=all_positive_emb.device)

        # Iterate over the triplets (anchor, positive, hardest_negative) in smaller mini-batches
        for start_idx in range(0, len(all_positive_emb), self.mini_batch_size):
            end_idx = start_idx + self.mini_batch_size
            anchor_emb = self.model({key: anchor[key][start_idx:end_idx] for key in feature_names})[
                "sentence_embedding"
            ]

            # For each anchor, find the hardest negative: the positive_j (j != i)
            # with maximal cosine similarity to anchor_i
            hard_negative_features = {key: [] for key in feature_names}
            with torch.no_grad():
                cos_scores = util.pytorch_cos_sim(anchor_emb, all_positive_emb)
                # Subtract 2 along the diagonal so the true positives can never be
                # selected by the max() operation below
                negative_scores = cos_scores - 2 * diagonal_matrix[start_idx:end_idx]
                negatives_max, negatives_ids = torch.max(negative_scores, dim=1)

            for hard_negative_id in negatives_ids:
                for key in feature_names:
                    hard_negative_features[key].append(positive[key][hard_negative_id])

            for key in feature_names:
                hard_negative_features[key] = torch.stack(hard_negative_features[key])

            # Recompute the positive and hard-negative embeddings with gradients enabled
            positive_emb = self.model({key: positive[key][start_idx:end_idx] for key in feature_names})[
                "sentence_embedding"
            ]
            negative_emb = self.model(hard_negative_features)["sentence_embedding"]

            assert anchor_emb.shape == positive_emb.shape
            assert anchor_emb.shape == negative_emb.shape

            # Triplet-style margin loss on the cosine similarities
            pos_cosine = F.cosine_similarity(anchor_emb, positive_emb)
            neg_cosine = F.cosine_similarity(anchor_emb, negative_emb)
            losses = F.relu(self.positive_margin - pos_cosine) + F.relu(neg_cosine - self.negative_margin)
            losses = losses.mean()

            # Backpropagate every mini-batch except the last one; the loss of the last
            # mini-batch is returned and backpropagated by the outer training loop
            if end_idx < len(all_positive_emb):
                losses.backward()

        return losses

    def forward_non_mini_batched(self, sentence_features: Iterable[dict[str, Tensor]], labels: Tensor) -> Tensor:
        reps = [self.model(sentence_feature)["sentence_embedding"] for sentence_feature in sentence_features]
        embeddings_a, embeddings_b = reps

        cos_scores = util.pytorch_cos_sim(embeddings_a, embeddings_b)
        positive_scores = torch.diagonal(cos_scores)
        # Subtract 2 along the diagonal so the true positives cannot be selected as negatives
        negative_scores = cos_scores - 2 * torch.eye(*cos_scores.shape, device=cos_scores.device)
        negatives_max, _ = torch.max(negative_scores, dim=1)

        losses = F.relu(self.positive_margin - positive_scores) + F.relu(negatives_max - self.negative_margin)
        return losses.mean()

    @property
    def citation(self) -> str:
        return """
@inproceedings{wieting-gimpel-2018-paranmt,
    title = "{P}ara{NMT}-50{M}: Pushing the Limits of Paraphrastic Sentence Embeddings with Millions of Machine Translations",
    author = "Wieting, John and Gimpel, Kevin",
    editor = "Gurevych, Iryna and Miyao, Yusuke",
    booktitle = "Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)",
    month = jul,
    year = "2018",
    address = "Melbourne, Australia",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/P18-1042",
    doi = "10.18653/v1/P18-1042",
    pages = "451--462",
}
"""