from typing import Dict, Iterable

import torch
import torch.nn.functional as F
from torch import Tensor, nn

from sentence_transformers import SentenceTransformer, util


class MegaBatchMarginLoss(nn.Module):
    def __init__(
        self,
        model: SentenceTransformer,
        positive_margin: float = 0.8,
        negative_margin: float = 0.3,
        use_mini_batched_version: bool = True,
        mini_batch_size: int = 50,
    ) -> None:
        """
        Given a large batch (like 500 or more examples) of (anchor_i, positive_i) pairs, find for each pair in the batch
        the hardest negative, i.e. find j != i such that cos_sim(anchor_i, positive_j) is maximal. Then create from this a
        triplet (anchor_i, positive_i, positive_j) where positive_j serves as the negative for this triplet.

        Then train as with the triplet loss.
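
        As a rough sketch (variable names here are illustrative; the actual
        implementation lives in ``forward_mini_batched``), the mining and loss
        steps amount to::

            scores = util.pytorch_cos_sim(anchor_emb, all_positive_emb)         # (batch, batch)
            scores = scores - 2 * torch.eye(len(scores), device=scores.device)  # mask each pair's own positive
            hardest_negative_ids = torch.argmax(scores, dim=1)                  # hardest negative j per anchor i

            pos_cosine = F.cosine_similarity(anchor_emb, positive_emb)
            neg_cosine = F.cosine_similarity(anchor_emb, hardest_negative_emb)
            loss = (F.relu(positive_margin - pos_cosine) + F.relu(neg_cosine - negative_margin)).mean()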

        Args:
            model: SentenceTransformer model
            positive_margin: Positive margin, cos(anchor, positive)
                should be > positive_margin
            negative_margin: Negative margin, cos(anchor, negative)
                should be < negative_margin
            use_mini_batched_version: As large batch sizes require a lot
                of memory, we can use a mini-batched version. We break
                down the large batch into smaller batches with fewer
                examples.
            mini_batch_size: Size for the mini-batches. Should be a
                divisor of the batch size in your data loader.

        References:
            - This loss function was inspired by the ParaNMT paper: https://www.aclweb.org/anthology/P18-1042/

        Requirements:
            1. (anchor, positive) pairs
            2. Large batches (500 or more examples)

        Input:
            +---------------------------------------+--------+
            | Texts                                 | Labels |
            +=======================================+========+
            | (anchor, positive) pairs              | none   |
            +---------------------------------------+--------+

        Example:
            ::

                from sentence_transformers import SentenceTransformer, InputExample, losses
                from torch.utils.data import DataLoader

                model = SentenceTransformer('all-MiniLM-L6-v2')

                total_examples = 500
                train_batch_size = 250
                train_mini_batch_size = 32

                train_examples = [
                    InputExample(texts=[f"This is sentence number {i}", f"This is sentence number {i+1}"]) for i in range(total_examples)
                ]
                train_dataloader = DataLoader(train_examples, shuffle=True, batch_size=train_batch_size)
                train_loss = losses.MegaBatchMarginLoss(model=model, mini_batch_size=train_mini_batch_size)

                model.fit(
                    [(train_dataloader, train_loss)],
                    epochs=10,
                )
        """
        super(MegaBatchMarginLoss, self).__init__()
        self.model = model
        self.positive_margin = positive_margin
        self.negative_margin = negative_margin
        self.mini_batch_size = mini_batch_size
        self.forward = self.forward_mini_batched if use_mini_batched_version else self.forward_non_mini_batched

    def forward_mini_batched(self, sentence_features: Iterable[Dict[str, Tensor]], labels: Tensor) -> Tensor:
        anchor, positive = sentence_features
        feature_names = list(anchor.keys())

        # Embed all positives once without gradients; they are only needed to mine the hard negatives
        with torch.no_grad():
            self.model.eval()
            all_positive_emb = self.model(positive)["sentence_embedding"].detach()
            self.model.train()

        diagonal_matrix = torch.eye(len(all_positive_emb), len(all_positive_emb), device=all_positive_emb.device)

        # Iterate over the triplets (anchor, positive, hardest_negative) in smaller mini-batches
        for start_idx in range(0, len(all_positive_emb), self.mini_batch_size):
            end_idx = start_idx + self.mini_batch_size
            anchor_emb = self.model({key: anchor[key][start_idx:end_idx] for key in feature_names})[
                "sentence_embedding"
            ]

            # For each anchor, find the hardest negative among all positives in the large batch
            hard_negative_features = {key: [] for key in feature_names}
            with torch.no_grad():
                cos_scores = util.pytorch_cos_sim(anchor_emb, all_positive_emb)
                negative_scores = (
                    cos_scores - 2 * diagonal_matrix[start_idx:end_idx]
                )  # Mask each pair's own positive on the diagonal so it is never selected as the hardest negative
                negatives_max, negatives_ids = torch.max(negative_scores, dim=1)

            for hard_negative_id in negatives_ids:
                for key in feature_names:
                    hard_negative_features[key].append(positive[key][hard_negative_id])

            for key in feature_names:
                hard_negative_features[key] = torch.stack(hard_negative_features[key])

            # Re-embed the positives and hard negatives with gradients enabled
            positive_emb = self.model({key: positive[key][start_idx:end_idx] for key in feature_names})[
                "sentence_embedding"
            ]
            negative_emb = self.model(hard_negative_features)["sentence_embedding"]

            assert anchor_emb.shape == positive_emb.shape
            assert anchor_emb.shape == negative_emb.shape

            # Margin loss: push cos(anchor, positive) above positive_margin and cos(anchor, negative) below negative_margin
            pos_cosine = F.cosine_similarity(anchor_emb, positive_emb)
            neg_cosine = F.cosine_similarity(anchor_emb, negative_emb)
            losses = F.relu(self.positive_margin - pos_cosine) + F.relu(neg_cosine - self.negative_margin)
            losses = losses.mean()

            # Backpropagate every mini-batch except the last one; the last loss is returned and
            # backpropagated by the outside training loop
            if end_idx < len(all_positive_emb):
                losses.backward()

        return losses

    def forward_non_mini_batched(self, sentence_features: Iterable[Dict[str, Tensor]], labels: Tensor) -> Tensor:
        reps = [self.model(sentence_feature)["sentence_embedding"] for sentence_feature in sentence_features]
        embeddings_a, embeddings_b = reps

        cos_scores = util.pytorch_cos_sim(embeddings_a, embeddings_b)
        positive_scores = torch.diagonal(cos_scores)
        # Mask the diagonal so each anchor's own positive is never picked as its hardest negative
        negative_scores = cos_scores - 2 * torch.eye(*cos_scores.shape, device=cos_scores.device)
        negatives_max, _ = torch.max(negative_scores, dim=1)
        losses = F.relu(self.positive_margin - positive_scores) + F.relu(negatives_max - self.negative_margin)
        return losses.mean()

    @property
    def citation(self) -> str:
        return """
@inproceedings{wieting-gimpel-2018-paranmt,
    title = "{P}ara{NMT}-50{M}: Pushing the Limits of Paraphrastic Sentence Embeddings with Millions of Machine Translations",
    author = "Wieting, John and Gimpel, Kevin",
    editor = "Gurevych, Iryna and Miyao, Yusuke",
    booktitle = "Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)",
    month = jul,
    year = "2018",
    address = "Melbourne, Australia",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/P18-1042",
    doi = "10.18653/v1/P18-1042",
    pages = "451--462",
}
"""