from __future__ import annotations

import random
from collections.abc import Iterable
from typing import Any

import torch
import torch.nn.functional as F
from torch import Tensor, nn

from sentence_transformers import SentenceTransformer
from sentence_transformers.losses import (
    CachedGISTEmbedLoss,
    CachedMultipleNegativesRankingLoss,
    CachedMultipleNegativesSymmetricRankingLoss,
)


def shrink(tensor: Tensor, dim: int) -> Tensor:
    tensor_dim = tensor.shape[-1]
    if dim > tensor_dim:
        raise ValueError(
            f"Dimension {dim} in matryoshka_dims cannot be greater than the model's embedding dimension: {tensor_dim}"
        )
    tensor = tensor[..., :dim]
    tensor = F.normalize(tensor, p=2, dim=-1)
    return tensor

class ForwardDecorator:
    """
    This decorator is used to cache the output of the Sentence Transformer's forward pass,
    so that it can be shrunk and reused for multiple loss calculations. This prevents the
    model from recalculating the embeddings for each desired Matryoshka dimensionality.

    This decorator is applied to `SentenceTransformer.forward`.
    """

    def __init__(self, fn) -> None:
        self.fn = fn

        self.dim = None
        self.cache = []
        self.cache_dim = None
        self.idx = 0

    def set_dim(self, dim) -> None:
        self.dim = dim
        self.idx = 0

    def __call__(self, features: dict[str, Tensor]) -> dict[str, Tensor]:
        # Growing cache: the first pass through the data runs the wrapped forward
        # and stores the full-size output
        if self.cache_dim is None or self.cache_dim == self.dim:
            output = self.fn(features)
            self.cache.append(output)
            self.cache_dim = self.dim
        # Using cache: subsequent passes reuse the stored outputs instead of
        # recomputing the embeddings
        else:
            output = self.cache[self.idx]
        if "token_embeddings" in output:
            output["token_embeddings"] = shrink(output["token_embeddings"], self.dim)
        output["sentence_embedding"] = shrink(output["sentence_embedding"], self.dim)
        self.idx += 1
        return output

class CachedLossDecorator:
    """
    This decorator is used with the Cached... losses to compute the underlying loss function
    for each Matryoshka dimensionality. This is done by shrinking the pre-computed embeddings
    to the desired dimensionality and then passing them to the underlying loss function once
    for each desired dimensionality.

    This decorator is applied to the `calculate_loss` method of the Cached... losses.
    """

    def __init__(
        self, fn, matryoshka_dims: list[int], matryoshka_weights: list[float | int], n_dims_per_step: int = -1
    ) -> None:
        self.fn = fn
        self.matryoshka_dims = matryoshka_dims
        self.matryoshka_weights = matryoshka_weights
        self.n_dims_per_step = n_dims_per_step

    def __call__(self, reps: list[list[Tensor]], *args, **kwargs) -> Tensor:
        dim_indices = range(len(self.matryoshka_dims))
        if self.n_dims_per_step > 0 and self.n_dims_per_step < len(dim_indices):
            dim_indices = random.sample(dim_indices, self.n_dims_per_step)

        loss = 0.0
        for idx in dim_indices:
            dim = self.matryoshka_dims[idx]
            weight = self.matryoshka_weights[idx]

            truncated = [[shrink(r, dim) for r in minibatch] for minibatch in reps]
            compute_gradients = torch.is_grad_enabled()
            # Detach the truncated embeddings: otherwise the first backward pass of the
            # underlying loss function would clear the computation graph of the truncation
            if compute_gradients:
                matryoshka_reps = [[r.detach().requires_grad_() for r in minibatch] for minibatch in truncated]
            else:
                matryoshka_reps = truncated
            loss += weight * self.fn(matryoshka_reps, *args, **kwargs)

            # After computing the gradients in minibatches, continue the backward pass
            # through the truncation; the gradients are scaled by the weight so the
            # matryoshka weights are respected in the backward pass as well
            if compute_gradients:
                for t_minibatch, d_minibatch in zip(truncated, matryoshka_reps):
                    for t, d in zip(t_minibatch, d_minibatch):
                        t.backward(weight * d.grad)
        return loss
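
# Minimal sketch of the detach-then-backward pattern used above, with made-up
# tensors that are independent of this module. Gradients are first collected on
# a detached copy, then pushed through the original graph manually:
#
#   x = torch.randn(4, 8, requires_grad=True)
#   t = x * 2                        # stands in for the truncation step
#   d = t.detach().requires_grad_()
#   d.pow(2).sum().backward()        # stands in for the underlying loss; fills d.grad
#   t.backward(d.grad)               # continue the backward pass through to x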
S jr\SS j5       rSr	U =r
$ )MatryoshkaLossq   c                  > [         TU ]  5         Xl        X l        Uc  S/[	        U5      -  n[        X45      n[        [        US SS96 u  U l        U l        XPl	        [        [        [        4U l        [        X R                  5      (       a1  [        UR                   U R                  U R                  5      Ul        gg)a*
  
The MatryoshkaLoss can be seen as a loss *modifier* that allows you to use other loss functions at various
different embedding dimensions. This is useful for when you want to train a model where users have the option
to lower the embedding dimension to improve their embedding comparison speed and costs.

This loss is also compatible with the Cached... losses, which are in-batch negative losses that allow for
higher batch sizes. The higher batch sizes allow for more negatives, and often result in a stronger model.

Args:
    model: SentenceTransformer model
    loss: The loss function to be used, e.g.
        :class:`MultipleNegativesRankingLoss`,
        :class:`CoSENTLoss`, etc.
    matryoshka_dims: A list of embedding dimensions to be used
        for the loss function, e.g. [768, 512, 256, 128, 64].
    matryoshka_weights: A list of weights to be used for the
        loss function, e.g. [1, 1, 1, 1, 1]. If None, then the
        weights will be set to 1 for all dimensions.
    n_dims_per_step: The number of dimensions to use per step.
        If -1, then all dimensions are used. If > 0, then a
        random sample of n_dims_per_step dimensions are used per
        step. The default value is -1.

References:
    - The concept was introduced in this paper: https://arxiv.org/abs/2205.13147
    - `Matryoshka Embeddings <../../../examples/sentence_transformer/training/matryoshka/README.html>`_

Inputs:
    +---------------------------------------+--------+
    | Texts                                 | Labels |
    +=======================================+========+
    | any                                   | any    |
    +---------------------------------------+--------+

Relations:
    - :class:`Matryoshka2dLoss` uses this loss in combination with :class:`AdaptiveLayerLoss` which allows for
        layer reduction for faster inference.

Example:
    ::

        from sentence_transformers import SentenceTransformer, SentenceTransformerTrainer, losses
        from datasets import Dataset

        model = SentenceTransformer("microsoft/mpnet-base")
        train_dataset = Dataset.from_dict({
            "anchor": ["It's nice weather outside today.", "He drove to work."],
            "positive": ["It's so sunny.", "He took the car to the office."],
        })
        loss = losses.MultipleNegativesRankingLoss(model)
        loss = losses.MatryoshkaLoss(model, loss, [768, 512, 256, 128, 64])

        trainer = SentenceTransformerTrainer(
            model=model,
            train_dataset=train_dataset,
            loss=loss,
        )
        trainer.train()
Nr,   c                    U S   $ r   r;   )xs    r   <lambda>)MatryoshkaLoss.__init__.<locals>.<lambda>   s    `abc`dr   T)keyreverse)superr$   modelrV   rH   rO   sortedrA   rB   rC   r
   r	   r   cached_losses
isinstancer=   calculate_loss)r#   ro   rV   rA   rB   rC   dims_weights	__class__s          r   r$   MatryoshkaLoss.__init__r   s    F 	
	%"#s?';!;??8;VLVdnr=s8t5d5. /7

 d..//"5##T%9%94;R;R#D 0r   c                   [        U R                  U R                  5      (       a  U R                  X5      $ U R                  R                  n [        U5      nX@R                  l        [        [        U R                  5      5      nU R                  S:  aI  U R                  [        U5      :  a0  [        R                  " XPR                  5      nUR                  5         SnU HH  nU R                  U   nU R                  U   n	UR                  U5        XiU R                  X5      -  -  nMJ     X0R                  l        U$ ! X0R                  l        f = frF   )rr   rV   rq   ro   forwardr   rG   rH   rA   rC   rI   rJ   sortrB   r'   )
r#   sentence_featureslabelsoriginal_forwarddecorated_forwardrU   rV   r"   r   rW   s
             r   rx   MatryoshkaLoss.forward   s     dii!3!34499.77  ::--	2 01A B!2JJD$8$8 9:K##a'D,@,@3{CS,S$mmK9M9MN  "D"**3/005!))#.+<!EEE	 # "2JJ "2JJs   C"E Ec                    U R                   R                  R                  U R                  U R                  U R
                  S.$ )N)rV   rA   rB   rC   )rV   ru   r5   rA   rB   rC   r#   s    r   get_config_dictMatryoshkaLoss.get_config_dict   s:    II''00#33"&"9"9#33	
 	
r   c                    g)Na  
@misc{kusupati2024matryoshka,
    title={Matryoshka Representation Learning},
    author={Aditya Kusupati and Gantavya Bhatt and Aniket Rege and Matthew Wallingford and Aditya Sinha and Vivek Ramanujan and William Howard-Snyder and Kaifeng Chen and Sham Kakade and Prateek Jain and Ali Farhadi},
    year={2024},
    eprint={2205.13147},
    archivePrefix={arXiv},
    primaryClass={cs.LG}
}
"""
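
# Illustrative sketch of the inference-side payoff (assumptions: a model trained
# with MatryoshkaLoss, and the `truncate_dim` argument of SentenceTransformer;
# the model path is made up): embeddings can be truncated to a smaller
# Matryoshka dimension for cheaper storage and faster similarity search.
#
#   model = SentenceTransformer("path/to/matryoshka-model", truncate_dim=256)
#   embeddings = model.encode(["It's nice weather outside today."])
#   embeddings.shape  # (1, 256)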