
    Ch                    b    S SK Jr  S SKJr  S SKJr  S SKJr  SSKJr  SSK	J	r	   " S S	\5      r
g
)    )annotations)Any)Module)SentenceTransformer   )AdaptiveLayerLoss)MatryoshkaLossc                     ^  \ rS rSr       S                     SU 4S jjjrSU 4S jjr\S	S j5       rSrU =r	$ )
Matryoshka2dLoss   c           
     F   > [        UUUUUS9n[        TU ]	  UUUUUU	U
S9  g)a  
The Matryoshka2dLoss can be seen as a loss *modifier* that combines the :class:`AdaptiveLayerLoss` and the
:class:`MatryoshkaLoss`. This allows you to train an embedding model that 1) allows users to specify the number
of model layers to use, and 2) allows users to specify the output dimensions to use.

The former is useful for when you want users to have the option to lower the number of layers used to improve
their inference speed and memory usage, and the latter is useful for when you want users to have the option to
lower the output dimensions to improve the efficiency of their downstream tasks (e.g. retrieval) or to lower
their storage costs.

Note, this uses `n_layers_per_step=1` and `n_dims_per_step=1` as default, following the original 2DMSE
implementation.

Args:
    model: SentenceTransformer model
    loss: The loss function to be used, e.g.
        :class:`MultipleNegativesRankingLoss`,
        :class:`CoSENTLoss`, etc.
    matryoshka_dims: A list of embedding dimensions to be used
        for the loss function, e.g. [768, 512, 256, 128, 64].
    matryoshka_weights: A list of weights to be used for the
        loss function, e.g. [1, 1, 1, 1, 1]. If None, then the
        weights will be set to 1 for all dimensions.
    n_layers_per_step: The number of layers to use per step. If
        -1, then all layers are used. If > 0, then a random
        sample of n_layers_per_step layers are used per step.
        The 2DMSE paper uses `n_layers_per_step=1`. The default
        value is -1.
    n_dims_per_step: The number of dimensions to use per step.
        If -1, then all dimensions are used. If > 0, then a
        random sample of n_dims_per_step dimensions are used per
        step. The default value is -1.
    last_layer_weight: The weight to use for the loss of the
        final layer. Increase this to focus more on the
        performance when using all layers. The default value is
        1.0.
    prior_layers_weight: The weight to use for the loss of the
        prior layers. Increase this to focus more on the
        performance when using fewer layers. The default value
        is 1.0.
    kl_div_weight: The weight to use for the KL-divergence loss
        that is used to make the prior layers match that of the
        last layer. Increase this to focus more on the
        performance when using fewer layers. The default value
        is 1.0.
    kl_temperature: The temperature to use for the KL-divergence
        loss. If 0, then the KL-divergence loss is not used. The
        default value is 1.0.

References:
    - See the 2D Matryoshka Sentence Embeddings (2DMSE) paper: https://arxiv.org/abs/2402.14776
    - `Matryoshka Embeddings <../../../examples/sentence_transformer/training/matryoshka/README.html>`_
    - `Adaptive Layers <../../../examples/sentence_transformer/training/adaptive_layer/README.html>`_

Requirements:
    1. The base loss cannot be :class:`CachedMultipleNegativesRankingLoss`,
       :class:`CachedMultipleNegativesSymmetricRankingLoss`, or :class:`CachedGISTEmbedLoss`.

Inputs:
    +---------------------------------------+--------+
    | Texts                                 | Labels |
    +=======================================+========+
    | any                                   | any    |
    +---------------------------------------+--------+

Relations:
    - :class:`MatryoshkaLoss` is used in this loss, and it is responsible for the dimensionality reduction.
    - :class:`AdaptiveLayerLoss` is used in this loss, and it is responsible for the layer reduction.

Example:
    ::

        from sentence_transformers import SentenceTransformer, SentenceTransformerTrainer, losses
        from datasets import Dataset

        model = SentenceTransformer("microsoft/mpnet-base")
        train_dataset = Dataset.from_dict({
            "anchor": ["It's nice weather outside today.", "He drove to work."],
            "positive": ["It's so sunny.", "He took the car to the office."],
        })
        loss = losses.MultipleNegativesRankingLoss(model)
        loss = losses.Matryoshka2dLoss(model, loss, [768, 512, 256, 128, 64])

        trainer = SentenceTransformerTrainer(
            model=model,
            train_dataset=train_dataset,
            loss=loss,
        )
        trainer.train()
)matryoshka_weightsn_dims_per_step)n_layers_per_steplast_layer_weightprior_layers_weightkl_div_weightkl_temperatureN)r	   super__init__)selfmodellossmatryoshka_dimsr   r   r   r   r   r   r   matryoshka_loss	__class__s               e/var/www/html/shao/venv/lib/python3.13/site-packages/sentence_transformers/losses/Matryoshka2dLoss.pyr   Matryoshka2dLoss.__init__   sH    N )1+
 	// 3') 	 	
    c                X   > 0 [         TU ]  5       EU R                  R                  5       E$ )N)r   get_config_dictr   )r   r   s    r   r!    Matryoshka2dLoss.get_config_dict   s2    
g%'
ii'')
 	
r   c                    g)Nz
@misc{li20242d,
    title={2D Matryoshka Sentence Embeddings},
    author={Xianming Li and Zongxi Li and Jing Li and Haoran Xie and Qing Li},
    year={2024},
    eprint={2402.14776},
    archivePrefix={arXiv},
    primaryClass={cs.CL}
}
 )r   s    r   citationMatryoshka2dLoss.citation   s    	r   r$   )Nr   r         ?r'   r'   g333333?)r   r   r   r   r   z	list[int]r   zlist[float | int] | Noner   intr   r(   r   floatr   r)   r   r)   r   r)   returnNone)r*   zdict[str, Any])r*   str)
__name__
__module____qualname____firstlineno__r   r!   propertyr%   __static_attributes____classcell__)r   s   @r   r   r      s     8<!" #&%(" #v
"v
 v
 #	v

 5v
 v
 v
 !v
 #v
 v
 v
 
v
 v
p
 
 
r   r   N)
__future__r   typingr   torch.nnr   sentence_transformersr   r   r	   r   r$   r   r   <module>r8      s&    "   5 0 *J( Jr   