
from __future__ import annotations

import random
import warnings
from collections.abc import Iterable
from typing import Any

import torch
from torch import Tensor, nn
from torch.nn import functional as F

from sentence_transformers import SentenceTransformer
from sentence_transformers.losses.CachedGISTEmbedLoss import CachedGISTEmbedLoss
from sentence_transformers.losses.CachedMultipleNegativesRankingLoss import CachedMultipleNegativesRankingLoss
from sentence_transformers.losses.CachedMultipleNegativesSymmetricRankingLoss import (
    CachedMultipleNegativesSymmetricRankingLoss,
)
from sentence_transformers.models import Transformer


class TransformerDecorator:
    """
    Decorator that caches the embeddings of all layers of the transformer.
    When `layer_idx` is set, it returns the cached embeddings of that layer instead.

    This is meant to override the forward function of the Transformer.
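
    A minimal usage sketch (with hypothetical ``model`` and ``features``; in practice
    :class:`AdaptiveLayerLoss` installs and removes this decorator itself)::

        transformer = model[0]
        decorator = TransformerDecorator(transformer, transformer.forward)
        transformer.forward = decorator

        model(features)             # first call: caches the embeddings of every layer
        decorator.set_layer_idx(0)  # later calls replay the cached embeddings of that layer
        model(features)

        transformer.forward = decorator.original_forward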
    """

    def __init__(self, transformer: Transformer, original_forward) -> None:
        self.transformer = transformer
        self.original_forward = original_forward
        self.embeddings = []
        self.last_embeddings = []
        self.features = []
        self.layer_idx = None
        self.call_idx = 0

    def set_layer_idx(self, layer_idx: int) -> None:
        self.layer_idx = layer_idx
        self.call_idx = 0

    def get_layer_embeddings(self) -> Tensor:
        return torch.concat([embedding[self.layer_idx] for embedding in self.embeddings], dim=1)

    def __call__(self, features: dict[str, Tensor]) -> dict[str, Tensor]:
        if self.layer_idx is None:
            output = self.call_grow_cache(features)
        else:
            output = self.call_use_cache(features)
            self.call_idx += 1
        return output

    def call_grow_cache(self, features: dict[str, Tensor]) -> dict[str, Tensor]:
        """
        Temporarily sets the output_hidden_states to True, runs the model, and then restores the original setting.
        Use the all_layer_embeddings to get the embeddings of all layers.
        """
        original_output_hidden_states = self.transformer.auto_model.config.output_hidden_states
        self.transformer.auto_model.config.output_hidden_states = True

        output = self.original_forward(features)
        # The first entry of all_layer_embeddings is the embedding layer output, so skip it
        self.num_layers = len(output["all_layer_embeddings"]) - 1
        self.embeddings.append(output["all_layer_embeddings"][1:])
        self.last_embeddings.append(output["token_embeddings"])
        self.features.append(
            {key: value for key, value in output.items() if key not in ("all_layer_embeddings", "token_embeddings")}
        )

        # Restore the original output_hidden_states setting
        self.transformer.auto_model.config.output_hidden_states = original_output_hidden_states
        if not original_output_hidden_states:
            del output["all_layer_embeddings"]

        return output

    def call_use_cache(self, features: dict[str, Tensor]) -> dict[str, Tensor]:
        return {**self.features[self.call_idx], "token_embeddings": self.embeddings[self.call_idx][self.layer_idx]}


S jrSrg)ForwardDecoratorS   z
Decorator that caches the embeddings after all modules (e.g. pooling) of the model.
Required to get the embeddings after all modules for the KL-divergence loss.

This is meant to override the forward function of the SentenceTransformer.
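
    A minimal sketch (again with hypothetical ``model`` and ``features``; :class:`AdaptiveLayerLoss`
    manages this wrapping itself)::

        forward_decorator = ForwardDecorator(model.forward)
        model.forward = forward_decorator
        model(features)                                   # pooled sentence embeddings are cached
        embeddings = forward_decorator.get_embeddings()   # concatenates and clears the cache
        model.forward = forward_decorator.fn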
    """

    def __init__(self, fn) -> None:
        self.fn = fn
        self.embeddings = []

    def __call__(self, features: dict[str, Tensor]) -> dict[str, Tensor]:
        output = self.fn(features)
        self.embeddings.append(output["sentence_embedding"])
        return output

    def get_embeddings(self) -> Tensor:
        embeddings = torch.concat(self.embeddings, dim=0)
        self.embeddings = []
        return embeddings


class AdaptiveLayerLoss(nn.Module):
    def __init__(
        self,
        model: SentenceTransformer,
        loss: nn.Module,
        n_layers_per_step: int = 1,
        last_layer_weight: float = 1.0,
        prior_layers_weight: float = 1.0,
        kl_div_weight: float = 1.0,
        kl_temperature: float = 0.3,
    ) -> None:
        """
The AdaptiveLayerLoss can be seen as a loss *modifier* that allows you to use other loss functions at non-final
layers of the Sentence Transformer model. This is useful for when you want to train a model where users have
the option to lower the number of layers used to improve their inference speed and memory usage.
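
For example, after training, a user can keep only the first few layers of the underlying
transformer to trade accuracy for speed. A minimal sketch (the exact attribute path is
architecture dependent; a BERT-like backbone is assumed here)::

    # Hypothetical post-training truncation to the first 4 layers
    model[0].auto_model.encoder.layer = model[0].auto_model.encoder.layer[:4]
    model[0].auto_model.config.num_hidden_layers = 4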

Args:
    model: SentenceTransformer model
    loss: The loss function to be used, e.g.
        :class:`MultipleNegativesRankingLoss`,
        :class:`CoSENTLoss`, etc.
    n_layers_per_step: The number of layers to use per step. If
        -1, then all layers are used. If > 0, then a random
        sample of `n_layers_per_step` layers are used per step,
        separate from the final layer, which is always used. The
        2DMSE paper uses `n_layers_per_step=1`. The default
        value is 1.
    last_layer_weight: The weight to use for the loss of the
        final layer. Increase this to focus more on the
        performance when using all layers. The default value is
        1.0.
    prior_layers_weight: The weight to use for the loss of the
        prior layers. Increase this to focus more on the
        performance when using fewer layers. The default value
        is 1.0.
    kl_div_weight: The weight to use for the KL-divergence loss
        that is used to make the prior layers match that of the
        last layer. Increase this to focus more on the
        performance when using fewer layers. The default value
        is 1.0.
    kl_temperature: The temperature to use for the KL-divergence
        loss. If 0, then the KL-divergence loss is not used. The
        default value is 0.3.

References:
    - The concept was inspired by the 2DMSE paper: https://arxiv.org/abs/2402.14776
    - `Adaptive Layers <../../../examples/sentence_transformer/training/adaptive_layer/README.html>`_

Requirements:
    1. The base loss cannot be :class:`CachedMultipleNegativesRankingLoss`,
       :class:`CachedMultipleNegativesSymmetricRankingLoss`, or :class:`CachedGISTEmbedLoss`.

Inputs:
    +---------------------------------------+--------+
    | Texts                                 | Labels |
    +=======================================+========+
    | any                                   | any    |
    +---------------------------------------+--------+

Relations:
    - :class:`Matryoshka2dLoss` uses this loss in combination with :class:`MatryoshkaLoss` which allows for
        output dimensionality reduction for faster downstream tasks (e.g. retrieval).

Example:
    ::

        from sentence_transformers import SentenceTransformer, SentenceTransformerTrainer, losses
        from datasets import Dataset

        model = SentenceTransformer("microsoft/mpnet-base")
        train_dataset = Dataset.from_dict({
            "anchor": ["It's nice weather outside today.", "He drove to work."],
            "positive": ["It's so sunny.", "He took the car to the office."],
        })
        loss = losses.MultipleNegativesRankingLoss(model=model)
        loss = losses.AdaptiveLayerLoss(model, loss)

        trainer = SentenceTransformerTrainer(
            model=model,
            train_dataset=train_dataset,
            loss=loss,
        )
        trainer.train()
        """
        super().__init__()
        self.model = model
        self.loss = loss
        self.n_layers_per_step = n_layers_per_step
        self.last_layer_weight = last_layer_weight
        self.prior_layers_weight = prior_layers_weight
        self.kl_div_weight = kl_div_weight
        self.kl_temperature = kl_temperature
        assert isinstance(self.model[0], Transformer)
        if isinstance(
            loss,
            (CachedMultipleNegativesRankingLoss, CachedMultipleNegativesSymmetricRankingLoss, CachedGISTEmbedLoss),
        ):
            warnings.warn(f"MatryoshkaLoss is not compatible with {loss.__class__.__name__}.", stacklevel=2)

    def forward(self, sentence_features: Iterable[dict[str, Tensor]], labels: Tensor) -> Tensor:
        # Cache the embeddings of every transformer layer while the final-layer loss is computed
        original_transformer_forward = self.model[0].forward
        transformer_decorator = TransformerDecorator(self.model[0], original_transformer_forward)
        self.model[0].forward = transformer_decorator

        # Cache the embeddings after all modules (e.g. pooling) for the KL-divergence loss
        original_forward = self.model.forward
        forward_decorator = ForwardDecorator(original_forward)
        self.model.forward = forward_decorator

        # Run the wrapped loss normally, i.e. over the final layer
        loss = self.loss(sentence_features, labels) * self.last_layer_weight
        if self.kl_temperature > 0:
            final_embeddings = forward_decorator.get_embeddings()
            final_embeddings = F.softmax(final_embeddings / self.kl_temperature, dim=-1)

        # Sample the prior layers to train on this step; the final layer was already used above
        num_layers = transformer_decorator.num_layers
        layer_indices = range(num_layers - 1)
        if self.n_layers_per_step > 0 and self.n_layers_per_step < num_layers - 1:
            layer_indices = random.sample(layer_indices, self.n_layers_per_step)

        for layer_idx in layer_indices:
            # Recompute the wrapped loss using the cached embeddings of this layer
            transformer_decorator.set_layer_idx(layer_idx)
            layer_loss = self.loss(sentence_features, labels)
            loss = loss + layer_loss * (1 + layer_idx) / num_layers * self.prior_layers_weight

            # Push this layer's embedding distribution towards that of the final layer
            if self.kl_temperature > 0:
                embeddings = forward_decorator.get_embeddings()
                kl_div_loss = F.kl_div(
                    F.log_softmax(embeddings / self.kl_temperature, dim=-1),
                    final_embeddings,
                    reduction="batchmean",
                )
                loss = loss + kl_div_loss * self.kl_temperature * self.kl_div_weight

        # Restore the original forward functions
        self.model[0].forward = original_transformer_forward
        self.model.forward = original_forward

        return loss

    def get_config_dict(self) -> dict[str, Any]:
        return {
            "loss": self.loss.__class__.__name__,
            "n_layers_per_step": self.n_layers_per_step,
            "last_layer_weight": self.last_layer_weight,
            "prior_layers_weight": self.prior_layers_weight,
            "kl_div_weight": self.kl_div_weight,
            "kl_temperature": self.kl_temperature,
        }

    @property
    def citation(self) -> str:
        return """
@misc{li20242d,
    title={2D Matryoshka Sentence Embeddings},
    author={Xianming Li and Zongxi Li and Jing Li and Haoran Xie and Qing Li},
    year={2024},
    eprint={2402.14776},
    archivePrefix={arXiv},
    primaryClass={cs.CL}
}
"""