
    Ch5                        S SK Jr  S SKrS SKrS SKrS SKJr  S SKJr  \(       a
  S SK	r
S SKJr  \R                  " \5      r " S S\5      rg)    )annotationsN)TYPE_CHECKING)SentenceEvaluator)SentenceTransformerc                     ^  \ rS rSrSr      S	             S
U 4S jjjr S     SS jjr      SS jr\SS j5       r	S r
SrU =r$ )MSEEvaluator   ab	  
Computes the mean squared error (x100) between the computed sentence embedding
and some target sentence embedding.

The MSE is computed between ||teacher.encode(source_sentences) - student.encode(target_sentences)||.

For multilingual knowledge distillation (https://arxiv.org/abs/2004.09813), source_sentences are in English
and target_sentences are in a different language like German, Chinese, Spanish...

Args:
    source_sentences (List[str]): Source sentences to embed with the teacher model.
    target_sentences (List[str]): Target sentences to embed with the student model.
    teacher_model (SentenceTransformer, optional): The teacher model to compute the source sentence embeddings.
    show_progress_bar (bool, optional): Show progress bar when computing embeddings. Defaults to False.
    batch_size (int, optional): Batch size to compute sentence embeddings. Defaults to 32.
    name (str, optional): Name of the evaluator. Defaults to "".
    write_csv (bool, optional): Write results to CSV file. Defaults to True.
    truncate_dim (int, optional): The dimension to truncate sentence embeddings to. `None` uses the model's current truncation
        dimension. Defaults to None.

Example:
    ::

        from sentence_transformers import SentenceTransformer
        from sentence_transformers.evaluation import MSEEvaluator
        from datasets import load_dataset

        # Load a model
        student_model = SentenceTransformer('paraphrase-multilingual-mpnet-base-v2')
        teacher_model = SentenceTransformer('all-mpnet-base-v2')

        # Load any dataset with some texts
        dataset = load_dataset("sentence-transformers/stsb", split="validation")
        sentences = dataset["sentence1"] + dataset["sentence2"]

        # Given queries, a corpus and a mapping with relevant documents, the MSEEvaluator computes different MSE metrics.
        mse_evaluator = MSEEvaluator(
            source_sentences=sentences,
            target_sentences=sentences,
            teacher_model=teacher_model,
            name="stsb-dev",
        )
        results = mse_evaluator(student_model)
        '''
        MSE evaluation (lower = better) on the stsb-dev dataset:
        MSE (*100):  0.805045
        '''
        print(mse_evaluator.primary_metric)
        # => "stsb-dev_negative_mse"
        print(results[mse_evaluator.primary_metric])
        # => -0.8050452917814255
c	                   > [         T	U ]  5         Xl        X l        X@l        XPl        X`l        SU-   S-   U l        / SQU l        Xpl	        SU l
        U R                  X15      U l        g )Nmse_evaluation_z_results.csv)epochstepsMSEnegative_mse)super__init__truncate_dimtarget_sentencesshow_progress_bar
batch_sizenamecsv_filecsv_headers	write_csvprimary_metricembed_inputssource_embeddings)
selfsource_sentencesr   teacher_modelr   r   r   r   r   	__class__s
            e/var/www/html/shao/venv/lib/python3.13/site-packages/sentence_transformers/evaluation/MSEEvaluator.pyr   MSEEvaluator.__init__H   se     	( 0!2$	)D0>A4",!%!2!2=!S    c                   US:w  a  US:X  a  SU 3nOSU SU S3nOSnU R                   b  USU R                    S3-  nU R                  XR                  5      nU R                  U-
  S	-  R	                  5       nUS
-  n[
        R                  SU R                   SU S35        [
        R                  SUS 35        Ub  U R                  (       a  [        R                  R                  X R                  5      n[        R                  R                  U5      n	[        USU	(       a  SOSSS9 n
[        R                   " U
5      nU	(       d  UR#                  U R$                  5        UR#                  X4U/5        S S S 5        SU* 0nU R'                  XR                  5      nU R)                  XX45        U$ ! , (       d  f       NB= f)Nz after epoch z
 in epoch z after z steps z (truncated to )   d   z'MSE evaluation (lower = better) on the z dataset:zMSE (*100):	4fawzutf-8)newlinemodeencodingr   )r   r   r   r   meanloggerinfor   r   ospathjoinr   isfileopencsvwriterwriterowr   prefix_name_to_metrics store_metrics_in_model_card_data)r   modeloutput_pathr   r   out_txttarget_embeddingsmsecsv_pathoutput_file_existsfr:   metricss                r!   __call__MSEEvaluator.__call__a   s    B;{)%1&ugWUG6BG():):(;1==G --e5J5JK&&)::q@FFHCi=dii[QXPYYZ[\mC8,-"t~~ww||K?H!#!9h8JPS^efjkA)OOD$4$45s 34 g "C4(--gyyA--eeK gfs   /AF77
Gc                n    UR                   " U4U R                  U R                  SU R                  S.UD6$ )NT)r   r   convert_to_numpyr   )encoder   r   r   )r   r>   	sentenceskwargss       r!   r   MSEEvaluator.embed_inputs   sC     ||
"44!**
 
 	
r#   c                    g)NzKnowledge Distillation )r   s    r!   descriptionMSEEvaluator.description   s    'r#   c                B    0 nU R                   b  U R                   US'   U$ )Nr   )r   )r   config_dicts     r!   get_config_dictMSEEvaluator.get_config_dict   s)    (*.*;*;K'r#   )
r   r   r   r   r   r   r   r   r   r   )NF    r&   TN)r   	list[str]r   rX   r   boolr   intr   strr   rY   r   z
int | None)Nr%   r%   )r>   r   r?   z
str | Nonereturnzdict[str, float])r>   r   rL   zstr | list[str] | np.ndarrayr\   z
np.ndarray)r\   r[   )__name__
__module____qualname____firstlineno____doc__r   rG   r   propertyrQ   rU   __static_attributes____classcell__)r    s   @r!   r   r      s    3r "'#'T#T $T
  T T T T !T T4 []#(#7A#	#J
"
 0

 

 ( ( r#   r   )
__future__r   r9   loggingr4   typingr   2sentence_transformers.evaluation.SentenceEvaluatorr   numpynp)sentence_transformers.SentenceTransformerr   	getLoggerr]   r2   r   rP   r#   r!   <module>rm      s?    " 
  	   PM			8	$K$ Kr#   