
    Ch                        S SK Jr  S SKrS SKrS SKrS SKJr  S SKrS SK	J
r
  \(       a
  S SKrS SKJr  \R                  " \5      r " S S\
5      rg)    )annotationsN)TYPE_CHECKING)SentenceEvaluator)SentenceTransformerc                     ^  \ rS rSrSr    S             S	U 4S jjjr S
         SS jjr      SS jr\SS j5       r	Sr
U =r$ )MSEEvaluatorFromDataFrame   ug  
Computes the mean squared error (x100) between the computed sentence embedding and some target sentence embedding.

Args:
    dataframe (List[Dict[str, str]]): It must have the following format. Rows contains different, parallel sentences.
        Columns are the respective language codes::

        [{'en': 'My sentence in English', 'es': 'Oración en español', 'fr': 'Phrase en français'...},
         {'en': 'My second sentence', ...}]
    teacher_model (SentenceTransformer): The teacher model used to compute the sentence embeddings.
    combinations (List[Tuple[str, str]]): Must be of the format ``[('en', 'es'), ('en', 'fr'), ...]``.
        First entry in a tuple is the source language. The sentence in the respective language will be fetched from
        the dataframe and passed to the teacher model. Second entry in a tuple the the target language. Sentence
        will be fetched from the dataframe and passed to the student model
    batch_size (int, optional): The batch size to compute sentence embeddings. Defaults to 8.
    name (str, optional): The name of the evaluator. Defaults to "".
    write_csv (bool, optional): Whether to write the results to a CSV file. Defaults to True.
    truncate_dim (Optional[int], optional): The dimension to truncate sentence embeddings to. If None, uses the model's
        current truncation dimension. Defaults to None.
c                  > [         TU ]  5         X0l        XPl        X@l        U(       a  SU-   nSU-   S-   U l        SS/U l        SU l        X`l        Xpl	        0 U l
        [        R                  S5        [        5       nU R                   H  u  p/ n/ nU Hl  nX   R                  5       S:w  d  M  X   R                  5       S:w  d  M3  UR                  X   5        UR!                  X   5        UR!                  X   5        Mn     X4U R                  X4'   U R                  R!                  U	 S	U
 35        M     [#        U5      nU R%                  X(5      n['        X5       VVs0 sH	  u  nnUU_M     snnU l        g s  snnf )
N_mse_evaluationz_results.csvepochstepsnegative_msezCompute teacher embeddings -)super__init__combinationsname
batch_sizecsv_filecsv_headersprimary_metric	write_csvtruncate_dimdataloggerinfosetstripaddappendlistembed_inputszipteacher_embeddings)self	dataframeteacher_modelr   r   r   r   r   all_source_sentencessrc_langtrg_langsrc_sentencestrg_sentencesrowall_src_embeddingssentemb	__class__s                    r/var/www/html/shao/venv/lib/python3.13/site-packages/sentence_transformers/evaluation/MSEEvaluatorFromDataFrame.pyr   "MSEEvaluatorFromDataFrame.__init__*   ss    	(	$:D(4/.@#W-,"(	01"u"&"3"3HMM =&&(B.3=3F3F3HB3N(,,S];!((7!((7	 ! 0=.LDIIx*+##xj($<= #4  $$89!..}S>ABV>k"l>ks49>k"l"ls   #E<c           
     f   UR                  5         / nU R                   H  u  pgU R                  Xg4   u  p[        R                  " U V
s/ sH  oR
                  U
   PM     sn
5      n[        R                  " U R                  X5      5      nX-
  S-  R                  5       nUS-  nUR                  U5        [        R                  SU R                   SU SU S35        [        R                  SUS 35        M     Ub  U R                  (       a  [        R                  R                  X R                   5      n[        R                  R#                  U5      n[%        US	U(       a  S
OSSS9 n[&        R(                  " U5      nU(       d  UR+                  U R,                  5        UR+                  X4/U-   5        S S S 5        S[        R                  " U5      R/                  5       * 0nU R1                  UU R                  5      nU R3                  UUX45        U$ s  sn
f ! , (       d  f       Nk= f)N   d   zMSE evaluation on z dataset - r   :zMSE (*100):	4fr   awzutf-8)newlinemodeencodingr   )evalr   r   npasarrayr&   r$   meanr"   r   r   r   r   ospathjoinr   isfileopencsvwriterwriterowr   itemprefix_name_to_metrics store_metrics_in_model_card_data)r'   modeloutput_pathr   r   
mse_scoresr+   r,   r-   r.   r1   src_embeddingstrg_embeddingsmsecsv_pathoutput_file_existsfrJ   metricss                      r4   __call__"MSEEvaluatorFromDataFrame.__call__W   s    	


"&"3"3H+/99h5I+J(MZZS`(aS`4)@)@)FS`(abNZZ(9(9%(OPN"39??AC3JCc"KK,TYYK{8*AhZWXYZKK-Bx01 #4 "t~~ww||K?H!#!9h8JPS^efjkA)OOD$4$45 ;< g "BGGJ$7$<$<$>#>?--gtyyA--eWeK1 )b gfs   
H
/AH""
H0c                X    UR                   " U4U R                  SU R                  S.UD6$ )NT)r   convert_to_numpyr   )encoder   r   )r'   rO   	sentenceskwargss       r4   r$   &MSEEvaluatorFromDataFrame.embed_inputsz   s:     ||
!**	

 
 	
    c                    g)NzKnowledge Distillation )r'   s    r4   description%MSEEvaluatorFromDataFrame.description   s    'ra   )
r   r   r   r   r   r   r   r&   r   r   )   r   TN)r(   zlist[dict[str, str]]r)   r   r   zlist[tuple[str, str]]r   intr   strr   boolr   z
int | None)Nrj   )
rO   r   rP   z
str | Noner   rg   r   rg   returnzdict[str, float])rO   r   r^   zstr | list[str] | np.ndarrayrk   z
np.ndarray)rk   rh   )__name__
__module____qualname____firstlineno____doc__r   rY   r$   propertyrd   __static_attributes____classcell__)r3   s   @r4   r   r      s    4 #'+m'+m ++m ,	+m
 +m +m +m !+m +m\ ik!(!7A!QT!be!	!F
"
 0

 

 ( (ra   r   )
__future__r   rI   loggingrD   typingr   numpyrA   2sentence_transformers.evaluation.SentenceEvaluatorr   )sentence_transformers.SentenceTransformerr   	getLoggerrl   r   r   rc   ra   r4   <module>r{      sB    " 
  	    PM			8	$v( 1 v(ra   