from __future__ import annotations

import csv
import logging
import os
from typing import TYPE_CHECKING, Callable

import numpy as np
import torch
import tqdm
from sklearn.metrics import average_precision_score, ndcg_score

from sentence_transformers.evaluation.SentenceEvaluator import SentenceEvaluator
from sentence_transformers.util import cos_sim

if TYPE_CHECKING:
    from torch import Tensor

    from sentence_transformers.SentenceTransformer import SentenceTransformer

logger = logging.getLogger(__name__)


class RerankingEvaluator(SentenceEvaluator):
    """
This class evaluates a SentenceTransformer model for the task of re-ranking.

Given a query and a list of documents, it computes the score [query, doc_i] for all possible
documents and sorts them in decreasing order. Then, MRR@10, NDCG@10, and MAP are computed to measure the quality of the ranking.
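
As an illustration: if a query has a single relevant document that ends up ranked 3rd, that query contributes
1/3 to MRR@10; MAP averages the precision at the positions of all relevant documents; and NDCG@10 additionally
applies a logarithmic discount to relevant documents that appear at lower ranks.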

Args:
    samples (list): A list of dictionaries, where each dictionary represents a sample and has the following keys:

        - 'query': The search query.
        - 'positive': A list of positive (relevant) documents.
        - 'negative': A list of negative (irrelevant) documents.
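
        For instance: `{"query": "capital of France", "positive": ["Paris is the capital of France."], "negative": ["Berlin is the capital of Germany."]}`.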
    at_k (int, optional): Only consider the top k most similar documents to each query for the evaluation. Defaults to 10.
    name (str, optional): Name of the evaluator. Defaults to "".
    write_csv (bool, optional): Write results to CSV file. Defaults to True.
    similarity_fct (Callable[[torch.Tensor, torch.Tensor], torch.Tensor], optional): Similarity function between sentence embeddings. By default, cosine similarity. Defaults to cos_sim.
    batch_size (int, optional): Batch size to compute sentence embeddings. Defaults to 64.
    show_progress_bar (bool, optional): Show progress bar when computing embeddings. Defaults to False.
    use_batched_encoding (bool, optional): Whether or not to encode queries and documents in batches for greater speed, or 1-by-1 to save memory. Defaults to True.
    truncate_dim (Optional[int], optional): The dimension to truncate sentence embeddings to. `None` uses the model's current truncation dimension. Defaults to None.
    mrr_at_k (Optional[int], optional): Deprecated parameter. Please use `at_k` instead. Defaults to None.
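
The reported metrics are `map`, `mrr@{at_k}` and `ndcg@{at_k}`, each prefixed with `name` when one is given;
`ndcg@{at_k}` is exposed as the evaluator's `primary_metric`.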

Example:
    ::

        from sentence_transformers import SentenceTransformer
        from sentence_transformers.evaluation import RerankingEvaluator
        from datasets import load_dataset

        # Load a model
        model = SentenceTransformer("all-MiniLM-L6-v2")

        # Load a dataset with queries, positives, and negatives
        eval_dataset = load_dataset("microsoft/ms_marco", "v1.1", split="validation")

        samples = [
            {
                "query": sample["query"],
                "positive": [text for is_selected, text in zip(sample["passages"]["is_selected"], sample["passages"]["passage_text"]) if is_selected],
                "negative": [text for is_selected, text in zip(sample["passages"]["is_selected"], sample["passages"]["passage_text"]) if not is_selected],
            }
            for sample in eval_dataset
        ]

        # Initialize the evaluator
        reranking_evaluator = RerankingEvaluator(
            samples=samples,
            name="ms-marco-dev",
        )
        results = reranking_evaluator(model)
        '''
        RerankingEvaluator: Evaluating the model on the ms-marco-dev dataset:
        Queries: 9706      Positives: Min 1.0, Mean 1.1, Max 5.0   Negatives: Min 1.0, Mean 7.1, Max 9.0
        MAP: 56.07
        MRR@10: 56.70
        NDCG@10: 67.08
        '''
        print(reranking_evaluator.primary_metric)
        # => ms-marco-dev_ndcg@10
        print(results[reranking_evaluator.primary_metric])
        # => 0.6708042171399308
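
        # The evaluator can also be passed as `evaluator` to a SentenceTransformerTrainer
        # to run this re-ranking evaluation periodically during training.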
    """

    def __init__(
        self,
        samples: list[dict[str, str | list[str]]],
        at_k: int = 10,
        name: str = "",
        write_csv: bool = True,
        similarity_fct: Callable[[torch.Tensor, torch.Tensor], torch.Tensor] = cos_sim,
        batch_size: int = 64,
        show_progress_bar: bool = False,
        use_batched_encoding: bool = True,
        truncate_dim: int | None = None,
        mrr_at_k: int | None = None,
    ) -> None:
        super().__init__()
        self.samples = samples
        self.name = name
        if mrr_at_k is not None:
            logger.warning(f"The `mrr_at_k` parameter has been deprecated; please use `at_k={mrr_at_k}` instead.")
            self.at_k = mrr_at_k
        else:
            self.at_k = at_k
        self.similarity_fct = similarity_fct
        self.batch_size = batch_size
        self.show_progress_bar = show_progress_bar
        self.use_batched_encoding = use_batched_encoding
        self.truncate_dim = truncate_dim

        if isinstance(self.samples, dict):
            self.samples = list(self.samples.values())

        # Drop samples with an empty positive or negative set
        self.samples = [
            sample for sample in self.samples if len(sample["positive"]) > 0 and len(sample["negative"]) > 0
        ]

        self.csv_file = "RerankingEvaluator" + ("_" + name if name else "") + f"_results_@{self.at_k}.csv"
        self.csv_headers = ["epoch", "steps", "MAP", f"MRR@{self.at_k}", f"NDCG@{self.at_k}"]
        self.write_csv = write_csv
        self.primary_metric = f"ndcg@{self.at_k}"

    def __call__(
        self, model: SentenceTransformer, output_path: str | None = None, epoch: int = -1, steps: int = -1
    ) -> dict[str, float]:
        """
Evaluates the model on the dataset and returns the evaluation metrics.

Args:
    model (SentenceTransformer): The SentenceTransformer model to evaluate.
    output_path (str, optional): The output path to write the results. Defaults to None.
    epoch (int, optional): The current epoch number. Defaults to -1.
    steps (int, optional): The current step number. Defaults to -1.

Returns:
    Dict[str, float]: A dictionary containing the evaluation metrics.
        """
        if epoch != -1:
            if steps == -1:
                out_txt = f" after epoch {epoch}"
            else:
                out_txt = f" in epoch {epoch} after {steps} steps"
        else:
            out_txt = ""
        if self.truncate_dim is not None:
            out_txt += f" (truncated to {self.truncate_dim})"

        logger.info(f"RerankingEvaluator: Evaluating the model on the {self.name} dataset{out_txt}:")

        scores = self.compute_metrices(model)
        mean_ap = scores["map"]
        mean_mrr = scores["mrr"]
        mean_ndcg = scores["ndcg"]

        # Some statistics about the dataset
        num_positives = [len(sample["positive"]) for sample in self.samples]
        num_negatives = [len(sample["negative"]) for sample in self.samples]
        logger.info(
            f"Queries: {len(self.samples)} \t Positives: Min {np.min(num_positives):.1f}, "
            f"Mean {np.mean(num_positives):.1f}, Max {np.max(num_positives):.1f} \t "
            f"Negatives: Min {np.min(num_negatives):.1f}, Mean {np.mean(num_negatives):.1f}, "
            f"Max {np.max(num_negatives):.1f}"
        )
        logger.info(f"MAP: {mean_ap * 100:.2f}")
        logger.info(f"MRR@{self.at_k}: {mean_mrr * 100:.2f}")
        logger.info(f"NDCG@{self.at_k}: {mean_ndcg * 100:.2f}")

        # Write results to disk
        if output_path is not None and self.write_csv:
            csv_path = os.path.join(output_path, self.csv_file)
            output_file_exists = os.path.isfile(csv_path)
            with open(csv_path, newline="", mode="a" if output_file_exists else "w", encoding="utf-8") as f:
                writer = csv.writer(f)
                if not output_file_exists:
                    writer.writerow(self.csv_headers)
                writer.writerow([epoch, steps, mean_ap, mean_mrr, mean_ndcg])

        metrics = {
            "map": mean_ap,
            f"mrr@{self.at_k}": mean_mrr,
            f"ndcg@{self.at_k}": mean_ndcg,
        }
        metrics = self.prefix_name_to_metrics(metrics, self.name)
        self.store_metrics_in_model_card_data(model, metrics, epoch, steps)
        return metrics

    def compute_metrices(self, model: SentenceTransformer):
        """
Computes the evaluation metrics for the given model.

Args:
    model (SentenceTransformer): The SentenceTransformer model to compute metrics for.

Returns:
    Dict[str, float]: A dictionary containing the evaluation metrics.
        """
        return (
            self.compute_metrices_batched(model)
            if self.use_batched_encoding
            else self.compute_metrices_individual(model)
        )

    def compute_metrices_batched(self, model: SentenceTransformer):
        """
Computes the evaluation metrics in a batched way, by batching all queries and all documents together.

Args:
    model (SentenceTransformer): The SentenceTransformer model to compute metrics for.

Returns:
    Dict[str, float]: A dictionary containing the evaluation metrics.
        """
        all_mrr_scores = []
        all_ndcg_scores = []
        all_ap_scores = []

        all_query_embs = self.embed_inputs(
            model,
            [sample["query"] for sample in self.samples],
            encode_fn_name="query",
            show_progress_bar=self.show_progress_bar,
        )

        all_docs = []
        for sample in self.samples:
            all_docs.extend(sample["positive"])
            all_docs.extend(sample["negative"])

        all_docs_embs = self.embed_inputs(
            model, all_docs, encode_fn_name="document", show_progress_bar=self.show_progress_bar
        )

        # Compute the scores of each query against its own documents
        query_idx, docs_idx = 0, 0
        for instance in self.samples:
            query_emb = all_query_embs[query_idx]
            query_idx += 1

            num_pos = len(instance["positive"])
            num_neg = len(instance["negative"])
            docs_emb = all_docs_embs[docs_idx : docs_idx + num_pos + num_neg]
            docs_idx += num_pos + num_neg

            if num_pos == 0 or num_neg == 0:
                continue

            pred_scores = self.similarity_fct(query_emb, docs_emb)
            if len(pred_scores.shape) > 1:
                pred_scores = pred_scores[0]

            pred_scores_argsort = torch.argsort(-pred_scores)  # Sort in decreasing order
            pred_scores = pred_scores.cpu().tolist()

            is_relevant = [1] * num_pos + [0] * num_neg

            # MRR@k: reciprocal rank of the first relevant document within the top k
            mrr_score = 0
            for rank, index in enumerate(pred_scores_argsort[0 : self.at_k]):
                if is_relevant[index]:
                    mrr_score = 1 / (rank + 1)
                    break
            all_mrr_scores.append(mrr_score)

            # NDCG@k and AP over the full candidate list
            all_ndcg_scores.append(ndcg_score([is_relevant], [pred_scores], k=self.at_k))
            all_ap_scores.append(average_precision_score(is_relevant, pred_scores))

        mean_ap = np.mean(all_ap_scores)
        mean_mrr = np.mean(all_mrr_scores)
        mean_ndcg = np.mean(all_ndcg_scores)

        return {"map": mean_ap, "mrr": mean_mrr, "ndcg": mean_ndcg}

    def compute_metrices_individual(self, model: SentenceTransformer):
        """
Computes the evaluation metrics individually by embedding every (query, positive, negative) tuple individually.

Args:
    model (SentenceTransformer): The SentenceTransformer model to compute metrics for.

Returns:
    Dict[str, float]: A dictionary containing the evaluation metrics.
        """
        all_mrr_scores = []
        all_ndcg_scores = []
        all_ap_scores = []

        for instance in tqdm.tqdm(self.samples, disable=not self.show_progress_bar, desc="Samples"):
            query = instance["query"]
            positive = list(instance["positive"])
            negative = list(instance["negative"])

            if len(positive) == 0 or len(negative) == 0:
                continue

            docs = positive + negative
            is_relevant = [1] * len(positive) + [0] * len(negative)

            query_emb = self.embed_inputs(model, [query], encode_fn_name="query", show_progress_bar=False)
            docs_emb = self.embed_inputs(model, docs, encode_fn_name="document", show_progress_bar=False)

            pred_scores = self.similarity_fct(query_emb, docs_emb)
            if len(pred_scores.shape) > 1:
                pred_scores = pred_scores[0]

            pred_scores_argsort = torch.argsort(-pred_scores)  # Sort in decreasing order
            pred_scores = pred_scores.cpu().tolist()

            # MRR@k: reciprocal rank of the first relevant document within the top k
            mrr_score = 0
            for rank, index in enumerate(pred_scores_argsort[0 : self.at_k]):
                if is_relevant[index]:
                    mrr_score = 1 / (rank + 1)
                    break
            all_mrr_scores.append(mrr_score)

            # NDCG@k and AP over the full candidate list
            all_ndcg_scores.append(ndcg_score([is_relevant], [pred_scores], k=self.at_k))
            all_ap_scores.append(average_precision_score(is_relevant, pred_scores))

        mean_ap = np.mean(all_ap_scores)
        mean_mrr = np.mean(all_mrr_scores)
        mean_ndcg = np.mean(all_ndcg_scores)

        return {"map": mean_ap, "mrr": mean_mrr, "ndcg": mean_ndcg}

    def embed_inputs(
        self,
        model: SentenceTransformer,
        sentences: str | list[str] | np.ndarray,
        encode_fn_name: str | None = None,
        show_progress_bar: bool | None = None,
        **kwargs,
    ) -> Tensor:
        if encode_fn_name is None:
            encode_fn = model.encode
        elif encode_fn_name == "query":
            encode_fn = model.encode_query
        elif encode_fn_name == "document":
            encode_fn = model.encode_document
        return encode_fn(
            sentences,
            batch_size=self.batch_size,
            show_progress_bar=show_progress_bar,
            convert_to_tensor=True,
            truncate_dim=self.truncate_dim,
            **kwargs,
        )

    def get_config_dict(self):
        config_dict = {"at_k": self.at_k}
        if self.truncate_dim is not None:
            config_dict["truncate_dim"] = self.truncate_dim
        return config_dict