
    Ch1                        S SK Jr  S SKrS SKrS SKrS SKJr  S SKJr  S SK	J
r
  S SKJr  \(       a  S SKJr  \R                  " \5      r " S S	\
5      rg)
    )annotationsN)defaultdict)TYPE_CHECKING)SentenceEvaluator)paraphrase_mining)SentenceTransformerc                     ^  \ rS rSrSr            S                         S	U 4S jjjr S
         SS jjr\S 5       rS r	Sr
U =r$ )ParaphraseMiningEvaluator   a  
Given a large set of sentences, this evaluator performs paraphrase (duplicate) mining and
identifies the pairs with the highest similarity. It compare the extracted paraphrase pairs
with a set of gold labels and computes the F1 score.

Args:
    sentences_map (Dict[str, str]): A dictionary that maps sentence-ids to sentences.
        For example, sentences_map[id] => sentence.
    duplicates_list (List[Tuple[str, str]], optional): A list with id pairs [(id1, id2), (id1, id5)]
        that identifies the duplicates / paraphrases in the sentences_map. Defaults to None.
    duplicates_dict (Dict[str, Dict[str, bool]], optional): A default dictionary mapping [id1][id2]
        to true if id1 and id2 are duplicates. Must be symmetric, i.e., if [id1][id2] => True,
        then [id2][id1] => True. Defaults to None.
    add_transitive_closure (bool, optional): If true, it adds a transitive closure,
        i.e. if dup[a][b] and dup[b][c], then dup[a][c]. Defaults to False.
    query_chunk_size (int, optional): To identify the paraphrases, the cosine-similarity between
        all sentence-pairs will be computed. As this might require a lot of memory, we perform
        a batched computation. query_chunk_size sentences will be compared against up to
        corpus_chunk_size sentences. In the default setting, 5000 sentences will be grouped
        together and compared up-to against 100k other sentences. Defaults to 5000.
    corpus_chunk_size (int, optional): The corpus will be batched, to reduce the memory requirement.
        Defaults to 100000.
    max_pairs (int, optional): We will only extract up to max_pairs potential paraphrase candidates.
        Defaults to 500000.
    top_k (int, optional): For each query, we extract the top_k most similar pairs and add it to a sorted list.
        I.e., for one sentence we cannot find more than top_k paraphrases. Defaults to 100.
    show_progress_bar (bool, optional): Output a progress bar. Defaults to False.
    batch_size (int, optional): Batch size for computing sentence embeddings. Defaults to 16.
    name (str, optional): Name of the experiment. Defaults to "".
    write_csv (bool, optional): Write results to CSV file. Defaults to True.
    truncate_dim (Optional[int], optional): The dimension to truncate sentence embeddings to.
        `None` uses the model's current truncation dimension. Defaults to None.

Example:
    ::

        from datasets import load_dataset
        from sentence_transformers.SentenceTransformer import SentenceTransformer
        from sentence_transformers.evaluation import ParaphraseMiningEvaluator

        # Load a model
        model = SentenceTransformer('all-mpnet-base-v2')

        # Load the Quora Duplicates Mining dataset
        questions_dataset = load_dataset("sentence-transformers/quora-duplicates-mining", "questions", split="dev")
        duplicates_dataset = load_dataset("sentence-transformers/quora-duplicates-mining", "duplicates", split="dev")

        # Create a mapping from qid to question & a list of duplicates (qid1, qid2)
        qid_to_questions = dict(zip(questions_dataset["qid"], questions_dataset["question"]))
        duplicates = list(zip(duplicates_dataset["qid1"], duplicates_dataset["qid2"]))

        # Initialize the paraphrase mining evaluator
        paraphrase_mining_evaluator = ParaphraseMiningEvaluator(
            sentences_map=qid_to_questions,
            duplicates_list=duplicates,
            name="quora-duplicates-dev",
        )
        results = paraphrase_mining_evaluator(model)
        '''
        Paraphrase Mining Evaluation of the model on the quora-duplicates-dev dataset:
        Number of candidate pairs: 250564
        Average Precision: 56.51
        Optimal threshold: 0.8325
        Precision: 52.76
        Recall: 59.19
        F1: 55.79
        '''
        print(paraphrase_mining_evaluator.primary_metric)
        # => "quora-duplicates-dev_average_precision"
        print(results[paraphrase_mining_evaluator.primary_metric])
        # => 0.5650940787776353
c           
       > [         TU ]  5         / U l        / U l        UR	                  5        H;  u  pU R                  R                  U5        U R                  R                  U5        M=     Xl        Xl        Xl        XPl	        X`l
        Xpl        Xl        Xl        Ub  UO[        S 5      U l        Ub@  U H:  u  nnUU;   d  M  UU;   d  M  SU R                  U   U'   SU R                  U   U'   M<     U(       a   U R!                  U R                  5      U l        [#        5       nU R                   H~  nU R                  U    Hh  nUU;   d  M  UU;   d  M  U R                  U   U   (       d  U R                  U   U   (       d  MC  UR%                  ['        [)        UU/5      5      5        Mj     M     [+        U5      U l        U(       a  SU-   nSU-   S-   U l        / SQU l        Xl        SU l        g )Nc                      [        [        5      $ )N)r   bool     r/var/www/html/shao/venv/lib/python3.13/site-packages/sentence_transformers/evaluation/ParaphraseMiningEvaluator.py<lambda>4ParaphraseMiningEvaluator.__init__.<locals>.<lambda>}   s    bmnrbsr   T_paraphrase_mining_evaluationz_results.csv)epochsteps	precisionrecallf1	thresholdaverage_precisionr   )super__init__	sentencesidsitemsappendnameshow_progress_bar
batch_sizequery_chunk_sizecorpus_chunk_size	max_pairstop_ktruncate_dimr   
duplicatesadd_transitive_closuresetaddtuplesortedlentotal_num_duplicatescsv_filecsv_headers	write_csvprimary_metric)selfsentences_mapduplicates_listduplicates_dictr,   r&   r'   r(   r)   r$   r%   r#   r5   r*   idsentenceid1id2positive_key_pairskey1key2	__class__s                        r   r   "ParaphraseMiningEvaluator.__init__\   s     	)//1LBNN!!(+HHOOB 2 	!2$ 0!2"
(-<-H/kZsNt&+S-'C=,@04DOOC(-04DOOC(- , ""99$//JDO UOOD-M)-.t48Md8S8S&**5t1E+FG . $ %((:$;!:D;dB^Sl"1r   c                   US:w  a  US:X  a  SU 3nOSU SU S3nOSnU R                   b  USU R                    S3-  n[        R                  S	U R                   S
U S35        [	        UU R
                  U R                  U R                  U R                  U R                  U R                  U R                  U R                   S9	n[        R                  S[        [        U5      5      -   5        S=pxSn	S=n
=pSn[        [        U5      5       H  nXn   u  nnnU R                  U   nU R                  U   nUS-  nU R                   U   U   (       d  U R                   U   U   (       d  M^  US-  nX-  nXR"                  -  nSU-  U-  UU-   -  nUU-  nUU
:  d  M  Un
UnUnXn   S   U[%        US-   [        U5      S-
  5         S   -   S-  n	M     XR"                  -  n[        R                  SUS-  S 35        [        R                  SU	S 35        [        R                  SUS-  S 35        [        R                  SUS-  S 35        [        R                  SU
S-  S S35        Ub  U R&                  (       a  [(        R*                  R-                  X R.                  5      n[(        R*                  R1                  U5      (       d\  [3        USSSS9 n[4        R6                  " U5      nUR9                  U R:                  5        UR9                  X4XXU/5        S S S 5        O@[3        USSSS9 n[4        R6                  " U5      nUR9                  X4XXU/5        S S S 5        UU
UUU	S.nU R=                  UU R                  5      nU R?                  UUX45        U$ ! , (       d  f       NG= f! , (       d  f       NX= f)Nz after epoch z
 in epoch z after z steps z (truncated to )z1Paraphrase Mining Evaluation of the model on the z dataset:)r$   r%   r&   r'   r(   r)   r*   zNumber of candidate pairs: r         zAverage Precision: d   z.2fzOptimal threshold: z.4fzPrecision: zRecall: zF1: 
wzutf-8)newlinemodeencodinga)r   r   r   r   r   ) r*   loggerinfor#   r   r   r$   r%   r&   r'   r(   r)   strr1   ranger    r+   r2   minr5   ospathjoinr3   isfileopencsvwriterwriterowr4   prefix_name_to_metrics store_metrics_in_model_card_data)r7   modeloutput_pathr   r   out_txt
pairs_list	n_extract	n_correctr   best_f1best_recallbest_precisionr   idxscoreijr=   r>   r   r   r   csv_pathfr]   metricss                              r   __call__"ParaphraseMiningEvaluator.__call__   s    B;{)%1&ugWUG6BG():):(;1==GG		{RZ[bZccdef 'NN"44!22"44nn****


 	1CJ4HHI !"!		1222+Z)C$/KE1a((1+C((1+C NIs#C(DOOC,@,E,EQ	%1	"%>%>>]V+y6/AB!Y.!< G%.N"(K!+!3jS1WcR\o`aNaAb6cde6f!fjk kI# *& .0I0II)*;c*A#)FGH))C9:k.3"6s!;<=h{S0567d7S=-R01"t~~ww||K?H77>>(++(BS7Kq ZZ]FOOD$4$45OOU>PWdu$vw LK
 (BS7Kq ZZ]FOOU>PWdu$vw L
 "3'!"
 --gtyyA--eWeK% LK
 LKs   AN$7,N5$
N25
Oc                |   [        5       n[        U R                  5       5       GH  nX!;  d  M  [        5       nUR                  U5        [        X   5      n[	        U5      S:  aK  UR                  S5      nXS;  a$  UR                  U5        UR                  X   5        [	        U5      S:  a  MK  [        U5      n[        [	        U5      S-
  5       H`  n[        US-   [	        U5      5       HA  nSXU      X7   '   SXU      X6   '   UR                  X6   5        UR                  X7   5        MC     Mb     GM     U $ )Nr   rI   T)r-   listkeysr.   r1   popextendrU   )graphnodes_visitedrQ   connected_subgraph_nodesneighbor_nodes_queuenoderl   rm   s           r   r,   0ParaphraseMiningEvaluator.add_transitive_closure   s6   ejjl#A%+.5((,,Q/ (,EH~$./!3/33A6D;044T:,33EK@	 ./!3 ,00H+I(s#;<q@AA"1q5#.F*GHZ^q9:;S;VWZ^q9:;S;VW%))*B*EF%))*B*EF I B $, r   c                    U R                   U R                  U R                  S.nU R                  b  U R                  US'   U$ )N)r,   r(   r)   r*   )r,   r(   r)   r*   )r7   config_dicts     r   get_config_dict)ParaphraseMiningEvaluator.get_config_dict  sD    &*&A&AZZ

 (*.*;*;K'r   )r%   r'   r3   r4   r+   r    r(   r#   r6   r&   r   r$   r)   r2   r*   r5   )NNFi  i i  rK   F   rF   TN)r8   zdict[str, str]r9   zlist[tuple[str, str]] | Noner:   z!dict[str, dict[str, bool]] | Noner,   r   r&   intr'   r   r(   r   r)   r   r$   r   r%   r   r#   rT   r5   r   r*   z
int | None)NrE   rE   )
ra   r   rb   z
str | Noner   r   r   r   returnzdict[str, float])__name__
__module____qualname____firstlineno____doc__r   rq   staticmethodr,   r   __static_attributes____classcell__)rB   s   @r   r
   r
      s   GX 9==A', $!'"'#'>2%>2 6>2 ;	>2
 !%>2 >2 >2 >2 >2  >2 >2 >2 >2 !>2 >2B ikU(U7AUQTUbeU	Un  4 r   r
   )
__future__r   r\   loggingrW   collectionsr   typingr   2sentence_transformers.evaluation.SentenceEvaluatorr   sentence_transformers.utilr   )sentence_transformers.SentenceTransformerr   	getLoggerr   rR   r
   r   r   r   <module>r      sB    " 
  	 #   P 8M			8	$D 1 Dr   