
    AhYH                    B   S r SSKJr  SSKrSSKJrJr  SSKJr  SSK	J
r
  SSKJrJrJr  SSKJr  SS	KJr  SS
KJr  SSKJr  SSKJr  \R2                  " \5      r " S S\\
5      r " S S\5      r " S S5      r " S S\\5      r  " S S\\5      r! " S S\\5      r"g)z3Interfaces to be implemented by general evaluators.    )annotationsN)ABCabstractmethod)Sequence)Enum)AnyOptionalUnion)warn)AgentAction)BaseLanguageModel)run_in_executor)Chainc                      \ rS rSrSrSr Sr Sr Sr Sr	 Sr
 S	r S
r Sr Sr Sr Sr Sr Sr Sr Sr Sr Sr Sr SrSrg)EvaluatorType   zThe types of the evaluators.qacot_qa
context_qapairwise_stringscore_stringlabeled_pairwise_stringlabeled_score_string
trajectorycriterialabeled_criteriastring_distanceexact_matchregex_matchpairwise_string_distanceembedding_distancepairwise_embedding_distancejson_validityjson_equalityjson_edit_distancejson_schema_validation N)__name__
__module____qualname____firstlineno____doc__QACOT_QA
CONTEXT_QAPAIRWISE_STRINGSCORE_STRINGLABELED_PAIRWISE_STRINGLABELED_SCORE_STRINGAGENT_TRAJECTORYCRITERIALABELED_CRITERIASTRING_DISTANCEEXACT_MATCHREGEX_MATCHPAIRWISE_STRING_DISTANCEEMBEDDING_DISTANCEPAIRWISE_EMBEDDING_DISTANCEJSON_VALIDITYJSON_EQUALITYJSON_EDIT_DISTANCEJSON_SCHEMA_VALIDATION__static_attributes__r'       S/var/www/html/shao/venv/lib/python3.13/site-packages/langchain/evaluation/schema.pyr   r      s    &	BF% JS'O!L7H1@#VH<)7'OPKIKN9=-M"?;#M.#M=-T5IrB   r   c                  6    \ rS rSrSr\\SS j5       5       rSrg)LLMEvalChainK   z,A base class for evaluators that use an LLM.c                    g)z#Create a new evaluator from an LLM.Nr'   )clsllmkwargss      rC   from_llmLLMEvalChain.from_llmN       rB   r'   N)rI   r   rJ   r   returnrE   )	r(   r)   r*   r+   r,   classmethodr   rK   rA   r'   rB   rC   rE   rE   K   s    62  2rB   rE   c                      \ rS rSrSr\S
S j5       r\S
S j5       r\SS j5       r\SS j5       r	  S     SS jjr
S	rg)_EvalArgsMixinT   z(Mixin for checking evaluation arguments.c                    gz2Whether this evaluator requires a reference label.Fr'   selfs    rC   requires_reference!_EvalArgsMixin.requires_referenceW        rB   c                    g)0Whether this evaluator requires an input string.Fr'   rU   s    rC   requires_input_EvalArgsMixin.requires_input\   rY   rB   c                6    SU R                   R                   S3$ )z&Warning to show when input is ignored.zIgnoring input in , as it is not expected.	__class__r(   rU   s    rC   _skip_input_warning"_EvalArgsMixin._skip_input_warninga   s     $DNN$;$;#<<TUUrB   c                6    SU R                   R                   S3$ )z*Warning to show when reference is ignored.zIgnoring reference in r_   r`   rU   s    rC   _skip_reference_warning&_EvalArgsMixin._skip_reference_warningf   s!     %T^^%<%<$==UV	
rB   Nc                   U R                   (       a'  Uc$  U R                  R                   S3n[        U5      eUb%  U R                   (       d  [	        U R
                  SS9  U R                  (       a'  Uc$  U R                  R                   S3n[        U5      eUb'  U R                  (       d  [	        U R                  SS9  ggg)aO  Check if the evaluation arguments are valid.

Args:
    reference (Optional[str], optional): The reference label.
    input_ (Optional[str], optional): The input string.
Raises:
    ValueError: If the evaluator requires an input string but none is provided,
        or if the evaluator requires a reference label but none is provided.
Nz requires an input string.   )
stacklevelz requires a reference string.)r\   ra   r(   
ValueErrorr   rb   rW   re   )rV   	referenceinput_msgs       rC   _check_evaluation_args%_EvalArgsMixin._check_evaluation_argsm   s     6>^^,,--GHCS/!d&9&9))a8""y'8^^,,--JKCS/! )@)@--!< *A rB   r'   rN   boolrN   str)NN)rk   Optional[str]rl   rt   rN   None)r(   r)   r*   r+   r,   propertyrW   r\   rb   re   rn   rA   r'   rB   rC   rQ   rQ   T   s    2    V V 
 
 $( $= = = 
	= =rB   rQ   c                      \ rS rSrSr\SS j5       r\SS j5       r\SSS.         SS jj5       r	SSS.         SS jjr
SSS.         SS	 jjrSSS.         SS
 jjrSrg)StringEvaluator   z_Grade, tag, or otherwise evaluate predictions relative to their inputs
and/or reference labels.c                .    U R                   R                  $ )zThe name of the evaluation.r`   rU   s    rC   evaluation_nameStringEvaluator.evaluation_name   s     ~~&&&rB   c                    grT   r'   rU   s    rC   rW   "StringEvaluator.requires_reference   rY   rB   Nrk   inputc                   g)a  Evaluate Chain or LLM output, based on optional input and label.

Args:
    prediction (str): The LLM or chain prediction to evaluate.
    reference (Optional[str], optional): The reference label to evaluate against.
    input (Optional[str], optional): The input to consider during evaluation.
    kwargs: Additional keyword arguments, including callbacks, tags, etc.
Returns:
    dict: The evaluation results containing the score or value.
        It is recommended that the dictionary contain the following keys:
             - score: the score of the evaluation, if applicable.
             - value: the string value of the evaluation, if applicable.
             - reasoning: the reasoning for the evaluation, if applicable.
Nr'   rV   
predictionrk   r   rJ   s        rC   _evaluate_strings!StringEvaluator._evaluate_strings   rM   rB   c               T   #    [        SU R                  4UUUS.UD6I Sh  vN $  N7f)a  Asynchronously evaluate Chain or LLM output, based on optional input and label.

Args:
    prediction (str): The LLM or chain prediction to evaluate.
    reference (Optional[str], optional): The reference label to evaluate against.
    input (Optional[str], optional): The input to consider during evaluation.
    kwargs: Additional keyword arguments, including callbacks, tags, etc.
Returns:
    dict: The evaluation results containing the score or value.
        It is recommended that the dictionary contain the following keys:
             - score: the score of the evaluation, if applicable.
             - value: the string value of the evaluation, if applicable.
             - reasoning: the reasoning for the evaluation, if applicable.
Nr   rk   r   )r   r   r   s        rC   _aevaluate_strings"StringEvaluator._aevaluate_strings   sD     , %""
 "
 
 
 	
 
s   (&(c               L    U R                  X#S9  U R                  " SUUUS.UD6$ )a  Evaluate Chain or LLM output, based on optional input and label.

Args:
    prediction (str): The LLM or chain prediction to evaluate.
    reference (Optional[str], optional): The reference label to evaluate against.
    input (Optional[str], optional): The input to consider during evaluation.
    kwargs: Additional keyword arguments, including callbacks, tags, etc.
Returns:
    dict: The evaluation results containing the score or value.
rk   rl   r   r'   )rn   r   r   s        rC   evaluate_strings StringEvaluator.evaluate_strings   s@    $ 	##i#F%% 
!
 	
 	
rB   c               h   #    U R                  X#S9  U R                  " SUUUS.UD6I Sh  vN $  N7f)a  Asynchronously evaluate Chain or LLM output, based on optional input and label.

Args:
    prediction (str): The LLM or chain prediction to evaluate.
    reference (Optional[str], optional): The reference label to evaluate against.
    input (Optional[str], optional): The input to consider during evaluation.
    kwargs: Additional keyword arguments, including callbacks, tags, etc.
Returns:
    dict: The evaluation results containing the score or value.
r   r   Nr'   )rn   r   r   s        rC   aevaluate_strings!StringEvaluator.aevaluate_strings   sM     $ 	##i#F,, 
!
 	
 
 	
 
s   )202r'   rr   rp   )
r   zUnion[str, Any]rk   Optional[Union[str, Any]]r   r   rJ   r   rN   dict)
r   rs   rk   rt   r   rt   rJ   r   rN   r   )r(   r)   r*   r+   r,   rv   r{   rW   r   r   r   r   r   rA   r'   rB   rC   rx   rx      s@     ' '   
 04+/ $ -	
 )  
 6 04+/
 $
 -	

 )
 
 

F $(#
 
 !	

 
 
 

< $(#
 
 !	

 
 
 

 
rB   rx   c                      \ rS rSrSr\SSS.           S
S jj5       rSSS.           S
S jjrSSS.           S
S jjrSSS.           S
S jjr	S	r
g)PairwiseStringEvaluatori  zDCompare the output of two models (or two outputs of the same model).Nr   c                   g)  Evaluate the output string pairs.

Args:
    prediction (str): The output string from the first model.
    prediction_b (str): The output string from the second model.
    reference (Optional[str], optional): The expected output / reference string.
    input (Optional[str], optional): The input string.
    kwargs: Additional keyword arguments, such as callbacks and optional reference strings.
Returns:
    dict: A dictionary containing the preference, scores, and/or other information.
Nr'   rV   r   prediction_brk   r   rJ   s         rC   _evaluate_string_pairs.PairwiseStringEvaluator._evaluate_string_pairs  rM   rB   c               V   #    [        SU R                  4UUUUS.UD6I Sh  vN $  N7f)  Asynchronously evaluate the output string pairs.

Args:
    prediction (str): The output string from the first model.
    prediction_b (str): The output string from the second model.
    reference (Optional[str], optional): The expected output / reference string.
    input (Optional[str], optional): The input string.
    kwargs: Additional keyword arguments, such as callbacks and optional reference strings.
Returns:
    dict: A dictionary containing the preference, scores, and/or other information.
Nr   r   rk   r   )r   r   r   s         rC   _aevaluate_string_pairs/PairwiseStringEvaluator._aevaluate_string_pairs  sG     ( %''
 "%
 
 
 	
 
    )')c               N    U R                  X4S9  U R                  " SUUUUS.UD6$ )r   r   r   r'   )rn   r   r   s         rC   evaluate_string_pairs-PairwiseStringEvaluator.evaluate_string_pairs8  sC    ( 	##i#F** 
!%	

 
 	
rB   c               j   #    U R                  X4S9  U R                  " SUUUUS.UD6I Sh  vN $  N7f)r   r   r   Nr'   )rn   r   r   s         rC   aevaluate_string_pairs.PairwiseStringEvaluator.aevaluate_string_pairsU  sP     ( 	##i#F11 
!%	

 
 
 	
 
   *313r'   )r   rs   r   rs   rk   rt   r   rt   rJ   r   rN   r   )r(   r)   r*   r+   r,   r   r   r   r   r   rA   r'   rB   rC   r   r     s=   N $(#  	
 !   
 4 $(#
 
 	

 !
 
 
 

F $(#
 
 	

 !
 
 
 

D $(#
 
 	

 !
 
 
 

 
rB   r   c                      \ rS rSrSr\SS j5       r\SS.           SS jj5       rSS.           SS jjr	SS.           SS jjr
SS.           SS	 jjrS
rg)AgentTrajectoryEvaluatoris  z,Interface for evaluating agent trajectories.c                    g)r[   Tr'   rU   s    rC   r\   'AgentTrajectoryEvaluator.requires_inputv  s     rB   N)rk   c                   g)N  Evaluate a trajectory.

Args:
    prediction (str): The final predicted response.
    agent_trajectory (List[Tuple[AgentAction, str]]):
        The intermediate steps forming the agent trajectory.
    input (str): The input to the agent.
    reference (Optional[str]): The reference answer.

Returns:
    dict: The evaluation result.
Nr'   rV   r   agent_trajectoryr   rk   rJ   s         rC   _evaluate_agent_trajectory3AgentTrajectoryEvaluator._evaluate_agent_trajectory{  rM   rB   c               V   #    [        SU R                  4UUUUS.UD6I Sh  vN $  N7f)]  Asynchronously evaluate a trajectory.

Args:
    prediction (str): The final predicted response.
    agent_trajectory (List[Tuple[AgentAction, str]]):
        The intermediate steps forming the agent trajectory.
    input (str): The input to the agent.
    reference (Optional[str]): The reference answer.

Returns:
    dict: The evaluation result.
N)r   r   rk   r   )r   r   r   s         rC   _aevaluate_agent_trajectory4AgentTrajectoryEvaluator._aevaluate_agent_trajectory  sG     * %++
 "-
 
 
 	
 
r   c               N    U R                  XCS9  U R                  " SUUUUS.UD6$ )r   r   r   r   r   rk   r'   )rn   r   r   s         rC   evaluate_agent_trajectory2AgentTrajectoryEvaluator.evaluate_agent_trajectory  sC    * 	##i#F.. 
!-	

 
 	
rB   c               j   #    U R                  XCS9  U R                  " SUUUUS.UD6I Sh  vN $  N7f)r   r   r   Nr'   )rn   r   r   s         rC   aevaluate_agent_trajectory3AgentTrajectoryEvaluator.aevaluate_agent_trajectory  sP     * 	##i#F55 
!-	

 
 
 	
 
r   r'   rp   )r   rs   r   z!Sequence[tuple[AgentAction, str]]r   rs   rk   rt   rJ   r   rN   r   )r(   r)   r*   r+   r,   rv   r\   r   r   r   r   r   rA   r'   rB   rC   r   r   s  sE   6   $(  <	
  !  
 8 $(
 
 <	

 
 !
 
 

J $(
 
 <	

 
 !
 
 

H $(
 
 <	

 
 !
 
 

 
rB   r   )#r,   
__future__r   loggingabcr   r   collections.abcr   enumr   typingr   r	   r
   warningsr   langchain_core.agentsr   langchain_core.language_modelsr   langchain_core.runnables.configr   langchain.chains.baser   	getLoggerr(   loggerrs   r   rE   rQ   rx   r   r   r'   rB   rC   <module>r      s    9 "  # $  ' '  - < ; '			8	$3JC 3Jl25 20= 0=fw
nc w
to
nc o
dx
~s x
rB   