
from __future__ import annotations

from typing import TYPE_CHECKING, Any, Callable, Optional, TypedDict, Union

from langsmith.evaluation.evaluator import DynamicRunEvaluator
from langsmith.run_helpers import traceable
from langsmith.schemas import Example, Run

if TYPE_CHECKING:
    from langchain.evaluation.schema import StringEvaluator

    from langsmith.evaluation.evaluator import RunEvaluator


class SingleEvaluatorInput(TypedDict):
    """The input to a `StringEvaluator`."""

    prediction: str
    reference: Optional[Any]
    input: Optional[str]


class LangChainStringEvaluator:
    r"""A class for wrapping a LangChain StringEvaluator.

    Requires the `langchain` package to be installed.

    Attributes:
        evaluator (StringEvaluator): The underlying StringEvaluator OR the name
            of the evaluator to load.

    Methods:
        as_run_evaluator() -> RunEvaluator:
            Convert the LangChainStringEvaluator to a RunEvaluator.

    Examples:
        Creating a simple LangChainStringEvaluator:

        >>> evaluator = LangChainStringEvaluator("exact_match")

        Converting a LangChainStringEvaluator to a RunEvaluator:

        >>> from langsmith.evaluation import LangChainStringEvaluator
        >>> from langchain_openai import ChatOpenAI
        >>> evaluator = LangChainStringEvaluator(
        ...     "criteria",
        ...     config={
        ...         "criteria": {
        ...             "usefulness": "The prediction is useful if"
        ...             " it is correct and/or asks a useful followup question."
        ...         },
        ...         "llm": ChatOpenAI(model="gpt-4o"),
        ...     },
        ... )
        >>> run_evaluator = evaluator.as_run_evaluator()
        >>> run_evaluator  # doctest: +ELLIPSIS
        <DynamicRunEvaluator ...>

        Customizing the LLM model used by the evaluator:

        >>> from langsmith.evaluation import LangChainStringEvaluator
        >>> from langchain_anthropic import ChatAnthropic
        >>> evaluator = LangChainStringEvaluator(
        ...     "criteria",
        ...     config={
        ...         "criteria": {
        ...             "usefulness": "The prediction is useful if"
        ...             " it is correct and/or asks a useful followup question."
        ...         },
        ...         "llm": ChatAnthropic(model="claude-3-opus-20240229"),
        ...     },
        ... )
        >>> run_evaluator = evaluator.as_run_evaluator()
        >>> run_evaluator  # doctest: +ELLIPSIS
        <DynamicRunEvaluator ...>

        Using the `evaluate` API with different evaluators:

        >>> def prepare_data(run: Run, example: Example):
        ...     # Convert the evaluation data into the format expected by the evaluator
        ...     # Only required for datasets with multiple inputs/output keys
        ...     return {
        ...         "prediction": run.outputs["prediction"],
        ...         "reference": example.outputs["answer"],
        ...         "input": str(example.inputs),
        ...     }
        >>> import re
        >>> from langchain_anthropic import ChatAnthropic
        >>> import langsmith
        >>> from langsmith.evaluation import LangChainStringEvaluator, evaluate
        >>> criteria_evaluator = LangChainStringEvaluator(
        ...     "criteria",
        ...     config={
        ...         "criteria": {
        ...             "usefulness": "The prediction is useful if it is correct"
        ...             " and/or asks a useful followup question."
        ...         },
        ...         "llm": ChatAnthropic(model="claude-3-opus-20240229"),
        ...     },
        ...     prepare_data=prepare_data,
        ... )
        >>> embedding_evaluator = LangChainStringEvaluator("embedding_distance")
        >>> exact_match_evaluator = LangChainStringEvaluator("exact_match")
        >>> regex_match_evaluator = LangChainStringEvaluator(
        ...     "regex_match", config={"flags": re.IGNORECASE}, prepare_data=prepare_data
        ... )
        >>> scoring_evaluator = LangChainStringEvaluator(
        ...     "labeled_score_string",
        ...     config={
        ...         "criteria": {
        ...             "accuracy": "Score 1: Completely inaccurate"
        ...             "\nScore 5: Somewhat accurate"
        ...             "\nScore 10: Completely accurate"
        ...         },
        ...         "normalize_by": 10,
        ...         "llm": ChatAnthropic(model="claude-3-opus-20240229"),
        ...     },
        ...     prepare_data=prepare_data,
        ... )
        >>> string_distance_evaluator = LangChainStringEvaluator(
        ...     "string_distance",
        ...     config={"distance_metric": "levenshtein"},
        ...     prepare_data=prepare_data,
        ... )
        >>> from langsmith import Client
        >>> client = Client()
        >>> results = evaluate(
        ...     lambda inputs: {"prediction": "foo"},
        ...     data=client.list_examples(dataset_name="Evaluate Examples", limit=1),
        ...     evaluators=[
        ...         embedding_evaluator,
        ...         criteria_evaluator,
        ...         exact_match_evaluator,
        ...         regex_match_evaluator,
        ...         scoring_evaluator,
        ...         string_distance_evaluator,
        ...     ],
        ... )  # doctest: +ELLIPSIS
        View the evaluation results for experiment:...
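
        Wrapping an already-constructed StringEvaluator instance instead of
        loading one by name (a sketch; `ExactMatchStringEvaluator` comes from
        the `langchain` package's evaluation module):

        >>> from langchain.evaluation import ExactMatchStringEvaluator
        >>> evaluator = LangChainStringEvaluator(ExactMatchStringEvaluator())
        >>> run_evaluator = evaluator.as_run_evaluator()

        The converted evaluator can also be applied to a single run/example
        pair directly (a sketch; assumes a `run` and `example` fetched from an
        existing experiment):

        >>> feedback = run_evaluator.evaluate_run(run, example)  # doctest: +SKIP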
    """

    def __init__(
        self,
        evaluator: Union[StringEvaluator, str],
        *,
        config: Optional[dict] = None,
        prepare_data: Optional[
            Callable[[Run, Optional[Example]], SingleEvaluatorInput]
        ] = None,
    ):
        """Initialize a LangChainStringEvaluator.

        See: https://api.python.langchain.com/en/latest/evaluation/langchain.evaluation.schema.StringEvaluator.html#langchain-evaluation-schema-stringevaluator

        Args:
            evaluator (StringEvaluator): The underlying StringEvaluator.
        """
        from langchain.evaluation.schema import StringEvaluator

        if isinstance(evaluator, StringEvaluator):
            self.evaluator = evaluator
        elif isinstance(evaluator, str):
            from langchain.evaluation import load_evaluator

            self.evaluator = load_evaluator(evaluator, **(config or {}))
        else:
            raise NotImplementedError(f"Unsupported evaluator type: {type(evaluator)}")
        self._prepare_data = prepare_data

    def as_run_evaluator(self) -> RunEvaluator:
        """Convert the LangChainStringEvaluator to a RunEvaluator.

        This is the object used in the LangSmith `evaluate` API.

        Returns:
            RunEvaluator: The converted RunEvaluator.
        """
        # Build a copy-pasteable `prepare_data` snippet for error messages,
        # tailored to what this evaluator requires.
        input_str = (
            "\n       \"input\": example.inputs['input'],"
            if self.evaluator.requires_input
            else ""
        )
        reference_str = (
            "\n       \"reference\": example.outputs['expected']"
            if self.evaluator.requires_reference
            else ""
        )
        customization_error_str = f"""
def prepare_data(run, example):
    return {{
        "prediction": run.outputs['my_output'],{input_str}{reference_str}
    }}
evaluator = LangChainStringEvaluator(..., prepare_data=prepare_data)
"""

        @traceable
        def prepare_evaluator_inputs(
            run: Run, example: Optional[Example] = None
        ) -> SingleEvaluatorInput:
            if run.outputs and len(run.outputs) > 1:
                raise ValueError(
                    f"Evaluator {self.evaluator} only supports a single prediction "
                    "key. Please ensure that the run has a single output."
                    f" Or initialize with a prepare_data:\n{customization_error_str}"
                )
            if (
                self.evaluator.requires_reference
                and example
                and example.outputs
                and len(example.outputs) > 1
            ):
                raise ValueError(
                    f"Evaluator {self.evaluator} only supports a single reference "
                    "key. Please ensure that the example has a single output."
                    f" Or create a custom evaluator yourself:\n{customization_error_str}"
                )
            if (
                self.evaluator.requires_input
                and example
                and example.inputs
                and len(example.inputs) > 1
            ):
                raise ValueError(
                    f"Evaluator {self.evaluator} only supports a single input key. "
                    "Please ensure that the example has a single input."
                    f" Or initialize with a prepare_data:\n{customization_error_str}"
                )

            return SingleEvaluatorInput(
                prediction=next(iter(run.outputs.values())),
                reference=(
                    next(iter(example.outputs.values()))
                    if self.evaluator.requires_reference and example and example.outputs
                    else None
                ),
                input=(
                    next(iter(example.inputs.values()))
                    if self.evaluator.requires_input and example and example.inputs
                    else None
                ),
            )

        @traceable(name=self.evaluator.evaluation_name)
        def evaluate(run: Run, example: Optional[Example] = None) -> dict:
            eval_inputs = (
                prepare_evaluator_inputs(run, example)
                if self._prepare_data is None
                else self._prepare_data(run, example)
            )
            results = self.evaluator.evaluate_strings(**eval_inputs)
            return {"key": self.evaluator.evaluation_name, **results}

        @traceable(name=self.evaluator.evaluation_name)
        async def aevaluate(run: Run, example: Optional[Example] = None) -> dict:
            eval_inputs = (
                prepare_evaluator_inputs(run, example)
                if self._prepare_data is None
                else self._prepare_data(run, example)
            )
            results = await self.evaluator.aevaluate_strings(**eval_inputs)
            return {"key": self.evaluator.evaluation_name, **results}

        return DynamicRunEvaluator(evaluate, aevaluate)
   langsmith.run_helpersr   langsmith.schemasr   r   r)   r   r   r   r#   r   r    r!   <module>r`      s8    " K K > + *;;9 w8 w8r    