
"""LLM Chains for evaluating question answering."""

from __future__ import annotations

import re
import string
from collections.abc import Sequence
from typing import Any, Optional

from langchain_core.callbacks import Callbacks
from langchain_core.language_models import BaseLanguageModel
from langchain_core.prompts import PromptTemplate
from pydantic import ConfigDict
from typing_extensions import override

from langchain.chains.llm import LLMChain
from langchain.evaluation.qa.eval_prompt import CONTEXT_PROMPT, COT_PROMPT, PROMPT
from langchain.evaluation.schema import LLMEvalChain, StringEvaluator
from langchain.schema import RUN_KEY


def _get_score(text: str) -> Optional[tuple[str, int]]:
    """Extract a ("CORRECT", 1) / ("INCORRECT", 0) verdict from grader output."""
    match = re.search(r"grade:\s*(correct|incorrect)", text.strip(), re.IGNORECASE)
    if match:
        if match.group(1).upper() == "CORRECT":
            return "CORRECT", 1
        if match.group(1).upper() == "INCORRECT":
            return "INCORRECT", 0
    # Fall back to checking the first and last words, stripped of punctuation.
    try:
        first_word = (
            text.strip()
            .split()[0]
            .translate(str.maketrans("", "", string.punctuation))
        )
        if first_word.upper() == "CORRECT":
            return "CORRECT", 1
        if first_word.upper() == "INCORRECT":
            return "INCORRECT", 0
        last_word = (
            text.strip()
            .split()[-1]
            .translate(str.maketrans("", "", string.punctuation))
        )
        if last_word.upper() == "CORRECT":
            return "CORRECT", 1
        if last_word.upper() == "INCORRECT":
            return "INCORRECT", 0
    except IndexError:
        return None
    return None


def _parse_string_eval_output(text: str) -> dict:
    """Parse the output text.

    Args:
        text (str): The output text to parse.

    Returns:
        Any: The parsed output.
    """
    reasoning = text.strip()
    parsed_scores = _get_score(reasoning)
    if parsed_scores is None:
        value, score = None, None
    else:
        value, score = parsed_scores
    return {
        "reasoning": reasoning,
        "value": value,
        "score": score,
    }


class QAEvalChain(LLMChain, StringEvaluator, LLMEvalChain):
    """LLM Chain for evaluating question answering."""

    output_key: str = "results"  #: :meta private:

    model_config = ConfigDict(
        extra="ignore",
    )

    @classmethod
    def is_lc_serializable(cls) -> bool:
        return False

    @property
    def evaluation_name(self) -> str:
        return "correctness"

    @property
    def requires_reference(self) -> bool:
        return True

    @property
    def requires_input(self) -> bool:
        return True

    @classmethod
    def from_llm(
        cls,
        llm: BaseLanguageModel,
        prompt: Optional[PromptTemplate] = None,
        **kwargs: Any,
    ) -> QAEvalChain:
        """Load QA Eval Chain from LLM.

        Args:
            llm (BaseLanguageModel): the base language model to use.

            prompt (PromptTemplate): A prompt template containing the
                input_variables 'query', 'answer' and 'result' that will be
                used as the prompt for evaluation. Defaults to PROMPT.

            **kwargs: additional keyword arguments.

        Returns:
            QAEvalChain: the loaded QA eval chain.
        """
        prompt = prompt or PROMPT
        expected_input_vars = {"query", "answer", "result"}
        if expected_input_vars != set(prompt.input_variables):
            msg = (
                f"Input variables should be {expected_input_vars}, "
                f"but got {prompt.input_variables}"
            )
            raise ValueError(msg)
        return cls(llm=llm, prompt=prompt, **kwargs)

    def evaluate(
        self,
        examples: Sequence[dict],
        predictions: Sequence[dict],
        question_key: str = "query",
        answer_key: str = "answer",
        prediction_key: str = "result",
        *,
        callbacks: Callbacks = None,
    ) -> list[dict]:
        """Evaluate question answering examples and predictions."""
        inputs = [
            {
                "query": example[question_key],
                "answer": example[answer_key],
                "result": predictions[i][prediction_key],
            }
            for i, example in enumerate(examples)
        ]
        return self.apply(inputs, callbacks=callbacks)

    def _prepare_output(self, result: dict) -> dict:
        parsed_result = _parse_string_eval_output(result[self.output_key])
        if RUN_KEY in result:
            parsed_result[RUN_KEY] = result[RUN_KEY]
        return parsed_result

    @override
    def _evaluate_strings(
        self,
        *,
        prediction: str,
        reference: Optional[str] = None,
        input: Optional[str] = None,
        callbacks: Callbacks = None,
        include_run_info: bool = False,
        **kwargs: Any,
    ) -> dict:
        """Evaluate Chain or LLM output, based on optional input and label.

        Args:
            prediction (str): the LLM or chain prediction to evaluate.
            reference (Optional[str], optional): the reference label
                to evaluate against.
            input (Optional[str], optional): the input to consider during
                evaluation.
            callbacks (Callbacks, optional): the callbacks to use for tracing.
            include_run_info (bool, optional): whether to include run info in
                the returned results.
            **kwargs: additional keyword arguments, including callbacks,
                tags, etc.

        Returns:
            dict: The evaluation results containing the score or value.
        """
        result = self(
            {"query": input, "answer": reference, "result": prediction},
            callbacks=callbacks,
            include_run_info=include_run_info,
        )
        return self._prepare_output(result)

    @override
    async def _aevaluate_strings(
        self,
        *,
        prediction: str,
        reference: Optional[str] = None,
        input: Optional[str] = None,
        callbacks: Callbacks = None,
        include_run_info: bool = False,
        **kwargs: Any,
    ) -> dict:
        result = await self.acall(
            inputs={"query": input, "answer": reference, "result": prediction},
            callbacks=callbacks,
            include_run_info=include_run_info,
        )
        return self._prepare_output(result)


class ContextQAEvalChain(LLMChain, StringEvaluator, LLMEvalChain):
    """LLM Chain for evaluating QA w/o GT based on context"""

    @classmethod
    def is_lc_serializable(cls) -> bool:
        return False

    @property
    def requires_reference(self) -> bool:
        """Whether the chain requires a reference string."""
        return True

    @property
    def requires_input(self) -> bool:
        """Whether the chain requires an input string."""
        return True

    model_config = ConfigDict(
        extra="ignore",
    )

    @classmethod
    def _validate_input_vars(cls, prompt: PromptTemplate) -> None:
        expected_input_vars = {"query", "context", "result"}
        if expected_input_vars != set(prompt.input_variables):
            msg = (
                f"Input variables should be {expected_input_vars}, "
                f"but got {prompt.input_variables}"
            )
            raise ValueError(msg)

    @property
    def evaluation_name(self) -> str:
        return "Contextual Accuracy"

    @classmethod
    def from_llm(
        cls,
        llm: BaseLanguageModel,
        prompt: Optional[PromptTemplate] = None,
        **kwargs: Any,
    ) -> ContextQAEvalChain:
        """Load QA Eval Chain from LLM.

        Args:
            llm (BaseLanguageModel): the base language model to use.

            prompt (PromptTemplate): A prompt template containing the
                input_variables 'query', 'context' and 'result' that will be
                used as the prompt for evaluation. Defaults to CONTEXT_PROMPT.

            **kwargs: additional keyword arguments.

        Returns:
            ContextQAEvalChain: the loaded QA eval chain.
        """
        prompt = prompt or CONTEXT_PROMPT
        cls._validate_input_vars(prompt)
        return cls(llm=llm, prompt=prompt, **kwargs)

    def evaluate(
        self,
        examples: Sequence[dict],
        predictions: Sequence[dict],
        question_key: str = "query",
        context_key: str = "context",
        prediction_key: str = "result",
        *,
        callbacks: Callbacks = None,
    ) -> list[dict]:
        """Evaluate question answering examples and predictions."""
        inputs = [
            {
                "query": example[question_key],
                "context": example[context_key],
                "result": predictions[i][prediction_key],
            }
            for i, example in enumerate(examples)
        ]
        return self.apply(inputs, callbacks=callbacks)

    def _prepare_output(self, result: dict) -> dict:
        parsed_result = _parse_string_eval_output(result[self.output_key])
        if RUN_KEY in result:
            parsed_result[RUN_KEY] = result[RUN_KEY]
        return parsed_result

    @override
    def _evaluate_strings(
        self,
        *,
        prediction: str,
        reference: Optional[str] = None,
        input: Optional[str] = None,
        callbacks: Callbacks = None,
        include_run_info: bool = False,
        **kwargs: Any,
    ) -> dict:
        result = self(
            {"query": input, "context": reference, "result": prediction},
            callbacks=callbacks,
            include_run_info=include_run_info,
        )
        return self._prepare_output(result)

    @override
    async def _aevaluate_strings(
        self,
        *,
        prediction: str,
        reference: Optional[str] = None,
        input: Optional[str] = None,
        callbacks: Callbacks = None,
        include_run_info: bool = False,
        **kwargs: Any,
    ) -> dict:
        result = await self.acall(
            inputs={"query": input, "context": reference, "result": prediction},
            callbacks=callbacks,
            include_run_info=include_run_info,
        )
        return self._prepare_output(result)


class CotQAEvalChain(ContextQAEvalChain):
    """LLM Chain for evaluating QA using chain of thought reasoning."""

    @classmethod
    def is_lc_serializable(cls) -> bool:
        return False

    @property
    def evaluation_name(self) -> str:
        return "COT Contextual Accuracy"

    @classmethod
    def from_llm(
        cls,
        llm: BaseLanguageModel,
        prompt: Optional[PromptTemplate] = None,
        **kwargs: Any,
    ) -> CotQAEvalChain:
        """Load QA Eval Chain from LLM."""
        prompt = prompt or COT_PROMPT
        cls._validate_input_vars(prompt)
        return cls(llm=llm, prompt=prompt, **kwargs)
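

# ---------------------------------------------------------------------------
# Usage sketch (illustrative addition, not part of the upstream module).
# It assumes `FakeListLLM` from langchain_core -- a stub LLM that replays
# canned responses -- so grading is deterministic and no real model is
# called. The question/answer pair below is a hypothetical example.
if __name__ == "__main__":
    from langchain_core.language_models import FakeListLLM

    fake_llm = FakeListLLM(responses=["GRADE: CORRECT"])
    chain = QAEvalChain.from_llm(llm=fake_llm)

    # Batch grading: examples[i] is paired with predictions[i]; the raw
    # verdict text is returned under the chain's output key, "results".
    graded = chain.evaluate(
        examples=[{"query": "What is 2 + 2?", "answer": "4"}],
        predictions=[{"result": "4"}],
    )
    print(graded)  # expected: [{'results': 'GRADE: CORRECT'}]

    # StringEvaluator interface: the verdict is parsed by
    # _parse_string_eval_output into reasoning / value / score.
    parsed = chain.evaluate_strings(
        prediction="4", reference="4", input="What is 2 + 2?"
    )
    print(parsed["value"], parsed["score"])  # expected: CORRECT 1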