
    dh6                     X    S SK Jr  S SKJr  S SKJr  S SKJr  S SKJ	r	   " S S\\	5      r
g)	    )List)CallbackManagerForRetrieverRun)Document)BaseRetriever)ArxivAPIWrapperc                   D    \ rS rSr% SrSr\\S'   S\S\	S\
\   4S jrS	rg
)ArxivRetriever
   a  `Arxiv` retriever.

Setup:
    Install ``arxiv``:

    .. code-block:: bash

        pip install -U arxiv

Key init args:
    load_max_docs: int
        maximum number of documents to load
    get_ful_documents: bool
        whether to return full document text or snippets

Instantiate:
    .. code-block:: python

        from langchain_community.retrievers import ArxivRetriever

        retriever = ArxivRetriever(
            load_max_docs=2,
            get_ful_documents=True,
        )

Usage:
    .. code-block:: python

        docs = retriever.invoke("What is the ImageBind model?")
        docs[0].metadata

    .. code-block:: none

        {'Entry ID': 'http://arxiv.org/abs/2305.05665v2',
        'Published': datetime.date(2023, 5, 31),
        'Title': 'ImageBind: One Embedding Space To Bind Them All',
        'Authors': 'Rohit Girdhar, Alaaeldin El-Nouby, Zhuang Liu, Mannat Singh, Kalyan Vasudev Alwala, Armand Joulin, Ishan Misra'}

Use within a chain:
    .. code-block:: python

        from langchain_core.output_parsers import StrOutputParser
        from langchain_core.prompts import ChatPromptTemplate
        from langchain_core.runnables import RunnablePassthrough
        from langchain_openai import ChatOpenAI

        prompt = ChatPromptTemplate.from_template(
            """Answer the question based only on the context provided.

        Context: {context}

        Question: {question}"""
        )

        llm = ChatOpenAI(model="gpt-3.5-turbo-0125")

        def format_docs(docs):
            return "\n\n".join(doc.page_content for doc in docs)

        chain = (
            {"context": retriever | format_docs, "question": RunnablePassthrough()}
            | prompt
            | llm
            | StrOutputParser()
        )

        chain.invoke("What is the ImageBind model?")

    .. code-block:: none

         'The ImageBind model is an approach to learn a joint embedding across six different modalities - images, text, audio, depth, thermal, and IMU data...'
Fget_full_documentsqueryrun_managerreturnc                d    U R                   (       a  U R                  US9$ U R                  U5      $ )N)r   )r   loadget_summaries_as_docs)selfr   r   s      \/var/www/html/shao/venv/lib/python3.13/site-packages/langchain_community/retrievers/arxiv.py_get_relevant_documents&ArxivRetriever._get_relevant_documentsV   s/     ""9959))--e44     N)__name__
__module____qualname____firstlineno____doc__r   bool__annotations__strr   r   r   r   __static_attributes__r   r   r   r	   r	   
   s7    GR  %$55*H5	h5r   r	   N)typingr   langchain_core.callbacksr   langchain_core.documentsr   langchain_core.retrieversr   #langchain_community.utilities.arxivr   r	   r   r   r   <module>r&      s$     C - 3 ?R5]O R5r   