
    dh                     V    S SK JrJrJrJr  S SKJr  S SKJr  S SK	J
r
   " S S\5      rg)    )AnyIteratorListOptional)Document)
BaseLoader)ArxivAPIWrapperc                   b    \ rS rSrSr SS\S\\   S\4S jjr	S\
\   4S	 jrS\\   4S
 jrSrg)ArxivLoader	   a  Load a query result from `Arxiv`.
The loader converts the original PDF format into the text.

Setup:
    Install ``arxiv`` and ``PyMuPDF`` packages.
    ``PyMuPDF`` transforms PDF files downloaded from the arxiv.org site
    into the text format.

    .. code-block:: bash

        pip install -U arxiv pymupdf


Instantiate:
    .. code-block:: python

        from langchain_community.document_loaders import ArxivLoader

        loader = ArxivLoader(
            query="reasoning",
            # load_max_docs=2,
            # load_all_available_meta=False
        )

Load:
    .. code-block:: python

        docs = loader.load()
        print(docs[0].page_content[:100])
        print(docs[0].metadata)

    .. code-block:: python
        Understanding the Reasoning Ability of Language Models
        From the Perspective of Reasoning Paths Aggre
        {
            'Published': '2024-02-29',
            'Title': 'Understanding the Reasoning Ability of Language Models From the
                    Perspective of Reasoning Paths Aggregation',
            'Authors': 'Xinyi Wang, Alfonso Amayuelas, Kexun Zhang, Liangming Pan,
                    Wenhu Chen, William Yang Wang',
            'Summary': 'Pre-trained language models (LMs) are able to perform complex reasoning
                    without explicit fine-tuning...'
        }


Lazy load:
    .. code-block:: python

        docs = []
        docs_lazy = loader.lazy_load()

        # async variant:
        # docs_lazy = await loader.alazy_load()

        for doc in docs_lazy:
            docs.append(doc)
        print(docs[0].page_content[:100])
        print(docs[0].metadata)

    .. code-block:: python

        Understanding the Reasoning Ability of Language Models
        From the Perspective of Reasoning Paths Aggre
        {
            'Published': '2024-02-29',
            'Title': 'Understanding the Reasoning Ability of Language Models From the
                    Perspective of Reasoning Paths Aggregation',
            'Authors': 'Xinyi Wang, Alfonso Amayuelas, Kexun Zhang, Liangming Pan,
                    Wenhu Chen, William Yang Wang',
            'Summary': 'Pre-trained language models (LMs) are able to perform complex reasoning
                    without explicit fine-tuning...'
        }

Async load:
    .. code-block:: python

        docs = await loader.aload()
        print(docs[0].page_content[:100])
        print(docs[0].metadata)

    .. code-block:: python

        Understanding the Reasoning Ability of Language Models
        From the Perspective of Reasoning Paths Aggre
        {
            'Published': '2024-02-29',
            'Title': 'Understanding the Reasoning Ability of Language Models From the
                    Perspective of Reasoning Paths Aggregation',
            'Authors': 'Xinyi Wang, Alfonso Amayuelas, Kexun Zhang, Liangming Pan,
                    Wenhu Chen, William Yang Wang',
            'Summary': 'Pre-trained language models (LMs) are able to perform complex reasoning
                    without explicit fine-tuning...'
        }

Use summaries of articles as docs:
    .. code-block:: python

        from langchain_community.document_loaders import ArxivLoader

        loader = ArxivLoader(
            query="reasoning"
        )

        docs = loader.get_summaries_as_docs()
        print(docs[0].page_content[:100])
        print(docs[0].metadata)

    .. code-block:: python

        Pre-trained language models (LMs) are able to perform complex reasoning
        without explicit fine-tuning
        {
            'Entry ID': 'http://arxiv.org/abs/2402.03268v2',
            'Published': datetime.date(2024, 2, 29),
            'Title': 'Understanding the Reasoning Ability of Language Models From the
                    Perspective of Reasoning Paths Aggregation',
            'Authors': 'Xinyi Wang, Alfonso Amayuelas, Kexun Zhang, Liangming Pan,
                    Wenhu Chen, William Yang Wang'
        }
Nquerydoc_content_chars_maxkwargsc                 4    Xl         [        SSU0UD6U l        g)zInitialize with search query to find documents in the Arxiv.
Supports all arguments of `ArxivAPIWrapper`.

Args:
    query: free text which used to find documents in the Arxiv
    doc_content_chars_max: cut limit for the length of a document's content
r   N )r   r	   client)selfr   r   r   s       b/var/www/html/shao/venv/lib/python3.13/site-packages/langchain_community/document_loaders/arxiv.py__init__ArxivLoader.__init__   s%     
% 
"7
;A
    returnc              #   j   #    U R                   R                  U R                  5       Sh  vN   g N7f)zLazy load Arvix documentsN)r   	lazy_loadr   r   s    r   r   ArxivLoader.lazy_load   s!     ;;((444s   )313c                 L    U R                   R                  U R                  5      $ )zBUses papers summaries as documents rather than source Arvix papers)r   get_summaries_as_docsr   r   s    r   r   !ArxivLoader.get_summaries_as_docs   s    {{00<<r   )r   r   )N)__name__
__module____qualname____firstlineno____doc__strr   intr   r   r   r   r   r   r   __static_attributes__r   r   r   r   r   	   sR    wt BF

19#
QT
 58H- 5=tH~ =r   r   N)typingr   r   r   r   langchain_core.documentsr   )langchain_community.document_loaders.baser   #langchain_community.utilities.arxivr	   r   r   r   r   <module>r,      s"    0 0 - @ ?P=* P=r   