
    dh                     J    S SK Jr  S SKJrJrJr  S SKJrJr   " S S\5      r	g)    )Path)AnyListUnion)UnstructuredFileLoadervalidate_unstructured_versionc                   Z   ^  \ rS rSrSr S
S\\\4   S\S\4U 4S jjjr	S\
4S jrS	rU =r$ )UnstructuredMarkdownLoader
   a3  Load `Markdown` files using `Unstructured`.

You can run the loader in one of two modes: "single" and "elements".
If you use "single" mode, the document will be returned as a single
langchain Document object. If you use "elements" mode, the unstructured
library will split the document into elements such as Title and NarrativeText.
You can pass in additional unstructured kwargs after mode to apply
different unstructured settings.

Setup:
    Install ``langchain-community``.

    .. code-block:: bash

        pip install -U langchain-community

Instantiate:
    .. code-block:: python

        from langchain_community.document_loaders import UnstructuredMarkdownLoader

        loader = UnstructuredMarkdownLoader(
            "./example_data/example.md",
            mode="elements",
            strategy="fast",
        )

Lazy load:
    .. code-block:: python

        docs = []
        docs_lazy = loader.lazy_load()

        # async variant:
        # docs_lazy = await loader.alazy_load()

        for doc in docs_lazy:
            docs.append(doc)
        print(docs[0].page_content[:100])
        print(docs[0].metadata)

    .. code-block:: python

        Sample Markdown Document
        {'source': './example_data/example.md', 'category_depth': 0, 'last_modified': '2024-08-14T15:04:18', 'languages': ['eng'], 'filetype': 'text/markdown', 'file_directory': './example_data', 'filename': 'example.md', 'category': 'Title', 'element_id': '3d0b313864598e704aa26c728ecb61e5'}


Async load:
    .. code-block:: python

        docs = await loader.aload()
        print(docs[0].page_content[:100])
        print(docs[0].metadata)

    .. code-block:: python

        Sample Markdown Document
        {'source': './example_data/example.md', 'category_depth': 0, 'last_modified': '2024-08-14T15:04:18', 'languages': ['eng'], 'filetype': 'text/markdown', 'file_directory': './example_data', 'filename': 'example.md', 'category': 'Title', 'element_id': '3d0b313864598e704aa26c728ecb61e5'}

References
----------
https://unstructured-io.github.io/unstructured/core/partition.html#partition-md
	file_pathmodeunstructured_kwargsc                 V   > [        U5      n[        S5        [        TU ]  " SXS.UD6  g)z

Args:
    file_path: The path to the Markdown file to load.
    mode: The mode to use when loading the file. Can be one of "single",
        "multi", or "all". Default is "single".
    **unstructured_kwargs: Any kwargs to pass to the unstructured.
z0.4.16)r   r   N )strr   super__init__)selfr   r   r   	__class__s       e/var/www/html/shao/venv/lib/python3.13/site-packages/langchain_community/document_loaders/markdown.pyr   #UnstructuredMarkdownLoader.__init__K   s,     	N	%h/O9O;NO    returnc                 J    SSK Jn  U" SSU R                  0U R                  D6$ )Nr   )partition_mdfilenamer   )unstructured.partition.mdr   r   r   )r   r   s     r   _get_elements(UnstructuredMarkdownLoader._get_elements]   s"    :PT^^Pt7O7OPPr   r   )single)__name__
__module____qualname____firstlineno____doc__r   r   r   r   r   r   r   __static_attributes____classcell__)r   s   @r   r
   r
   
   sV    >F Pd#P P  #	P P$Qt Q Qr   r
   N)
pathlibr   typingr   r   r   1langchain_community.document_loaders.unstructuredr   r   r
   r   r   r   <module>r+      s$     # #VQ!7 VQr   