
    dh                     N    S SK r S SKJrJrJrJrJr  S SKJrJ	r	   " S S\5      r
g)    N)AnyListOptionalSequenceUnion)BaseDocumentTransformerDocumentc                       \ rS rSrSr    SS\\\\\   4      S\\\\\   4      S\	S\S\
S	S4S
 jjrS\\   S\
S	\\   4S jrSrg)MarkdownifyTransformer   a1  Converts HTML documents to Markdown format with customizable options for handling
links, images, other tags and heading styles using the markdownify library.

Arguments:
    strip: A list of tags to strip. This option can't be used with the convert option.
    convert: A list of tags to convert. This option can't be used with the strip option.
    autolinks: A boolean indicating whether the "automatic link" style should be used when a a tag's contents match its href. Defaults to True.
    heading_style: Defines how headings should be converted. Accepted values are ATX, ATX_CLOSED, SETEXT, and UNDERLINED (which is an alias for SETEXT). Defaults to ATX.
    kwargs: Additional options to pass to markdownify.

Example:
    .. code-block:: python
        from langchain_community.document_transformers import MarkdownifyTransformer
        markdownify = MarkdownifyTransformer()
        docs_transform = markdownify.transform_documents(docs)

More configuration options can be found at the markdownify GitHub page:
https://github.com/matthewwithanm/python-markdownify
Nstripconvert	autolinksheading_stylekwargsreturnc                     [        U[        5      (       a  U/OUU l        [        U[        5      (       a  U/OUU l        X0l        X@l        XPl        g )N)
isinstancestrr   r   r   r   additional_options)selfr   r   r   r   r   s         m/var/www/html/shao/venv/lib/python3.13/site-packages/langchain_community/document_transformers/markdownify.py__init__MarkdownifyTransformer.__init__   sC     !+5# 6 6eWE
$.w$<$<y'"*"(    	documentsc           
          SSK J n  / nU H  nU" S
UR                  U R                  U R                  U R
                  U R                  S.U R                  D6R                  SS5      R                  5       n[        R                  " SSU5      nUR                  [        XuR                  S	95        M     U$ ! [         a    [        S5      ef = f)Nr   )markdownifyz`markdownify package not found, please 
                install it with `pip install markdownify`)htmlr   r   r   r        z\n\s*\nz

)metadata )r   ImportErrorpage_contentr   r   r   r   r   replaceresubappendr	   r"   )r   r   r   r   converted_documentsdocmarkdown_contentcleaned_markdowns           r   transform_documents*MarkdownifyTransformer.transform_documents*   s    
	/ !C ))** LL"nn"&"4"4 -- %   "vvj&:JK&&)LLA! ( #"7  	= 	s   B7 7C)r   r   r   r   r   )NNTATX)__name__
__module____qualname____firstlineno____doc__r   r   r   r   boolr   r   r   r	   r.   __static_attributes__r#   r   r   r   r      s    , 2637")c49n-.) %T#Y/0) 	)
 ) ) 
)"#H%"# "# 
(		"#r   r   )r'   typingr   r   r   r   r   langchain_core.documentsr   r	   r   r#   r   r   <module>r:      s     	 7 7 FE#4 E#r   