
    dh=                     >    S r SSKJrJrJr  SSKJr   " S S\5      rg)z+Load Documents from Docusarus Documentation    )AnyListOptional)SitemapLoaderc                   `   ^  \ rS rSrSr SS\S\\\      S\4U 4S jjjr	S\S\4S	 jr
S
rU =r$ )DocusaurusLoader   a  Load from Docusaurus Documentation.

It leverages the SitemapLoader to loop through the generated pages of a
Docusaurus Documentation website and extracts the content by looking for specific
HTML tags. By default, the parser searches for the main content of the Docusaurus
page, which is normally the <article>. You can also define your own
custom HTML tags by providing them as a list, for example: ["div", ".main", "a"].
urlcustom_html_tagskwargsc                    > UR                  S5      (       d  U S3nU=(       d    S/U l        [        TU ]  " U4SUR                  S5      =(       d    U R                  0UD6  g)aA  Initialize DocusaurusLoader

Args:
    url: The base URL of the Docusaurus website.
    custom_html_tags: Optional custom html tags to extract content from pages.
    kwargs: Additional args to extend the underlying SitemapLoader, for example:
        filter_urls, blocksize, meta_function, is_local, continue_on_failure
is_localz/sitemap.xmlzmain articleparsing_functionN)getr   super__init___parsing_function)selfr
   r   r   	__class__s       g/var/www/html/shao/venv/lib/python3.13/site-packages/langchain_community/document_loaders/docusaurus.pyr   DocusaurusLoader.__init__   sg     zz*%%E&C 0 D^4D	
#ZZ(:;Ut?U?U	
 	
    contentreturnc                     UR                  SR                  U R                  5      5      nU H  nX2;  d  M
  UR                  5         M     [	        UR                  5       5      $ )z0Parses specific elements from a Docusaurus page.,)selectjoinr   	decomposestrget_text)r   r   relevant_elementselements       r   r   "DocusaurusLoader._parsing_function+   sR    #NN388D4I4I+JK(G/!!# ) 7##%&&r   )r   )N)__name__
__module____qualname____firstlineno____doc__r    r   r   r   r   r   __static_attributes____classcell__)r   s   @r   r   r      sT     15

 #49-
 	
 
2' ' ' 'r   r   N)r)   typingr   r   r   ,langchain_community.document_loaders.sitemapr   r    r   r   <module>r/      s    1 & & F+'} +'r   