
    dh                     v    S r SSKrSSKJrJrJr  SSKJr  SSKJ	r	  \R                  " \5      r " S S\	5      rg)z1Loader that uses unstructured to load HTML files.    N)AnyIteratorList)Document)
BaseLoaderc                   x    \ rS rSrSr    SS\\   S\S\S\S\S\S	S
4S jjr	S	\\
   4S jrS	\\
   4S jrSrg
)NewsURLLoader   a  Load news articles from URLs using `Unstructured`.

Args:
    urls: URLs to load. Each is loaded into its own document.
    text_mode: If True, extract text from URL and use that for page content.
        Otherwise, extract raw HTML.
    nlp: If True, perform NLP on the extracted contents, like providing a summary
        and extracting keywords.
    continue_on_failure: If True, continue loading documents even if
        loading fails for a particular URL.
    show_progress_bar: If True, use tqdm to show a loading progress bar. Requires
        tqdm to be installed, ``pip install tqdm``.
    **newspaper_kwargs: Any additional named arguments to pass to
        newspaper.Article().

Example:
    .. code-block:: python

        from langchain_community.document_loaders import NewsURLLoader

        loader = NewsURLLoader(
            urls=["<url-1>", "<url-2>"],
        )
        docs = loader.load()

Newspaper reference:
    https://newspaper.readthedocs.io/en/latest/
urls	text_modenlpcontinue_on_failureshow_progress_barnewspaper_kwargsreturnNc                      SSK nUR                  U l        Xl        X l        X0l        X@l        X`l        XPl	        g! [         a    [        S5      ef = f)zInitialize with file path.r   NzMnewspaper package not found, please install it with `pip install newspaper3k`)
	newspaper__version___NewsURLLoader__versionImportErrorr   r   r   r   r   r   )selfr   r   r   r   r   r   r   s           a/var/www/html/shao/venv/lib/python3.13/site-packages/langchain_community/document_loaders/news.py__init__NewsURLLoader.__init__+   s]    	&22DN 	"#6  0!2  	, 	s	   < Ac                     U R                  5       nU R                  (       a   SSKJn  U" U5      n[	        U5      $ ! [         a  n[        S5      UeS nAff = f)Nr   )tqdmzPackage tqdm must be installed if show_progress_bar=True. Please install with 'pip install tqdm' or set show_progress_bar=False.)	lazy_loadr   r   r   list)r   iterr   es       r   loadNewsURLLoader.loadF   s]    ~~!!% :DDz  !/ 	s   < 
AAAc              #   0  #     SSK Jn  U R                   GH  n U" U40 U R                  D6nUR                  5         UR                  5         U R                  (       a  UR                  5         [        USS5      [        US[        US	S5      5      [        US
/ 5      [        USS5      [        USS5      [        USS5      S.nU R                  (       a  UR                  nOUR                  nU R                  (       a   [        US/ 5      US'   [        USS5      US'   [!        XeS9v   GM     g ! [         a  n[        S5      UeS nAff = f! [         a:  nU R                  (       a"  [        R                  SU SU 35         S nAGMv  UeS nAff = f7f)Nr   )ArticlezFCannot import newspaper, please install with `pip install newspaper3k`zError fetching or processing z, exception: title urlcanonical_linkauthors	meta_langmeta_descriptionpublish_date)r%   linkr)   languagedescriptionr,   keywordssummary)page_contentmetadata)r   r$   r   r   r   downloadparser   	Exceptionr   loggererrorgetattrr   texthtmlr   )r   r$   r    r'   articler3   contents          r   r   NewsURLLoader.lazy_loadT   so    	) 99C!#?)>)>?  "88KKM !'26AQSU0VW"7Ir:#G["=&w0BBG ' DH ~~!,,!,,xx'.w
B'G$&-gy"&E#CCE   	X	  ++LL#@]STRU!VWGsX   FD1 FAE/CF1
E;EEF
F,FFFFF)	__versionr   r   r   r   r   r   )TFTF)__name__
__module____qualname____firstlineno____doc__r   strboolr   r   r   r!   r   r   __static_attributes__     r   r	   r	      s    @ $("'33i3 3 	3
 "3  3  3 
36d8n *D8H- *DrI   r	   )rD   loggingtypingr   r   r   langchain_core.documentsr   )langchain_community.document_loaders.baser   	getLoggerr@   r7   r	   rH   rI   r   <module>rO      s7    7  & & - @			8	$qDJ qDrI   