
    dh                     B    S SK JrJr  S SKJr  S SKJr   " S S\5      rg)    )AnyList)Document)WebBaseLoaderc                   \    \ rS rSrSrS\\   4S jrS\S\\   4S jr	S\S\\   4S jr
S	rg
)HNLoader   z[Load `Hacker News` data.

It loads data from either main page results or the comments page.returnc                     U R                  5       nSU R                  ;   a  U R                  U5      $ U R                  U5      $ )zGet important HN webpage information.

HN webpage components are:
    - title
    - content
    - source url,
    - time of post
    - author of the post
    - number of comments
    - rank of the post
item)scrapeweb_pathload_commentsload_results)self	soup_infos     _/var/www/html/shao/venv/lib/python3.13/site-packages/langchain_community/document_loaders/hn.pyloadHNLoader.load   s=     KKM	T]]"%%i00$$Y//    r   c           	          UR                  S5      nUR                  S5      R                  S5      nU Vs/ sH2  n[        UR                  R                  5       U R                  US.S9PM4     sn$ s  snf )zLoad comments from a HN post.ztr[class='athing comtr']ztr[id='pagespace']title)sourcer   )page_contentmetadata)select
select_onegetr   textstripr   )r   r   commentsr   comments        r   r   HNLoader.load_comments   s{    ##$>?$$%9:>>wG $

 $	 $\\//1$(MMEB $
 	
 
s   8A1soupc           
         UR                  S5      n/ nU H  nUR                  S5      R                  nUR                  SSS05      R                  S5      R	                  S5      nUR                  SSS05      R                  R                  5       nU R                  UUUS.nUR                  [        XvXXS	95        M     U$ )
zLoad items from an HN page.ztr[class='athing']zspan[class='rank']spanclass	titlelineahref)r   r   linkranking)r   r+   r,   r   )	r   r   r   findr   r    r   appendr   )	r   r$   items	documentslineItemr,   r+   r   r   s	            r   r   HNLoader.load_results+   s    01	H))*>?DDG==';)?@EEcJNNvVDMM&7K*@AFFLLNE--"	H !&7  r    N)__name__
__module____qualname____firstlineno____doc__r   r   r   r   r   r   __static_attributes__r3   r   r   r   r      sF    I0d8n 0$

s 

tH~ 

 h r   r   N)typingr   r   langchain_core.documentsr   -langchain_community.document_loaders.web_baser   r   r3   r   r   <module>r=      s     - G6} 6r   