
    dh #                         S SK r S SKrS SKrS SKJrJr  S SKJrJrJ	r	J
r
JrJrJrJrJr  S SKrS SKrS SKJr  S SKJr  S SKJr  \R0                  " \5      r\" 5       SSS	S
SS
S.rS\S\S\4S jr " S S\5      rg)    N)FutureThreadPoolExecutor)	AnyAsyncIteratorDictIteratorListOptionalTupleUnioncast)Document)
BaseLoader)get_user_agentzJtext/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8zen-US,en;q=0.5zhttps://www.google.com/1z
keep-alive)
User-AgentAcceptzAccept-LanguageRefererDNT
ConnectionzUpgrade-Insecure-Requestssoupurlreturnc                    SU0nU R                  S5      =n(       a  UR                  5       US'   U R                  SSS0S9=n(       a  UR                  SS5      US'   U R                  S	5      =n(       a  UR                  S
S5      US'   U$ )z)Build metadata from BeautifulSoup output.sourcetitlemetanamedescription)attrscontentzNo description found.htmllangzNo language found.language)findget_textget)r   r   metadatar   r   r"   s         g/var/www/html/shao/venv/lib/python3.13/site-packages/langchain_community/document_loaders/async_html.py_build_metadatar*   &   s    #H		'""u"!NN,iiv}.EiFF{F"-//)=T"Uyy  t #xx0DEO    c                      \ rS rSrSr          S(SSS.S\\\\   4   S\\	   S	\\
   S
\\	   S\
S\\   S\S\S\\\\4      S\
S\
S\
S\
4S jjjrS\S\4S jr\S\SS4S j5       r S)S\S\S\S\S\4
S jjrS\S\R*                  S\\\4   4S jrS \\   S\
S\\\\4      4S! jrS \\   S\\   4S" jrS\S#\S\4S$ jrS\\   4S% jrS\\   4S& jrS'r g)*AsyncHtmlLoader2   zLoad `HTML` asynchronously.NTF)preserve_order	trust_envweb_pathheader_template
verify_sslproxiesautoset_encodingencodingdefault_parserrequests_per_secondrequests_kwargsraise_for_statusignore_load_errorsr/   r0   c                   [        U[        5      (       a	  U/U l        O[        U[        5      (       a  Xl        U=(       d    [        nUR                  S5      (       d   SSKJn  U" 5       R                  US'   [        R                  " 5       U l        [        U5      U R                  l        X0R                  l        U(       a%  U R                  R$                  R'                  U5        Xl        Xpl        U	=(       d    0 U l        Xl        XPl        X`l        Xl        Xl        Xl        g! [         a    [        R                  S5         Nf = f)zInitialize with a webpage path.r   r   )	UserAgentzxfake_useragent not found, using default user agent.To get a realistic header for requests, `pip install fake_useragent`.N)
isinstancestr	web_pathsr	   default_header_templater'   fake_useragentr=   randomImportErrorloggerinforequestsSessionsessiondictheadersverifyr4   updater8   r7   r9   r:   r5   r6   r;   r/   r0   )selfr1   r2   r3   r4   r5   r6   r7   r8   r9   r:   r;   r/   r0   rK   r=   s                   r)   __init__AsyncHtmlLoader.__init__5   s   , h$$&ZDN$''%N!<%<{{<((	4(1(:(:%  '')#G}(LL  ''0#6 ,.4" 0 0 "4,"/  4s    D+ +EEr   r   c                 .   U R                   (       a(   U R                  R                  " U40 U R                  D6$ U R                  R                  " U40 U R                  D6$ ! [         a)  n[
        R                  " [        U5      5         S nAg S nAff = fN)r;   rI   r'   r9   	Exceptionwarningswarnr?   )rN   r   es      r)   _fetch_valid_connection_docs,AsyncHtmlLoader._fetch_valid_connection_docso   sw    ""||''Dt/C/CDD
 ||<t';';<<	  c!f%s   &A! !
B+BBparserc                 V    / SQnX;  a   [        SSR                  U5      -   S-   5      eg)z#Check that parser is valid for bs4.)html.parserlxmlxmlzlxml-xmlhtml5libz`parser` must be one of z, .N)
ValueErrorjoin)rY   valid_parserss     r)   _check_parserAsyncHtmlLoader._check_parsery   s8     O&*TYY}-EEK  'r+   retriescooldownbackoffc                   #    [         R                  " U R                  S9 IS h  vN n[        U5       H  n [	        SU R
                  R                  U R
                  R                  R                  5       S.U R                  D6nU R
                  R                  (       d  SUS'   UR                  " U40 UD6 IS h  vN n UR                  5       I S h  vN n	U	sS S S 5      IS h  vN   s  sS S S 5      IS h  vN   $    S S S 5      IS h  vN   [-        S5      e N Nb NK! [         a    [        R                  SU 35        Sn	 Nof = f Ne NT! , IS h  vN  (       d  f       GM7  = f! [         R                   ["        4 a  n
XbS-
  :X  aD  U R$                  (       a3  [        R'                  SU S	U S
35         S n
A
  S S S 5      IS h  vN    gXbS-
  :X  a  e [        R'                  SU SUS-    SU SU
 S3	5        [(        R*                  " X4U-  -  5      I S h  vN     S n
A
GM  S n
A
ff = f GN'! , IS h  vN  (       d  f       GN== f7f)N)r0   )rK   cookiesFsslzFailed to decode content from     zError fetching z after z	 retries.z with attempt /z: z. Retrying...zretry count exceeded )aiohttpClientSessionr0   rangerJ   rI   rK   ri   get_dictr9   rL   r'   textUnicodeDecodeErrorrE   errorClientConnectionErrorTimeoutErrorr;   warningasynciosleepr`   )rN   r   re   rf   rg   rI   ikwargsresponsers   rV   s              r)   _fetchAsyncHtmlLoader._fetch   s     ((4>>BBg7^C#' $ $ 4 4 $ 4 4 = = ?$ ..$F
  <<..(-u&{{      "&)1#8D  $      CBB# CB@ /00A C 
 $91 &"LL+I#)OP#%D&  C         55|D CaK'D,C,CWWIY'WX!/ CBB0 k)-cU. 1ugQwir!MC &mmHz,ABBBBC) CBBBsA  #IDIH9BE/>D!
?E/ED%D#
D%EE/)E
*E/.H90I<E=IH9IH6I!E/#D%%$E		EE	EE/IE,EE,&E/)H9,E//H3	5H.>H9IGIAH.!H$
"H.'H9.H33H96I9I?I II	semaphorec                    #    U IS h  vN   XR                  U5      I S h  vN 4sS S S 5      IS h  vN   $  N/ N N	! , IS h  vN  (       d  f       g = f7frR   )r~   )rN   r   r   s      r)   _fetch_with_rate_limit&AsyncHtmlLoader._fetch_with_rate_limit   s5      9kk#... 99. 999sH   A9A?;?A=A?AAAAAurlsc           	     n  #    [         R                  " U R                  5      nU Vs/ sH(  n[         R                  " U R	                  XC5      5      PM*     nn SSKJn  U(       a  U" USSSS9 H  nUI S h  vN 7v   M     g UR                  USSSS9 H  nUI S h  vN 7v   M     g s  snf  N8 N! [         au    [        R                  " S5        U(       a*  [         R                  " U6 I S h  vN   H  nU7v   M
      g [         R                  " U5       H  nUI S h  vN  7v   M      g f = f7f)Nr   )tqdm_asynciozFetching pagesTrl   )descasciiminintervalz2For better logging of progress, `pip install tqdm`)ry   	Semaphorer8   create_taskr   tqdm.asyncior   as_completedrD   rT   rU   gather)	rN   r   r/   r   r   tasksr   taskresults	            r)   _lazy_fetch_allAsyncHtmlLoader._lazy_fetch_all   s     %%d&>&>?	 
  ; ;C KL 	 
	%1( 0!D !%*$
 )55 0! 6 D !%*$
 %
 % 	%MMNO$+NNE$:::F L ; $007D $**$ 8	%s   %D5.B*D5B3 6B/7B3 D5B3 B1B3 )D5/B3 1B3 3=D20C31D2D5D2"D%#D2/D51D22D5c                 p   #    U R                  US5       VVs/ s Sh  vN u  p#UPM   N

 snn$ s  snnf 7f)z/Fetch all urls concurrently with rate limiting.TN)r   )rN   r   _docs       r)   	fetch_allAsyncHtmlLoader.fetch_all   s+     (,(<(<T4(HIIfaIIIs$   60+)+0+0
6rs   c                     SSK Jn  UR                  S5      (       a  SnOU R                  nU R	                  U5        U" X$5      n[        XQ5      n[        X&S9$ )Nr   )BeautifulSoupz.xmlr]   )page_contentr(   )bs4r   endswithr7   rc   r*   r   )rN   r   rs   r   rY   r   r(   s          r)   _to_documentAsyncHtmlLoader._to_document   sQ    %<<F((F6"T*"4-T==r+   c              #   "  #     [         R                  " 5         [        SS9 nUR                  [         R                  U R                  U R                  5      5      nUR                  5       nSSS5        [        [        [        [           W5      5       H&  u  pEU R                  U R                  U   U5      v   M(     g! , (       d  f       NY= f! [         a2    [         R                  " U R                  U R                  5      5      n Nf = f7f)+Lazy load text from the url(s) in web_path.rl   )max_workersN)ry   get_running_loopr   submitrunr   r@   r   RuntimeError	enumerater   r	   r?   r   )rN   executorfutureresultsr{   rs   s         r)   	lazy_loadAsyncHtmlLoader.lazy_load   s     	B$$& $2h,4OOKKNN4>>2- !--/ 3 !d3i!9:GA##DNN1$5t<< ; 32  	Bkk$.."@AG	BsM   DC A
B?,C 4AD?
C	C DC 9D	DDDc                   #    U R                  U R                  U R                  5        Sh  vN u  pU R                  X5      7v   M!   N
 g7f)r   N)r   r@   r/   r   )rN   r   rs   s      r)   
alazy_loadAsyncHtmlLoader.alazy_load   sH     #33NND// 
 	/)# ##C..	/  
s%   'AAA
AA
AA)r5   r7   r6   r;   r/   r:   r9   r8   rI   r0   r@   )
NTNTNr[      NFF)   r   g      ?)!__name__
__module____qualname____firstlineno____doc__r   r?   r	   r
   rJ   boolintr   r   rO   rW   staticmethodrc   floatr~   ry   r   r   r   r   r   r   r   r   r   r   r   __static_attributes__rn   r+   r)   r-   r-   2   s   %
 +/%)"&!%"&+#$48!&#(8#  $8#T#Y'8# "$8# TN	8#
 $8# 8# 3-8# 8# !8# "$sCx.18# 8# !8# 8# 8#t= = = c d   OR#1#1!$#147#1FK#1	#1J//#*#4#4/	sCx/%I%/3%	uS#X	'%<JDI J$s) J
> 
>3 
>8 
>=8H- =(/-"9 /r+   r-   ) ry   loggingrT   concurrent.futuresr   r   typingr   r   r   r   r	   r
   r   r   r   ro   rG   langchain_core.documentsr   )langchain_community.document_loaders.baser   $langchain_community.utils.user_agentr   	getLoggerr   rE   rA   r?   rJ   r*   r-   rn   r+   r)   <module>r      s       9
 
 
   - @ ?			8	$ !"'(!$	 	# 	C 	D 	B/j B/r+   