
    dh                         S r SSKrSSKJrJrJrJrJr  \(       a  SSKJ	r	J
r
  SSKJr  SSKJr  \R                  " \5      r " S S\5      rg)	zRLoader that uses Selenium to load a page, then uses unstructured to load the html.    N)TYPE_CHECKINGListLiteralOptionalUnionChromeFirefox)Document)
BaseLoaderc                       \ rS rSrSrSSSSS/ 4S\\   S\S\S	   S
\	\   S\	\   S\S\\   4S jjr
S\S   4S jrS\S\S   S\4S jrS\\   4S jrSrg)SeleniumURLLoader   aq  Load `HTML` pages with `Selenium` and parse with `Unstructured`.

This is useful for loading pages that require javascript to render.

Attributes:
    urls (List[str]): List of URLs to load.
    continue_on_failure (bool): If True, continue loading other URLs on failure.
    browser (str): The browser to use, either 'chrome' or 'firefox'.
    binary_location (Optional[str]): The location of the browser binary.
    executable_path (Optional[str]): The path to the browser executable.
    headless (bool): If True, the browser will run in headless mode.
    arguments [List[str]]: List of arguments to pass to the browser.
TchromeNurlscontinue_on_failurebrowser)r   firefoxbinary_locationexecutable_pathheadless	argumentsc                      SSK n SSKn	Xl        X l        X0l        X@l        XPl        X`l        Xpl	        g! [         a    [        S5      ef = f! [         a    [        S5      ef = f)z4Load a list of URLs using Selenium and unstructured.r   NzIselenium package not found, please install it with `pip install selenium`zQunstructured package not found, please install it with `pip install unstructured`)
seleniumImportErrorunstructuredr   r   r   r   r   r   r   )
selfr   r   r   r   r   r   r   r   r   s
             i/var/www/html/shao/venv/lib/python3.13/site-packages/langchain_community/document_loaders/url_selenium.py__init__SeleniumURLLoader.__init__   s    		 	#6 .. ")  	) 	  	- 	s   6 A AA%returnr   c                 F   U R                   R                  5       S:X  a  SSKJn  SSKJn  SSKJn  U" 5       nU R                   H  nUR                  U5        M     U R                  (       a"  UR                  S5        UR                  S5        U R                  b  U R                  Ul        U R                  c  U" US9$ U" UU" U R                  S	9S
9$ U R                   R                  5       S:X  a  SSKJn  SSKJn  SSKJn  U" 5       nU R                   H  nUR                  U5        M     U R                  (       a  UR                  S5        U R                  b  U R                  Ul        U R                  c  U" US9$ U" UU" U R                  S	9S
9$ [!        S5      e)zCreate and return a WebDriver instance based on the specified browser.

Raises:
    ValueError: If an invalid browser is specified.

Returns:
    Union[Chrome, Firefox]: A WebDriver instance for the specified browser.
r   r   )r	   )Options)Servicez
--headlessz--no-sandbox)options)r   )r%   servicer   )r
   z5Invalid browser specified. Use 'chrome' or 'firefox'.)r   lowerselenium.webdriverr	   !selenium.webdriver.chrome.optionsr#   !selenium.webdriver.chrome.servicer$   r   add_argumentr   r   r   r
   "selenium.webdriver.firefox.options"selenium.webdriver.firefox.service
ValueError)	r   r	   ChromeOptionsr$   chrome_optionsargr
   FirefoxOptionsfirefox_optionss	            r   _get_driverSeleniumURLLoader._get_driverB   sq    <<8+1RA*_N~~++C0 & }}++L9++N;##/151E1E.##+n55&0D0DE  \\!Y.2TB,.O~~,,S1 & }},,\:##/262F2F/##+77'0D0DE 
 TUU    urldriverc                    SSK Jn  SSKJn   USSSS.nUR                  =n(       a  XeS'    UR                  UR                  S	5      =n(       a  UR                  S
5      =(       d    SUS'    UR                  UR                  S5      =n(       a  UR                  S5      =(       d    SUS'   U$ ! U a     NKf = f! U a     U$ f = f)Nr   )NoSuchElementException)ByzNo title found.zNo description found.zNo language found.)sourcetitledescriptionlanguager=   z//meta[@name="description"]contentr>   htmllangr?   )	selenium.common.exceptionsr:   selenium.webdriver.common.byr;   r=   find_elementXPATHget_attributeTAG_NAME)	r   r7   r8   r:   r;   metadatar=   r>   html_tags	            r   _build_metadata!SeleniumURLLoader._build_metadataw   s    E3A&2,	
 LL 5  %W	$117 {   --i8S<S '
	!..r{{FCCxC**62J6J $
  & 		 & 		s%   A B0 .A B< 0B98B9<CCc           	         SSK Jn  [        5       nU R                  5       nU R                   Hz  n UR                  U5        UR                  nU" US9nSR                  U Vs/ sH  n[        U5      PM     sn5      nU R                  XC5      n	UR                  [        XS95        M|     UR!                  5         U$ s  snf ! [         a9  n
U R                  (       a!  [        R                  SU SU
 35         Sn
A
M  U
eSn
A
ff = f)	zLoad the specified URLs using Selenium and create Document instances.

Returns:
    List[Document]: A list of Document instances with loaded content.
r   )partition_html)textz

)page_contentrI   zError fetching or processing z, exception: N)unstructured.partition.htmlrN   listr4   r   getpage_sourcejoinstrrK   appendr   	Exceptionr   loggererrorquit)r   rN   docsr8   r7   rP   elementselrO   rI   es              r   loadSeleniumURLLoader.load   s     	?#v!!#99C

3%11)|<{{h#?hCGh#?@//<H$JK  	 $@  ++LL#@]STRU!VWWG	s/   2C#B=
70C=C
D,D >D  D)r   r   r   r   r   r   r   )__name__
__module____qualname____firstlineno____doc__r   rV   boolr   r   r   r   r4   dictrK   r   r`   __static_attributes__ r6   r   r   r      s    " %)08)-)-!!#3i!# "!# ,-	!#
 "#!# "#!# !# 9!#F3VU#67 3Vj3 6I0J t >d8n r6   r   )rf   loggingtypingr   r   r   r   r   r(   r	   r
   langchain_core.documentsr   )langchain_community.document_loaders.baser   	getLoggerrb   rY   r   rj   r6   r   <module>rp      s;    X  @ @2 - @			8	$`
 `r6   