o
    g9                     @   sx   d dl Z d dlmZ d dlmZmZmZ ddee dede	fdd	Z
d
d Zedkr:dgZe e
eZee dS dS )    N)List)AsyncWebCrawler	CacheModeCrawlerRunConfigFurlswith_imagesreturnc              
      sT  t tjdd}z)t 4 I d H }|j| |dI d H }W d   I d H  n1 I d H s+w   Y  W n tyL } ztd|  g }W Y d }~nd }~ww z)t 4 I d H }|j| |dI d H }W d   I d H  n1 I d H spw   Y  W n ty } ztd|  g }W Y d }~nd }~ww |rdd |D }ndd |D }tt| |S )N   )
cache_modesemaphore_count)configzError crawling pages: c                 S   sF   g | ]}|d urt |drt |drt|jdd|jdg dqS )Nmarkdown_v2mediamarkdown_with_citations images)markdownr   )hasattrgetattrr   r   get.0result r   7/var/www/html/XCapMarket/utils/webscrape/crawl_pages.py
<listcomp>   s    zcrawl_pages.<locals>.<listcomp>c                 S   s&   g | ]}|d urt |dr|jjqS )Nr   )r   r   r   r   r   r   r   r   +   s
    )	r   r   ENABLEDr   	arun_many	Exceptionprintdictzip)r   r   r   crawlerresultser   r   r   crawl_pages   sB   ((r%   c                 C   s   t t| S )N)asynciorunr%   )r   r   r   r   crawl_pages_non_async5   s   r(   __main__zhttps://www.aldi.us/)F)r&   typingr   crawl4air   r   r   strboolr    r%   r(   __name__r   r'   r#   r   r   r   r   r   <module>   s    .