import asyncio
from typing import List

from crawl4ai import AsyncWebCrawler, CacheMode, CrawlerRunConfig


async def crawl_pages(urls: List[str], with_images: bool = False) -> dict:
    """Crawl the given URLs concurrently and return a mapping of URL to
    markdown-with-citations (and, optionally, extracted image metadata)."""
    config = CrawlerRunConfig(
        cache_mode=CacheMode.ENABLED,
        # check_robots_txt=True,  # Will respect robots.txt for each URL
        semaphore_count=6,  # Max concurrent requests
    )

    try:
        async with AsyncWebCrawler() as crawler:
            results = await crawler.arun_many(urls, config=config)
    except Exception as e:
        print(f"Error crawling pages: {e}")
        results = []

    if with_images:
        # Pair each page's markdown (with citation links) with its extracted image metadata.
        results = [
            {
                "markdown": result.markdown_v2.markdown_with_citations,
                "images": result.media["images"],
            }
            for result in results
        ]
    else:
        # Markdown only, one string per page.
        results = [result.markdown_v2.markdown_with_citations for result in results]

    return dict(zip(urls, results))
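

# A minimal sketch, assuming crawl4ai's CrawlResult exposes `success` and
# `error_message`: drop failed pages before mapping, instead of risking an
# AttributeError on a missing markdown_v2. The helper name is hypothetical;
# the attribute access otherwise mirrors crawl_pages above.
async def crawl_pages_skip_failures(urls: List[str]) -> dict:
    config = CrawlerRunConfig(cache_mode=CacheMode.ENABLED, semaphore_count=6)
    async with AsyncWebCrawler() as crawler:
        results = await crawler.arun_many(urls, config=config)

    pages = {}
    for url, result in zip(urls, results):
        if not result.success:
            # Skip pages that failed to crawl (timeouts, HTTP errors, blocked requests, ...).
            print(f"Skipping {url}: {result.error_message}")
            continue
        pages[url] = result.markdown_v2.markdown_with_citations
    return pages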


def crawl_pages_non_async(urls):
    """Synchronous wrapper around crawl_pages for callers without an event loop."""
    return asyncio.run(crawl_pages(urls))
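

# Hedged usage sketch for the with_images path: with_images=True yields
# url -> {"markdown": ..., "images": [...]}, while the default yields
# url -> markdown string. The URL below is a placeholder.
def example_with_images() -> None:
    pages = asyncio.run(crawl_pages(["https://example.com"], with_images=True))
    for url, page in pages.items():
        print(url, "->", len(page["images"]), "images extracted")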


if __name__ == "__main__":
    import json

    # Placeholder input; replace with the URLs you want to crawl.
    urls = ["https://example.com"]

    results = asyncio.run(crawl_pages(urls))

    with open(r"C:\Users\sagar\OneDrive\Desktop\Test\.json", "w") as json_file:
        json.dump(results, json_file, indent=4)
