"Scheduled task to scrape websites and update bubble database"

import asyncio
import logging
import os
import time

import requests
from dotenv import load_dotenv

from services.company_profile.company_profile import CompanyProfileMaker
from utils.url_parser import parsed_url

load_dotenv()

# let the logging config stamp timestamps instead of formatting them by hand
logging.basicConfig(format="%(asctime)s %(levelname)s %(message)s", level=logging.INFO)

BUBBLE_URL = "https://sagarsoni.bubbleapps.io/api/1.1/wf/xcm_send_websites"

BUBBLE_WEBSITE_OBJS = "https://app.xcapmarket.com/api/1.1/obj/websites"
BUBBLE_PRIVATE_KEY = os.environ.get("BUBBLE_PRIVATE_KEY")
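
# Warn loudly at startup if the key is missing (assumes the key is supplied
# via the .env file loaded above rather than the shell environment).
if not BUBBLE_PRIVATE_KEY:
    logging.warning("BUBBLE_PRIVATE_KEY is not set; Bubble API calls will fail")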


def get_websites_to_scrape():
    """Get the websites that need to be scraped from the bubble database"""

    r = requests.get(
        url=BUBBLE_WEBSITE_OBJS,
        headers={"Authorization": f"Bearer {BUBBLE_PRIVATE_KEY}"},
        timeout=10,
    )

    if r.status_code != 200:
        logging.error("error accessing bubble website database (%s)", r.status_code)
        return []

    results = r.json()["response"]["results"]

    urls_to_scrape = []
    for result in results:
        # "scrapped", "error", and "Website_url" are the field names used in
        # the Bubble websites table; pick up records that have not been
        # scraped and have no recorded error
        if result.get("scrapped") is False and result.get("error", False) is False:
            urls_to_scrape.append(result["Website_url"])

    return urls_to_scrape
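

# Sketch, not wired in: the Bubble Data API caps responses at 100 records per
# request, so once the websites table outgrows a single page the fetch above
# would need to follow the response cursor, roughly like this:
def fetch_all_website_objects():
    """Fetch every record from the websites table, page by page."""
    results, cursor = [], 0
    while True:
        r = requests.get(
            url=BUBBLE_WEBSITE_OBJS,
            headers={"Authorization": f"Bearer {BUBBLE_PRIVATE_KEY}"},
            params={"cursor": cursor},
            timeout=10,
        )
        r.raise_for_status()
        page = r.json()["response"]
        results.extend(page["results"])
        if page.get("remaining", 0) == 0:
            return results
        cursor += len(page["results"])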


def post_to_bubble(response):
    """Post the response to the bubble database"""

    r = requests.post(url=BUBBLE_URL, data=response, timeout=10)
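    # NOTE: requests form-encodes this dict and silently drops keys whose
    # value is None, so a successful scrape posts no "error" field at all;
    # switching to json=response would preserve explicit nulls.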

    if r.status_code != 200:
        logging.error("error posting to bubble workflow endpoint (%s)", r.status_code)


def process_websites(url):
    """Scrape a single website and post the outcome back to the bubble database"""

    # convert the url to XCM format
    company_url = parsed_url(url)

    # build the company profile for the url

    try:
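        # CompanyProfileMaker.main is assumed to be a coroutine; asyncio.run
        # drives the scrape to completion on a fresh event loop.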
        company_profile = CompanyProfileMaker(company_url.url)
        asyncio.run(company_profile.main())

        post_to_bubble(
            {
                "url_sent": url,
                "root_url": company_url.url,
                "error": None,
                "status code": 200,
            }
        )

    except KeyError as e:

        logging.error("Couldn't parse %s", company_url.url)

        post_to_bubble(
            {
                "url_sent": url,
                "root_url": url,
                "error": str(e),
                "status code": 500,
            }
        )


if __name__ == "__main__":
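    # Simple poll-every-minute scheduler: each url gets its own try/except so
    # one bad site doesn't stop the loop. In production this would typically
    # run under a supervisor (cron, systemd, etc.) that restarts it on crashes.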

    while True:

        urls = get_websites_to_scrape()
        logging.info("websites to scrape: %s", urls)
        for url in urls:
            try:
                process_websites(url)
            except Exception as e:
                post_to_bubble(
                    {
                        "url_sent": url,
                        "root_url": url,
                        "error": str(e),
                        "status code": 500,
                    }
                )
                logging.error("Couldn't process %s", url)

        time.sleep(60)
