"Manual processing of company URLs"

import asyncio
import logging
import signal
import time
from datetime import datetime

import boto3

from services.company_profile.company_profile import CompanyProfileMaker
from utils.url_parser import parsed_url

logging.basicConfig(level=logging.INFO)

current_dynamodb = boto3.resource("dynamodb")
company_list_table = current_dynamodb.Table("company_list")
llm_table = current_dynamodb.Table("LLM_results")
company_information_table = current_dynamodb.Table("company_information")


def get_table_items(table):
    """Return all items from a DynamoDB table, following scan pagination."""
    response = table.scan()
    items = response["Items"]

    # Each scan call returns at most 1 MB of data; keep paging while a
    # LastEvaluatedKey is present, pausing briefly between calls.
    while "LastEvaluatedKey" in response:
        time.sleep(5)
        response = table.scan(
            ExclusiveStartKey=response["LastEvaluatedKey"],
        )
        items += response["Items"]

    return items


company_list_items = get_table_items(company_list_table)
company_to_scrape = []

for company in company_list_items:
    # Queue any company whose "scrapped" flag is missing or falsy
    if not company.get("scrapped"):
        company_to_scrape.append(company)



# convert company_to_scrape to a CSV
# import csv

# with open("company_to_scrape.csv", "w", newline="") as f:
#     writer = csv.writer(f)
#     # writer.writerow(company_to_scrape[0].keys())
#     writer.writerows([company['root_url'] for company in company_to_scrape])


def timeout_handler(signum, frame):
    """Raise an exception when a company takes too long to process."""
    raise TimeoutError("Processing Timeout")


# Register the handler so signal.alarm() can interrupt a company that hangs
signal.signal(signal.SIGALRM, timeout_handler)


for company in company_to_scrape:
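    # One company per iteration; signal.alarm() caps processing at 15 minutes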
    ticker = None
    sic_code = None

    time.sleep(5)  # brief pause between companies

    signal.alarm(900)  # 15 minutes

    try:
        logging.info("Processing %s", company["root_url"])
        if "ticker" in company:
            ticker = company["ticker"]

        if "stock_ticker" in company:
            ticker = company["stock_ticker"]

        if "sic_code" in company:
            sic_code = company["sic_code"]

        # Only process URLs whose company_information record was created in the last week
        company_url = parsed_url(company["root_url"]).url
        company_info = company_information_table.get_item(Key={"root_url": company_url})

        if "Item" in company_info:
            created_date = datetime.fromisoformat(company_info["Item"]["created_date"])
            if (datetime.now() - created_date).days > 7:
                logging.info("URL created more than 7 days ago. Skipping")
                continue

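        # Build and run the company profile for this URL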
        company_profile = CompanyProfileMaker(
            company["root_url"],
            ticker,
        )

        asyncio.run(company_profile.main(sic_code=sic_code))
        # If parsing changed the URL, mark the parsed entry in company_list as scrapped
        if company_url != company["root_url"]:
            company_list_table.update_item(
                Key={"root_url": company_url},
                UpdateExpression="SET scrapped = :val",
                ExpressionAttributeValues={":val": True},
            )
        logging.info("Finished %s", company["root_url"])

    except Exception as e:
        logging.error("Error processing %s: %s", company["root_url"], e)
        continue

    finally:
        # Cancel any pending alarm so a stale timeout cannot fire later
        signal.alarm(0)
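
# Note: running this script assumes AWS credentials with access to the DynamoDB
# tables above are available in the environment; it runs top to bottom with no CLI.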
