import json
import os
import time

from selenium.webdriver.common.by import By

from services.company_profile.logo_standalone import download_logo
from utils.selenium_driver_chrome import SeleniumDriverChrome


class CrunchbaseScraper:
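    """
    Scrape acquisition details from Crunchbase acquisition pages via Selenium.

    Typical usage (a sketch; assumes the Crunchbase page structure this module
    expects is still in place):

        urls = [
            "https://www.crunchbase.com/acquisition/salesforce-acquires-troops-inc--ed3150ac",
        ]
        scraper = CrunchbaseScraper(urls)
        deals = scraper.scrape()
        print(scraper.generate_markdown_table(["Transaction Date", "Acquirer Name"]))
        scraper.download_logos()
    """
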
    def __init__(self, urls, headless_mode=False):
        """
        Initialize the scraper with a list of URLs.

        Args:
            urls (list): List of Crunchbase URLs to scrape.
            headless_mode (bool): Whether to run the browser in headless mode.
        """
        self.urls = urls
        self.acquisition_info = []
        self.driver = SeleniumDriverChrome(headless_mode=headless_mode)

    def get_inner_json_text(self, url):
        """
        Load the URL and parse the JSON embedded in the element with id 'ng-state'.

        Args:
            url (str): The URL to load.

        Returns:
            dict: Parsed JSON from the page.
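
        Note:
            The parsed JSON is expected to expose an "HttpState" key holding
            the page's preloaded state; get_deal_details relies on this layout,
            which may change if Crunchbase updates its front end.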
        """
        self.driver.get_url(url)
        inner_text = self.driver.driver.find_element(By.ID, "ng-state").get_attribute(
            "innerText"
        )
        return json.loads(inner_text)

    def get_deal_details(self, inner_text_json):
        """
        Extract deal details from the inner JSON structure.

        Args:
            inner_text_json (dict): The JSON content retrieved from the page.

        Returns:
            dict: A dictionary containing transaction date, acquirer, target, and PR links.
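
        Illustrative shape of the returned dict (values are made up):

            {
                "transaction_date": "2022-05-18",
                "acquirer": {"name": "...", "img": "...", "description": "..."},
                "target": {"name": "...", "img": "...", "description": "..."},
                "pr_links": ["https://example.com/press-release"],
            }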
        """
        details = inner_text_json["HttpState"]
        # HttpState preserves insertion order; the second entry holds the deal
        # details for this acquisition.
        deal_details = details[list(details)[1]]

        acquirer = deal_details["data"]["cards"]["acquirer_image_list"]
        acquiree = deal_details["data"]["cards"]["acquiree_image_list"]
        transaction_date = deal_details["data"]["cards"]["overview_fields"][
            "announced_on"
        ]["value"]

        pr_links_info = deal_details["data"]["cards"]["timeline"]
        pr_links = [
            pr["properties"]["activity_properties"]["url"]["value"]
            for pr in pr_links_info["entities"]
        ]

        return {
            "transaction_date": transaction_date,
            "acquirer": {
                "name": acquirer[0]["identifier"]["value"],
                "img": "https://images.crunchbase.com/image/upload/"
                + acquirer[0]["identifier"]["image_id"],
                "description": acquirer[0]["short_description"],
            },
            "target": {
                "name": acquiree[0]["identifier"]["value"],
                "img": "https://images.crunchbase.com/image/upload/"
                + acquiree[0]["identifier"]["image_id"],
                "description": acquiree[0]["short_description"],
            },
            "pr_links": pr_links,
        }

    def scrape(self, delay=1):
        """
        Iterate through the list of URLs and scrape the acquisition details.

        Args:
            delay (int): Seconds to wait before each URL request.

        Returns:
            list: A list of dictionaries with the acquisition details.
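                Each entry has the shape returned by get_deal_details, plus the
                source "url".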
        """
        for url in self.urls:
            time.sleep(delay)
            inner_json = self.get_inner_json_text(url)
            details = self.get_deal_details(inner_json)
            details["url"] = url
            self.acquisition_info.append(details)
        return self.acquisition_info

    def generate_markdown_table(self, headers):
        """
        Generate a markdown table from the scraped acquisition information.

        Args:
            headers (list): A list of header names to include in the table.
                Available headers:
                    - URL
                    - Transaction Date
                    - Acquirer Name
                    - Acquirer Image
                    - Acquirer Desc
                    - Target Name
                    - Target Image
                    - Target Desc
                    - PR Links
                    - Rationale

        Returns:
            str: A markdown-formatted table as a string.
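
        Example output for headers ["Transaction Date", "Acquirer Name"]
        (illustrative row values):

            | Transaction Date | Acquirer Name |
            | --- | --- |
            | 2022-05-18 | ExampleCo |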
        """

        def format_img(name, url):
            return f"![{name}]({url})" if url else ""

        header_mapping = {
            "URL": lambda rec: rec.get("url", ""),
            "Transaction Date": lambda rec: rec.get("transaction_date", ""),
            "Acquirer Name": lambda rec: rec.get("acquirer", {}).get("name", ""),
            "Acquirer Image": lambda rec: format_img(
                rec.get("acquirer", {}).get("name", ""),
                rec.get("acquirer", {}).get("img", ""),
            ),
            "Acquirer Desc": lambda rec: rec.get("acquirer", {}).get("description", ""),
            "Target Name": lambda rec: rec.get("target", {}).get("name", ""),
            "Target Image": lambda rec: format_img(
                rec.get("target", {}).get("name", ""),
                rec.get("target", {}).get("img", ""),
            ),
            "Target Desc": lambda rec: rec.get("target", {}).get("description", ""),
            "PR Links": lambda rec: ", ".join(rec.get("pr_links", [])),
            "Rationale": lambda rec: rec.get("rationale", ""),
        }

        # Build the markdown table header
        table = []
        header_row = "| " + " | ".join(headers) + " |"
        separator_row = "| " + " | ".join(["---"] * len(headers)) + " |"
        table.append(header_row)
        table.append(separator_row)

        for rec in self.acquisition_info:
            row = []
            for header in headers:
                func = header_mapping.get(header, lambda rec: "")
                row.append(func(rec))
            table.append("| " + " | ".join(row) + " |")

        return "\n".join(table)

    def download_logos(self, base_dir="logos"):
        """
        Download the acquirer and target logos into their respective subdirectories.

        Args:
            base_dir (str): Base directory to save logos. Defaults to 'logos'.
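
        Resulting layout (company names illustrative; any file extension is
        added by download_logo):

            logos/
                acquirer/Salesforce
                target/Troops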
        """
        acquirer_dir = os.path.join(base_dir, "acquirer")
        target_dir = os.path.join(base_dir, "target")
        os.makedirs(acquirer_dir, exist_ok=True)
        os.makedirs(target_dir, exist_ok=True)

        for item in self.acquisition_info:
            print(f"Getting images for {item['acquirer']['name']}")
            acquirer_path = os.path.join(acquirer_dir, item["acquirer"]["name"])
            target_path = os.path.join(target_dir, item["target"]["name"])
            download_logo(item["acquirer"]["img"], acquirer_path)
            download_logo(item["target"]["img"], target_path)


if __name__ == "__main__":

    crunchbase_urls = [
        "https://www.crunchbase.com/acquisition/emtec-acquires-codescience--6820e40e",
        "https://www.crunchbase.com/acquisition/the-clean-space-acquires-abelian--0bde0ea6",
        "https://www.crunchbase.com/acquisition/infogain-acquires-impaqtive--09bec631",
        "https://www.crunchbase.com/acquisition/icreon-communications-acquires-revolve-softech--4a3a3778",
        "https://www.crunchbase.com/acquisition/bullhorn-acquires-kona-datasearch--3b244d42",
        "https://www.crunchbase.com/acquisition/stand-8-acquires-peergenics--4967cfde",
        "https://www.crunchbase.com/acquisition/zs-associates-acquires-digital-additive--10193fac",
        "https://www.crunchbase.com/acquisition/trilliad-acquires-sercante--ca2e3796",
        "https://www.crunchbase.com/acquisition/techtorch-acquires-astuteforce--89b3994d",
        "https://www.crunchbase.com/acquisition/sbi-the-growth-advisory-acquires-carabiner-group--1d7861e6",
        "https://www.crunchbase.com/acquisition/a5-acquires-snap-business-intelligence--b509fcf9",
        "https://www.crunchbase.com/acquisition/diabsolut-acquires-emelar-consulting-group--34b800f7",
        "https://www.crunchbase.com/acquisition/salesforce-acquires-phennecs--dec6c514",
        "https://www.crunchbase.com/acquisition/canidium-acquires-allyn-it--64d2a2a0",
        "https://www.crunchbase.com/acquisition/osf-global-services-acquires-original-shift--f1de8962",
        "https://www.crunchbase.com/acquisition/wunderman-acquires-fenom-digital--8942de7a",
        "https://www.crunchbase.com/acquisition/milestone-technologies-inc-acquires-suyati-technologies--539417f7",
        "https://www.crunchbase.com/acquisition/cambridge-technology-enterprises-acquires-appshark-software--01216004",
        "https://www.crunchbase.com/acquisition/dispatch-2-acquires-youreka-labs--78113245",
        "https://www.crunchbase.com/acquisition/citiustech-acquires-wilcosource--7b1fb9e0",
        "https://www.crunchbase.com/acquisition/accenture-acquires-incapsulate--bc5f7254",
        "https://www.crunchbase.com/acquisition/majescomastek-acquires-mst-solutions-llc--ebf9d66e",
        "https://www.crunchbase.com/acquisition/bitwise-industries-acquires-esor-consulting-group--aa0bb191",
        "https://www.crunchbase.com/acquisition/apps-associates-acquires-forcivity-inc--2305e367",
        "https://www.crunchbase.com/acquisition/wipfli-llp-acquires-apostletech--c2dfe8a9",
        "https://www.crunchbase.com/acquisition/intelliswift-software-newark-california-acquires-global-infotech--6abe4a45",
        "https://www.crunchbase.com/acquisition/digital-mass-8931-acquires-trail-two--4901f52c",
        "https://www.crunchbase.com/acquisition/k2-partnering-solutions-acquires-cloud-pacific--b98fd786",
        "https://www.crunchbase.com/acquisition/invisory-acquires-gtm-guides--55abb657",
        "https://www.crunchbase.com/acquisition/nmi-acquires-iris-crm--74732e2d",
        "https://www.crunchbase.com/acquisition/mphasis-acquires-silverline--9f5d3159",
        "https://www.crunchbase.com/acquisition/salesforce-acquires-troops-inc--ed3150ac",
    ]
    scraper = CrunchbaseScraper(crunchbase_urls, headless_mode=False)
    scraper.scrape()
    print(
        scraper.generate_markdown_table(
            ["Transaction Date", "Acquirer Name", "Target Name", "PR Links"]
        )
    )
    scraper.download_logos()
