import sys

sys.path.append(".")

import inspect
# from utils.logger import ServiceLogger
import logging
from datetime import datetime, timezone
from typing import List, Literal, Optional

import feedparser
import requests
from bs4 import BeautifulSoup

from services.PressRelease.PRSources.data_models.PR_Model import PRModel
from utils.dynamo_db import DynamoDB
from utils.url_parser import parsed_url

XCM_logger = logging.getLogger()


class PRParser:
    """Parse press releases from an RSS feed and optionally persist them to DynamoDB."""

    # Browser-like User-Agent: some press-release hosts reject the default
    # python-requests UA.
    request_headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
    }

    def __init__(self, feed_url: str, upload_to_db: bool = True, check_db: bool = True):
        """Fetch the feed immediately and, if requested, store the entries.

        Args:
            feed_url: URL of the RSS feed to parse.
            upload_to_db: Store the fetched entries in DynamoDB when True.
            check_db: Skip entries whose URL is already present in DynamoDB.
        """
        self.feed_url = feed_url
        self.upload_to_db = upload_to_db
        self.check_db = check_db

        self.source = parsed_url(feed_url).domain
        XCM_logger.debug("Initialized PRParser with feed URL: %s", feed_url)

        self.pr_feed_data = self.fetch_data_from_rss_feed()

        if upload_to_db:
            # Fixed: store_data_in_dynamodb previously required an `items`
            # argument, so this no-arg call raised TypeError.
            self.store_data_in_dynamodb()

    def fetch_data_from_rss_feed(self) -> List[PRModel]:
        """Download and parse the RSS feed into a list of PRModel entries.

        Entries already present in DynamoDB are skipped when check_db is on.

        Returns:
            Parsed entries; an empty list if the feed cannot be parsed.
            (Annotation fixed: the method returns PRModel objects, not dicts.)
        """
        XCM_logger.debug("Fetching data from RSS feed: %s", self.feed_url)
        # NOTE(review): verify=False disables TLS certificate validation —
        # confirm this is intentional for these feed hosts.
        response = requests.get(
            self.feed_url, verify=False, timeout=10, headers=self.request_headers
        )
        feed = feedparser.parse(response.content)

        if feed.bozo:  # feedparser sets .bozo when the feed is malformed
            XCM_logger.error(
                "Error parsing feed: %s", feed.bozo_exception, exc_info=True
            )
            return []

        data = []
        for entry in feed.entries:
            if self.check_PR_in_db(entry.link):
                continue  # already stored; skip refetch and rewrite

            pr_entry = PRModel(
                title=entry.title,
                url=entry.link,
                # Prefer the published timestamp; fall back to updated.
                date=entry.get("published", entry.get("updated", "")),
                company_name=getattr(entry, "author", None),
                company_url=entry.get("author_detail", {}).get("href"),
                text=self.fetch_pr_text(entry.link),
                source=self.source,
            )
            pr_entry.update_modified()
            data.append(pr_entry)

        XCM_logger.info("Fetched %s entries from RSS feed", len(data))
        return data

    @classmethod
    def fetch_pr_text(cls, url: str) -> str:
        """Fetch a press-release page and return its visible text.

        Returns:
            The whitespace-normalized page text, or "" if the request fails.
        """
        try:
            XCM_logger.info("Fetching PR text from URL: %s", url)
            # NOTE(review): verify=False disables TLS certificate validation.
            response = requests.get(
                url, verify=False, timeout=10, headers=cls.request_headers
            )
            response.raise_for_status()

            soup = BeautifulSoup(response.content, "html.parser")

            # Drop non-content markup before extracting text.
            for script in soup(["script", "style"]):
                script.decompose()

            text = soup.get_text()

            # Normalize whitespace: strip each line, split multi-headline
            # lines on double spaces, and drop blanks.
            lines = (line.strip() for line in text.splitlines())
            chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
            text = "\n".join(chunk for chunk in chunks if chunk)

            XCM_logger.info("Fetched PR text from URL: %s", url)
            return text

        except requests.exceptions.RequestException:
            # Widened from HTTPError: connection errors and timeouts used to
            # escape and crash the whole feed run. RequestException is the
            # common base class, so HTTPError handling is unchanged.
            XCM_logger.error("Error fetching PR text from URL: %s", url, exc_info=True)
            return ""

    def check_PR_in_db(self, pr_link: str) -> bool:
        """Return True if a press release with this URL already exists in DynamoDB.

        Always returns False when DB checking is disabled (check_db=False).
        """
        if not self.check_db:
            return False

        dynamo_db = DynamoDB()
        item = dynamo_db.get_item(dynamo_db.press_releases, pr_link)
        return item is not None

    def store_data_in_dynamodb(self, items: Optional[List[PRModel]] = None) -> None:
        """Store press-release entries in DynamoDB.

        Args:
            items: Entries to store; defaults to the entries fetched from the
                feed. (The parameter previously had no default, so the no-arg
                call from __init__ raised TypeError.)
        """
        if items is None:
            items = self.pr_feed_data

        self._write_items_to_db(items)
        # Log the count actually written — previously this logged
        # len(self.pr_feed_data), which is wrong for an explicit items list.
        XCM_logger.info("Stored %s entries in DynamoDB", len(items))

    @staticmethod
    def _write_items_to_db(items: List[PRModel]) -> None:
        """Serialize each PRModel and upload it to the press-releases table."""
        dynamo_db = DynamoDB()
        table = dynamo_db.press_releases

        for pr_entry in items:
            pr_entry_dict = pr_entry.model_dump()
            # DynamoDB cannot store datetime objects; use ISO-8601 strings.
            pr_entry_dict["date"] = pr_entry.date.isoformat()
            pr_entry_dict["created_date"] = pr_entry.created_date.isoformat()
            pr_entry_dict["modified_date"] = pr_entry.modified_date.isoformat()
            dynamo_db.upload_to_dynamodb(table, pr_entry_dict)

    @classmethod
    def upload_pr_model(cls, item: PRModel) -> bool:
        """Upload a single press release; return True on success.

        Fixed: this previously invoked the *instance* method
        store_data_in_dynamodb via cls, passing the list as ``self`` — a
        guaranteed TypeError — and never returned the declared bool.
        """
        cls._write_items_to_db([item])
        return True


if __name__ == "__main__":
    # Smoke-test the parser against two live feeds, without touching DynamoDB.
    for rss_url in (
        "https://www.prlog.org/news/ind/financial/rss.xml",
        "https://rss.24-7pressrelease.com/rss/business_finance.xml",
    ):
        feed_result = PRParser(rss_url, upload_to_db=False, check_db=False)
        print(feed_result.pr_feed_data)
