import sys

sys.path.append(".")

import asyncio

# from utils.logger import ServiceLogger
import logging
import os
import typing

from dotenv import load_dotenv
from langchain.agents import AgentExecutor, create_tool_calling_agent
from langchain.prompts import ChatPromptTemplate
from langchain.tools import tool
from langchain_openai import ChatOpenAI

from configs.config import OPENAI_EF, OPENAI_MODEL_4O, OPENAI_MODEL_MINI
from services.ppt_generator.data_classes.project import Project
from utils.chroma_db import ChromaDB

# from utils.config import OPENAI_EF, OPENAI_MODEL_MINI, OPENAI_MODEL_4O
from utils.client_check import ClientConfig
from utils.document_loader.DocUploader import DocUploader
from utils.document_loader.word_loader import ConvertTable
from utils.researcher.perplexity import PerplexityResearch
from utils.researcher.researcher_v2 import Researcher as ResearcherV2

# Module-wide logger. NOTE: getLogger() without a name returns the root logger.
XCM_logger = logging.getLogger()

# Runs before the os.getenv("OPENAI_API_KEY") lookups below, so the order of
# these statements matters.
load_dotenv()


def _build_chat_llm(model: str, temperature: float) -> ChatOpenAI:
    """Create a ChatOpenAI client with the settings shared by this module.

    Args:
        model: Model identifier (one of the OPENAI_MODEL_* config constants).
        temperature: Sampling temperature for the client.

    Returns:
        A ChatOpenAI instance using the OPENAI_API_KEY environment variable,
        ``request_timeout=5`` and ``max_retries=3``.
    """
    return ChatOpenAI(
        api_key=os.getenv("OPENAI_API_KEY"),
        model=model,
        temperature=temperature,
        request_timeout=5,
        max_retries=3,
    )


# Despite the "llm35" names, these two clients use OPENAI_MODEL_MINI; the names
# are kept because other code refers to them.
llm35 = _build_chat_llm(OPENAI_MODEL_MINI, temperature=0.1)
llm35_temp_05 = _build_chat_llm(OPENAI_MODEL_MINI, temperature=0.5)

# Client used for sub-question generation.
llm4o_sub_query = _build_chat_llm(OPENAI_MODEL_4O, temperature=0.5)

# Literal type of the known document classifications. Subscripting with a tuple
# is equivalent to star-unpacking inside the brackets, but does not require
# Python 3.11 syntax.
DocClassificationType = typing.Literal[tuple(DocUploader.classification)]


class RetrieveFromVectorDB:
    """
    Retrieve a project's document chunks from a Chroma collection and answer
    questions from them.

    Every lookup is filtered by the ``project_id`` given at construction time.
    Chunks whose metadata carries a ``table_name`` are replaced by the table
    content fetched through ``ConvertTable``.
    """

    # NOTE(review): neither value is referenced inside this class; presumably
    # timeouts (in seconds) consumed by callers -- confirm.
    async_timeout = 120
    async_timeout_internal = 60

    def __init__(
        self,
        project_id: str,
        client_chroma_collection: str,
    ):
        """Bind the instance to one project and one Chroma collection.

        Args:
            project_id: Value matched against the ``project_id`` metadata field.
            client_chroma_collection: Name of the Chroma collection to open.
        """
        self.project_id = project_id

        # ChromaDB collection
        chroma_db = ChromaDB()
        self.vectordb_collection = chroma_db.chroma_client.get_collection(
            client_chroma_collection, embedding_function=OPENAI_EF
        )

        # Not used by any method of this class; kept for compatibility.
        self.agent_executor = None

    @staticmethod
    def _build_answer_prompt():
        """Build the chat prompt shared by `process_doc` and `answer_question_from_docs`.

        The template expects the variables ``question`` and ``doc_str``.
        """
        return ChatPromptTemplate.from_messages(
            [
                (
                    "system",
                    """\
                        You are an Investment Banking managing director, creating a detail overview of a company for a buyer. \
                        You strive to provide data-points when answering the question.
                        You are not providing an opinion but rather a factual answer and current analysis. \
                        You want to make your answer specific to the company and quantitative, if possible, in nature. \
                        When comparing company trends to industry trends and you don't have enough information use [XXX] as a placeholder. \
                        If you don't have enough information, do not try to answer the question and only return 'Documents don't contain enough information'.\
                        """,
                ),
                (
                    "system",
                    """
                        Answer the question based on the information provided and append to the answer if relevant information is provided. \
                        You want to understand the previous answer before adding to it.
                        """,
                ),
                ("human", "The question you are answering is: {question}"),
                ("human", "Information you have access to: {doc_str}"),
            ]
        )

    def project_contain_docs(self):
        """Return True when the collection holds at least one entry for this project.

        Uses a metadata-only ``get`` (no query text is involved) and counts the
        returned ids, mirroring ``InternalSearch.project_contain_docs``.
        """
        matches = self.vectordb_collection.get(where={"project_id": self.project_id})
        return len(matches["ids"]) > 0

    def query(
        self,
        query: str,
        exclude_doc_types: typing.Literal[tuple(DocUploader.classification)] = None,
        n_results: int = 5,
    ):
        """Run a similarity query restricted to this project.

        Args:
            query: Text to search for.
            exclude_doc_types: Document classification(s) to leave out; ``None``
                means no exclusion.
            n_results: Requested number of chunks. Values below 5 are raised
                to 5.

        Returns:
            The raw result returned by the collection's ``query`` call, or an
            empty list when the query raised (the error is logged).
        """
        try:
            # Enforce a floor of five results.
            n_results = max(n_results, 5)
            if exclude_doc_types is None:
                exclude_doc_types = []

            return self.vectordb_collection.query(
                where=self.create_where_filter(exclude_doc_types),
                query_texts=[query],
                n_results=n_results,
            )
        except Exception as e:
            XCM_logger.error(e, exc_info=True)
            return []

    def create_where_filter(self, exclude_doc_types: list = None):
        """Build the metadata ``where`` filter for a query.

        Args:
            exclude_doc_types: Classifications to exclude. A single string is
                treated as a one-element list; ``None`` or an empty value
                disables the exclusion.

        Returns:
            A filter matching this project's id, combined with a ``$nin``
            condition on ``doc_classification`` when exclusions are given.
        """
        if not exclude_doc_types:
            return {
                "project_id": self.project_id,
            }

        if isinstance(exclude_doc_types, str):
            exclude_doc_types = [exclude_doc_types]

        return {
            "$and": [
                {"project_id": self.project_id},
                {"doc_classification": {"$nin": exclude_doc_types}},
            ]
        }

    def return_docs_or_tables(self, items):
        """Turn a query result into a list of text snippets.

        Only the results of the first query text (index 0) are read. A chunk
        whose metadata has a non-empty ``table_name`` is replaced by that
        table's text, prefixed with a markdown notice; otherwise the stored
        document text is used.

        Args:
            items: Result of `query`; must provide ``"metadatas"`` and
                ``"documents"``. The empty list that `query` returns on failure
                is not accepted here and raises ``TypeError``.

        Returns:
            A list of strings, one per returned chunk.
        """
        docs_strs = []
        for i, doc in enumerate(items["metadatas"][0]):
            if doc.get("table_name", None) not in [None, ""]:
                table = self.get_table_from_dynamo(doc["table_name"])
                docs_strs.append("Table below is in markdown: \n" + table)
            else:
                docs_strs.append(items["documents"][0][i])
        return docs_strs

    def get_table_from_dynamo(self, table_name):
        """Fetch a stored table by name and return its decoded text.

        Args:
            table_name: Name passed to ``ConvertTable.get_table_from_dynamodb``.

        Returns:
            The decoded content of the record's ``table_str`` field.
        """
        table = ConvertTable.get_table_from_dynamodb(table_name)
        return ConvertTable.decode_table_text(table["table_str"])

    async def process_doc(self, doc_str: str, question: str):
        """Answer ``question`` from a single document snippet (async).

        Args:
            doc_str: Text of the document or table to answer from.
            question: Question to answer.

        Returns:
            The model's answer text, or a fixed fallback message when the call
            fails (the error is logged).
        """
        try:
            chain = self._build_answer_prompt() | llm35_temp_05
            # NOTE(review): max_retries/timeout are passed to the chain call,
            # not to the model constructor; whether they take effect depends on
            # how LangChain forwards invoke kwargs -- confirm. The model client
            # is already configured with its own retry/timeout settings.
            chain_output = await chain.ainvoke(
                {"question": question, "doc_str": doc_str},
                max_retries=3,
                timeout=10,
            )
            return chain_output.content
        except Exception as e:
            XCM_logger.error(e, exc_info=True)
            return "This document wasn't processed, you can try a different one."

    def answer_question_from_docs(self, docs: list, question: str):
        """Answer ``question`` once per snippet and join the answers.

        Args:
            docs: Text snippets, e.g. the output of `return_docs_or_tables`.
            question: Question to answer from each snippet.

        Returns:
            The per-snippet answers joined by blank lines, in input order.
        """
        chain = self._build_answer_prompt() | llm35_temp_05
        batch_output = chain.batch(
            [{"question": question, "doc_str": doc_str} for doc_str in docs]
        )

        return "\n\n".join(result.content for result in batch_output)


class InternalSearch:
    """Agent-driven research over a project's internal documents.

    Exposes LangChain tools (internal vector search, sub-question generation,
    web search) and an entry point that lets a tool-calling agent answer a
    high-level question about the project's company.
    """

    # NOTE(review): not referenced inside this class; presumably a timeout (in
    # seconds) consumed by callers -- confirm.
    async_timeout = 120

    # Total number of agent invocations attempted before giving up.
    max_agent_attempts = 2

    def __init__(self, project: Project, client: ClientConfig = None, query=""):
        """Store the project, client configuration and main question.

        Args:
            project: Project whose documents are searched.
            client: Client configuration; its ``chroma_db`` attribute names the
                Chroma collection to use.
            query: Main question, stored as ``main_question``.

        Raises:
            ValueError: If ``client`` is None.
        """
        if client is None:
            raise ValueError("Client cannot be empty.")

        self.project = project
        self.client = client
        self.main_question = query

        # Created by `break_down_question_and_answer`.
        self.agent_executor = None

    def project_contain_docs(self):
        """Return True when the client's collection has entries for this project."""
        retrieve_from_vector_db = RetrieveFromVectorDB(
            project_id=self.project.project_id,
            client_chroma_collection=self.client.chroma_db,
        )
        ids = retrieve_from_vector_db.vectordb_collection.get(
            where={"project_id": self.project.project_id}
        )
        return len(ids["ids"]) > 0

    # NOTE: the docstrings of the @tool functions below are left untouched on
    # purpose -- the decorator exposes them to the model as tool descriptions,
    # so they are runtime text rather than plain documentation.
    @staticmethod
    @tool
    def search_internal(
        query: str,
        n_results: int = 5,
        project_id: str = None,
        client_chroma_collection: str = None,
    ) -> str:
        """
        Search the internal database for information

        Args:
            query (str): Question to search for in the database which should be at minimum of 20 words
            project_id (str): Project to search in
            client_chroma_collection (str): Collection to search in
            n_results (int, optional): Number of document chunks to search for. Defaults to 5.

        Returns:
            str: Answer to the question
        """
        # Reject missing *and* empty values, as the messages promise.
        if not project_id:
            return "Project ID cannot be empty."
        if not client_chroma_collection:
            return "Client chroma collection cannot be empty."
        if not query:
            return "Query cannot be empty."

        try:
            retrieve_from_vector_db = RetrieveFromVectorDB(
                project_id=project_id,
                client_chroma_collection=client_chroma_collection,
            )
            docs = retrieve_from_vector_db.return_docs_or_tables(
                retrieve_from_vector_db.query(query, n_results=n_results)
            )
            return retrieve_from_vector_db.answer_question_from_docs(docs, query)
        except Exception as e:
            # Report the failure to the agent as text so it can try again.
            XCM_logger.error(e, exc_info=True)
            return "This search was unsuccessful, you can try a different question."

    @staticmethod
    @tool
    def question_generation(question: str) -> str:
        """
        Taking a question, generate the relevant sub-questions to answer the main question.
        """

        chat_prompt = ChatPromptTemplate.from_messages(
            [
                (
                    "system",
                    """
            You are an investment banking managing director.
            You are doing a deep dive into a company and need to answer the question provided.
            You have access to an internal database where you can ask natural language questions.
            Take the question and a sub-question that needs to be answered to get a comprehensive answer.
            """,
                ),
                ("human", """The main question is: {question}"""),
                (
                    "human",
                    """Your output should be a sub-question that assists in answering the main question. It will be passed to another LLM like you. So provide the necessary details.""",
                ),
            ]
        )

        chain = chat_prompt | llm4o_sub_query
        chain_output = chain.invoke({"question": question})
        return chain_output.content

    # NOTE(review): the docstring below says persona "Defaults to None" and
    # describes a dict return, while the code defaults persona to a string and
    # returns the output of `pretty_print` under a `-> str` annotation. Left
    # unchanged because it is the tool description; confirm and update together
    # with the prompt.
    @staticmethod
    @tool
    def simple_web_search(
        question: str,
        persona: str = "You are a research expert specifically for investment banking needs",
    ) -> str:
        """Search the web for information.

        Args:
            question (str): The question to search for.
            persona (str, optional): The persona to use. Defaults to None.

        Returns:
            answer (dict): The answer to the question.
                answer: str, citations: list(url)
        """
        try:
            perplexity_research = PerplexityResearch()
            messages = [("system", "You are a helpful research agent.")]
            if persona:
                messages.append(("system", persona))
            messages.append(("human", question))
            response = perplexity_research.ask_perplexity(messages)
            return perplexity_research.pretty_print(response)
        except Exception as e:
            XCM_logger.error(e, exc_info=True)
            return "This search was unsuccessful, you can try a different question."

    def break_down_question_and_answer(self, question: str) -> str:
        """Answer ``question`` with a tool-calling agent over the project's documents.

        Builds the agent (stored on ``self.agent_executor``) and invokes it up
        to ``max_agent_attempts`` times, logging every failed attempt.

        Args:
            question: High-level question about the project's company.

        Returns:
            The agent's ``"output"`` text, or ``"Failed to get an answer"`` when
            every attempt raised.
        """
        # NOTE(review): the prompt below refers to a "simple web search tool"
        # and a "Researcher tool", but only these two tools are registered.
        # Confirm whether `simple_web_search` should be re-enabled or the
        # prompt adjusted.
        # tools = [self.search_internal, self.question_generation, self.simple_web_search]
        tools = [self.search_internal, self.question_generation]

        chat_prompt = ChatPromptTemplate.from_messages(
            [
                (
                    "system",
                    """You are an Investment Banking managing director.
            You are going to be asked a high-level question about a company. \
            You should aim to answer the question comprehensively and provide a detailed answer.\
            You want to make sure to provide a detailed answer with data points and comparisons to the relevant industry.\
            """,
                ),
                (
                    "system",
                    """You have access to tools. You should use them to answer the question.\
                Before breaking down the question further, always try to answer the question with the internal search tool.\
                If the answer is not sufficient, then breakdown the question to get sub-questions and a research plan. \
                Rely on the internal search tool to get information and then only augment the information with external sources using the simple web search tool.""",
                ),
                (
                    "human",
                    """
            Breakdown the slide into the questions you want the slide to answer and then use the tools to answer those questions.
            Ensure the questions are answered, otherwise invoke the tools again to answer the questions for a maximum of 4 times.
            If there is a statistic about the company, you want to make sure to compare it against the industry overall or an average using the Researcher tool.
            For financial questions, return the numerical value and the source of the information.""",
                ),
                ("human", "The company we are researching is: {company_name}"),
                (
                    "human",
                    """Some tools are going to ask for a project Id: {project_id} and \
                    a client chroma collection: {client_chroma_collection}. Make sure to pass it in.""",
                ),
                (
                    "human",
                    "You have the following information about the company: {company_project}",
                ),
                ("human", "The question you are answering is: {main_question}"),
                ("placeholder", "{agent_scratchpad}"),
            ]
        )

        agent = create_tool_calling_agent(
            tools=tools,
            llm=llm35,
            prompt=chat_prompt,
        )

        self.agent_executor = AgentExecutor(
            agent=agent,
            tools=tools,
            verbose=True,
        )

        for attempt in range(1, self.max_agent_attempts + 1):
            try:
                # The inputs are built inside the try so that a failure while
                # reading the project is handled like a failed agent run.
                agent_output = self.agent_executor.invoke(
                    {
                        "main_question": question,
                        "company_name": "\n".join(self.project.company_alt_names),
                        "project_id": self.project.project_id,
                        "client_chroma_collection": self.client.chroma_db,
                        "company_project": self.project.stringify_company_info(),
                    }
                )
                return agent_output["output"]
            except Exception as e:
                XCM_logger.warning(
                    "Agent attempt %d of %d failed: %s",
                    attempt,
                    self.max_agent_attempts,
                    e,
                    exc_info=True,
                )

        return "Failed to get an answer"


if __name__ == "__main__":
    # Manual smoke run: look up a known project and client configuration, ask
    # one question through the agent and print the answer.
    demo_project = Project.check_project_in_db("sonivate_xcm")
    demo_client = ClientConfig("xcm").get_client_config()
    demo_question = "How much much has the company raised? "

    internal_search = InternalSearch(demo_project, demo_client)
    print(internal_search.break_down_question_and_answer(demo_question))