"""Format public comparables ("comps") data as a table using an LLM."""

import sys

sys.path.append(r".")

import json
import os

from dotenv import load_dotenv

load_dotenv()

from langchain_core.output_parsers import JsonOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from pydantic import BaseModel, Field

from configs.config import OPENAI_MODEL_35


# Schema describing the table the LLM is asked to produce; it is handed to
# JsonOutputParser inside format_for_table.
# NOTE(review): documented with `#` comments rather than a class docstring
# because pydantic may copy a docstring into the generated JSON schema, which
# would change the prompt text — confirm before converting.
class TableOutput(BaseModel):
    # Table contents as a list of lists whose cells are str, float or int
    # (presumably one inner list per table row — TODO confirm with the caller).
    # The `...` default marks the field as required.
    data: list[list[str | float | int]] = Field(
        ..., title="List of list for the table's data"
    )


def _scale_large_numbers(
    comp_table: dict, *, threshold: int = 10**5, divisor: int = 10**9
) -> dict:
    """Return a copy of *comp_table* with large integers divided by *divisor*.

    Integers whose magnitude exceeds *threshold* are converted to floats
    expressed in units of *divisor* (billions by default). One level of
    nested dicts is handled the same way. Every other value is copied over
    unchanged, and the input mapping is never modified.
    """

    def _scale(value):
        # abs() so that large negative figures are scaled together with the
        # positive ones and a column keeps consistent units.
        if isinstance(value, int) and abs(value) > threshold:
            return value / divisor
        return value

    scaled = {}
    for key, value in comp_table.items():
        if isinstance(value, dict):
            scaled[key] = {
                sub_key: _scale(sub_value) for sub_key, sub_value in value.items()
            }
        else:
            scaled[key] = _scale(value)
    return scaled


def format_for_table(data: dict) -> list[list[str | float | int]]:
    """Ask the LLM to lay out comparable-company data as a table.

    Args:
        data: Mapping of ticker to a dict that contains a ``"comp_table"``
            dict of metrics. Metric values may themselves be dicts (one
            level deep). The mapping is not modified.

    Returns:
        The table as a list of lists. When the model answers with a JSON
        object that wraps the table under a ``"data"`` key (the shape
        described by ``TableOutput``), the wrapped list is returned;
        any other parsed JSON value is returned as-is. An empty *data*
        yields an empty list without calling the model.

    Raises:
        KeyError: If a ticker entry has no ``"comp_table"`` key.
    """
    if not data:
        # Nothing to format; avoid a pointless (and billable) LLM request.
        return []

    # Build the LLM payload from scaled copies so the caller's data is left
    # untouched.
    llm_input = {
        ticker: _scale_large_numbers(entry["comp_table"])
        for ticker, entry in data.items()
    }

    llm = ChatOpenAI(
        model=OPENAI_MODEL_35, temperature=0.2, api_key=os.getenv("OPENAI_API_KEY")
    )

    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", "You are an expert with the Python PPTX library"),
            (
                "system",
                "You are going to create a table for the data provided. Return the data as a list of list",
            ),
            (
                "system",
                "Don't provide the ticker column and add an empty column between Name and the data",
            ),
            (
                "system",
                "If there are units such as Billions or Millions consistent across the column, add it to the header. Don't add it to the Multiples like EV / Revenue",
            ),
            ("human", "The data you have to work with is: {data}"),
            ("human", "Return the in JSON: {json_format}"),
            ("placeholder", "{agent_scratchpad}"),
        ]
    )

    # `pydantic_object` is the keyword JsonOutputParser uses for the schema;
    # it is what makes get_format_instructions() describe TableOutput.
    parser = JsonOutputParser(pydantic_object=TableOutput)

    chain = prompt | llm | parser
    output = chain.invoke(
        {"data": json.dumps(llm_input), "json_format": parser.get_format_instructions()}
    )

    # The parser returns plain parsed JSON. Unwrap the TableOutput-shaped
    # object so the result matches the declared list-of-lists return type.
    if isinstance(output, dict) and "data" in output:
        return output["data"]
    return output


if __name__ == "__main__":
    # Manual smoke test: build comparables for a few tickers and print the
    # table produced by format_for_table.
    from services.stock_info.public_comparable import PublicComparables

    tickers = ["AAPL", "TSLA", "META", "MSFT"]
    comparables = PublicComparables(tickers)

    table = format_for_table(comparables.data)
    print(table)
