o
    rgnF                     @   s  d dl Z e jd d dlZd dlZd dlZd dlZd dlmZ d dl	m
Z
mZ d dlmZ d dlmZ d dlmZ d dlmZmZmZ d d	lmZ d d
lmZ d dlmZ d dlmZmZ d dlm Z  d dl!m"Z" d dl#m$Z% e& Z'e  ee(deddddZ)ee(deddddZ*eee(dddddZ+G dd dZ,G dd dZ-e.dkre/dZ0ed1 Z2e-e0e2Z3e34dZ5e6e5 dS dS )    N.)load_dotenv)AgentExecutorcreate_tool_calling_agent)ChatPromptTemplate)tool)
ChatOpenAI)	OPENAI_EFOPENAI_MODEL_4OOPENAI_MODEL_MINI)Project)ChromaDB)ClientConfig)DocUploaderDOCUMENT_CLASSIFICATIONS)ConvertTable)PerplexityResearch)
ResearcherOPENAI_API_KEYg?      )api_keymodeltemperaturerequest_timeoutmax_retriesg      ?)r   r   r   r   r   c                   @   s   e Zd ZdZdZdZdedefddZdd	 Z	
	ddede	j
ee defddZddefddZdd Zdd ZdedefddZdedefddZd
S ) RetrieveFromVectorDBz
    A class to retrieve data from the Chroma database.

    This class provides methods to interact with a vector database collection
    using a specified project ID and client Chroma collection.
    x   <   
project_idclient_chroma_collectionc                 C   s(   || _ t }|jj|td| _d| _dS )z&Retrieve data from the chroma database)embedding_functionN)r   r   chroma_clientget_collectionr	   vectordb_collectionagent_executor)selfr   r    	chroma_db r(   =/var/www/html/XCapMarket/utils/researcher/chroma_search_v2.py__init__G   s   
zRetrieveFromVectorDB.__init__c                 C   s    | j jd| jid}t|dkS )'Check if the project contains documentsr   wherer   )r$   queryr   len)r&   idsr(   r(   r)   project_contain_docsX   s   z)RetrieveFromVectorDB.project_contain_docsNr   r.   exclude_doc_types	n_resultsc              
   C   sj   zt |d}|du rg }| jj| ||g|d}|W S  ty4 } zt| g W  Y d}~S d}~ww )zQuery the chroma databaser   N)r-   query_textsr3   )maxr$   r.   create_where_filter	Exceptionloggingerror)r&   r.   r2   r3   itemser(   r(   r)   r.   ]   s   

zRetrieveFromVectorDB.queryc                 C   sL   |du s
t |dkrd| jiS t|tr|g}dd| jidd|iigi}|S )z$Create a where filter for the query.Nr   r   z$anddoc_classificationz$nin)r/   r   
isinstancestr)r&   r2   r-   r(   r(   r)   r6   s   s   

z(RetrieveFromVectorDB.create_where_filterc                 C   sf   g }t |d d D ]&\}}|dddvr%| |d }|d|  q
||d d |  q
|S )z.Return the documents or tables from the items.	metadatasr   
table_nameNN zTable below is in markdown: 
	documents)	enumerategetget_table_from_dynamoappend)r&   r:   	docs_strsidoctabler(   r(   r)   return_docs_or_tables   s   z*RetrieveFromVectorDB.return_docs_or_tablesc                 C   s   t |}t |d }|S )zGet the table from dynamo db.	table_str)r   get_table_from_dynamodbdecode_table_text)r&   r@   rK   table_markdownr(   r(   r)   rF      s   
z*RetrieveFromVectorDB.get_table_from_dynamodoc_strquestionc           	   
      s   t  }| rtd ntd ztg d}|tB }|j||ddddI dH }|jW S  t	yU } zd	dl
}|  t| tj|d
d W Y d}~dS d}~ww )zProcess a single document.z%Loop is running before the async callz,Loop is not running, starting the async call)systema                          You are an Investment Banking managing director, creating a detail overview of a company for a buyer.                         You strive to provide data-points when answering the question.
                        You are not providing an opinion but rather a factual answer and current analysis.                         You want to make your answer specific to the company and quantitative, if possible, in nature.                         When comparing company trends to industry trends and you don't have enough information use [XXX] as a placeholder.                         If you don't have enough information, do not try to answer the question and only return 'Documents don't contain enough information'.                        )rT   z
                        Answer the question based on the information provided and append to the answer if relevant information is provided.                         You want to understand the previous answer before adding to it.
                        )humanz-The question you are answering is: {question})rU   z)Information you have access to: {doc_str}rR   rQ   r   
   )r   timeoutNr   Texc_infoz<This document wasn't processed, you can try a different one.)asyncioget_running_loop
is_runningprintr   from_messagesllm35_temp_05ainvokecontentr7   	traceback	print_exc
XCM_loggerr9   )	r&   rQ   rR   loopchat_promptchainchain_outputr;   rc   r(   r(   r)   process_doc   s0   
z RetrieveFromVectorDB.process_docdocsc                    sB   t g d}|tB }| fdd|D }ddd |D S )NrS   c                    s   g | ]} |d qS )rV   r(   ).0rQ   rR   r(   r)   
<listcomp>   s    zBRetrieveFromVectorDB.answer_question_from_docs.<locals>.<listcomp>z

c                 S   s   g | ]}|j qS r(   )rb   )rl   resultr(   r(   r)   rn      s    )r   r_   r`   batchjoin)r&   rk   rR   rg   rh   batch_outputr(   rm   r)   answer_question_from_docs   s   z.RetrieveFromVectorDB.answer_question_from_docs)Nr   )N)__name__
__module____qualname____doc__async_timeoutasync_timeout_internalr>   r*   r1   typingLiteraltupler   intr.   listr6   rL   rF   rj   rs   r(   r(   r(   r)   r   <   s0    

Cr   c                   @   s   e Zd ZdZdZddedefddZd	d
 Ze	e
			ddededededef
ddZe	e
dedefddZe	e
	ddededefddZdedefddZdS )InternalSearchz/Class for the primary research for the project.r   NrB   projectclientc                 C   s&   || _ || _|| _|d u rtdd S )NzClient cannot be empty.)r   r   main_question
ValueError)r&   r   r   r.   r(   r(   r)   r*      s   zInternalSearch.__init__c                 C   s:   t | jj| jjd}|jjd| jjid}t|d dkS )r+   r   r    r   r,   r0   r   )r   r   r   r   r'   r$   rE   r/   )r&   retrieve_from_vector_dbr0   r(   r(   r)   r1   
  s   
z#InternalSearch.project_contain_docsr   r.   r3   r   r    returnc              
   C   s   |du rdS |du rdS | du rdS zt ||d}||j| |d}||| W S  tyO } zddl}|  t| tj	|dd	 W Y d}~d
S d}~ww )a  
        Search the internal database for information

        Args:
            query (str): Question to search for in the database which should be at minimum of 20 words
            project_id (str): Project to search in
            client_chroma_collection (str): Collection to search in
            n_results (int, optional): Number of document chunks to search for. Defaults to 5.

        Returns:
            str: Answer to the question
        NzProject ID cannot be empty.z)Client chroma collection cannot be empty.zQuery cannot be empty.r   )r3   r   TrY   ?This search was unsuccessful, you can try a different question.)
r   rL   r.   rs   r7   rc   rd   r^   re   r9   )r.   r3   r   r    r   rk   r;   rc   r(   r(   r)   search_internal  s.   zInternalSearch.search_internalrR   c                 C   s*   t g d}|tB }|d| i}|jS )ze
        Taking a question, generate the relevant sub-questions to answer the main question.
        ))rT   au  
            You are an investment banking managing director.
            You are doing a deep dive into a company and need to answer the question provided.
            You have access to an internal database where you can ask natural language questions.
            Take the question and a sub-question that needs to be answered to get a comprehensive answer.
            )rU   z The main question is: {question})rU   zYour output should be a sub-question that assists in answering the main question. It will be passed to another LLM like you. So provide the necessary details.rR   )r   r_   llm4o_sub_queryinvokerb   )rR   rg   rh   ri   r(   r(   r)   question_generationA  s   z"InternalSearch.question_generationCYou are a research expert specifically for investment banking needspersonac              
   C   sr   z!t  }dg}|r|d|f |d| f ||}||W S  ty8 } zt| W Y d}~dS d}~ww )a5  Search the web for information.

        Args:
            question (str): The question to search for.
            persona (str, optional): The persona to use. Defaults to None.

        Returns:
            answer (dict): The answer to the question.
                answer: str, citations: list(url)
        )rT   z!You are a helpful research agent.rT   rU   Nr   )r   rG   ask_perplexitypretty_printr7   r^   )rR   r   perplexity_researchmessagesresponser;   r(   r(   r)   simple_web_search_  s   
z InternalSearch.simple_web_searchc              
   C   s   | j | jg}tg d}t|t|d}t||dd| _d}|dk rbz| j|d	| j
j| j
j| jj| j
 d}|d	 W S  ty] } z|d7 }t| td
 W Y d}~nd}~ww |dk s"dS )z=Breakdown the main question and answer it with relevant docs.))rT   ac  You are an Investment Banking managing director.
            You are going to be asked a high-level question about a company.             You should aim to answer the question comprehensively and provide a detailed answer.            You want to make sure to provide a detailed answer with data points and comparisons to the relevant industry.            )rT   a  You have access to tools. You should use them to answer the question.                Before breaking down the question further, always try to answer the question with the internal search tool.                If the answer is not sufficient, then breakdown the question to get sub-questions and a research plan.                 Rely on the internal search tool to get information and then only augment the information with external sources using the simple web search tool.)rU   a  
            Breakdown the slide into the questions you want the slide to answer and then use the tools to answer those questions.
            Ensure the questions are answered, otherwise invoke the tools again to answer the questions for a maximum of 4 times.
            If there is a statistic about the company, you want to make sure to compare it against the industry overall or an average using the Researcher tool.
            For financial questions, return the numerical value and the source of the information.)rU   z1The company we are researching is: {company_name})rU   zSome tools are going to ask for a project Id: {project_id} and                     a client chroma collection: {client_chroma_collection}. Make sure to pass it in.)rU   zGYou have the following information about the company: {company_project})rU   z2The question you are answering is: {main_question})placeholderz{agent_scratchpad})toolsllmpromptT)agentr   verbose   r   
)r   company_namer   r    company_projectoutputzretrying...NzFailed to get an answer)r   r   r   r_   r   llm35r   r%   r   rq   r   company_alt_namesr   r   r'   stringify_company_infor7   r^   )r&   rR   r   rg   r   retryagent_outputr;   r(   r(   r)   break_down_question_and_answer{  sD   (
	z-InternalSearch.break_down_question_and_answerrA   )r   NN)r   )rt   ru   rv   rw   rx   r   r   r*   r1   staticmethodr   r>   r}   r   r   r   r   r(   r(   r(   r)   r      sF    
*r   __main__sonivate_xcmxcmz&How much much has the company raised? )7syspathrG   r[   r8   osrz   dotenvr   langchain.agentsr   r   langchain.promptsr   langchain.toolsr   langchain_openair   configs.configr	   r
   r   +services.ppt_generator.data_classes.projectr   utils.chroma_dbr   utils.client_checkr   !utils.document_loader.DocUploaderr   r   !utils.document_loader.word_loaderr   utils.researcher.perplexityr   utils.researcher.researcher_v2r   ResearcherV2	getLoggerre   getenvr   r`   r   r   r   rt   check_project_in_dbprget_client_configcustomersearchr   search_answerr^   r(   r(   r(   r)   <module>   st     A O
