o
    sgR                     @   s"  d dl Z e jd d dlZd dlZd dlZd dlZd dlmZ d dl	m
Z
 d dlmZ d dlmZ d dlmZ d dlmZmZ d d	lmZmZ d d
lmZ d dlmZ d dlmZ eeejd dZG dd dZG dd dZ e!dkre Z"e"#e"j$dZ%edi e%Z%dZ&e e&e%Z'dS dS )    N.)Document)InvalidXmlError)Table)	Paragraph)ChatPromptTemplate)
ChatOpenAIOpenAIEmbeddings)EMBEDDING_MODELOPENAI_MODEL_35)Project)DynamoDB)S3BucketStorageOPENAI_API_KEY)modelapi_keyc                   @   s   e Zd ZdZdZdZdZee	de
ddZdZd	ed
edefddZd#dedefddZedefddZedefddZd#dedefddZdedefddZded
efd d!Zd"S )$ConvertTablezConvert a table to text.z	|z



z_+_r   r   )r   r   temperaturei  tabledocument_nameprojectc                 C   s   || _ || _|| _d S Nr   r   r   )selfr   r   r    r   =/var/www/html/XCapMarket/utils/document_loader/word_loader.py__init__.   s   
zConvertTable.__init__ 	prior_strreturnc              	   C   sv   g }| j jD ]}z| jdd |jD }|| W q ty$   Y qw | j|}| ||}| 	||}||fS )zConverts a table to text.c                 S   s   g | ]}|j qS r   )text).0cellr   r   r   
<listcomp>:   s    z6ConvertTable.convert_table_to_text.<locals>.<listcomp>)
r   rowscol_delimiter_textjoincellsappendr   row_delimiter_textget_table_summaryupload_table_to_dynamodb)r   r   	rows_textrowrow_text	table_strtable_summary
table_namer   r   r   convert_table_to_text3   s   z"ConvertTable.convert_table_to_textr1   c                 C   s   t  }||j|}|S )zGet the table from dynamodb)r   get_itemdocument_tables)clsr1   dbr   r   r   r   get_table_from_dynamodbG   s   z$ConvertTable.get_table_from_dynamodb
table_textc                 C   s8   | | j}d}|D ]}|d|| jd d 7 }q
|S )z!Decode the table text to markdownr   |)splitr)   replacer%   )r5   r8   
table_rowstable_markdownr-   r   r   r   decode_table_textO   s
   zConvertTable.decode_table_textr/   c              
   C   sT   t ddd| j dfddddd	d
g}|| jB }|||| j| j| jd}|jS )z*Use openAI to get the summary of the table)systemzYou are a financial analyst and you have been given a table with company data.
              You need to summarize the table in 3-4 sentences.humanzThe document name is z where the table originates.)r@   z/The information before the table is {prior_str})r@   z&The rows of the table are: {table_str})r@   z)The column delimiter is: {delimiter_text})r@   z*The row delimiter is: {row_delimiter_text})r?   zEPlease summarize the table in less than {summary_token_limit} tokens.)r?   z&Only return the summary, nothing else.)r   r/   delimiter_textr)   summary_token_limit)	r   from_messagesr   llm35invoker%   r)   rB   content)r   r/   r   chat_promptchainchain_outputr   r   r   r*   [   s.   

zConvertTable.get_table_summaryr0   c                 C   s8   |  | jj| j}|||d}t }|j|j|d |S )zUpload the table to dynamodb)	unique_idr/   r0   )r   data)create_table_namer   
project_idr   r   upload_to_dynamodbr4   )r   r/   r0   
_unique_id_itemr6   r   r   r   r+      s   z%ConvertTable.upload_table_to_dynamodbrM   c                 C   s   | j ||tt g}|S )zget table name)file_delimiter_textr&   strtime)r   rM   r   r1   r   r   r   rL      s   zConvertTable.create_table_nameNr   )__name__
__module____qualname____doc__r%   r)   rQ   r   osgetenvr   rD   rB   r   rR   r   r   r2   classmethodr7   r>   r*   r+   rL   r   r   r   r   r   !   s$    &r   c                   @   s*   e Zd ZdZdedefddZdd ZdS )	LoadWordDocz%Load a word doc and extract the text.doc_pathr   c                 C   s(   || _ || _tj|| _|  | _d S r   )r]   r   rY   pathbasename	file_nameload_docdocument_text_chunked)r   r]   r   r   r   r   r      s   zLoadWordDoc.__init__c                 C   sR  | j drt }|| j }tt|}n| j dr.tj| j dd}tt|j	}nt| j }d}d}g }|
 D ]]}t|trT|d|j 7 }|jdvrS|j}q=t|tr|dkre||dd	 dd
d |dD dd }	|d }t|| j| jd}
|
j|	d\}}||7 }||d|d d}q=|dkr||dd	 |S )zLoad the word doczs3://zhttps://
   )timeoutr   
)r    re   r    )r    typec                 S   s   g | ]}|d kr|qS rT   r   )r!   i_strr   r   r   r#      s    z(LoadWordDoc.load_doc.<locals>.<listcomp>Nr   )r   r   )r    rg   r1   )r]   
startswithr   get_file_from_s3_bucketr   ioBytesIOrequestsgetrF   iter_inner_content
isinstancer   r    r   r(   r&   r:   r   r`   r   r2   )r   s3brF   docrlast_str	chunk_strrb   rP   table_prior_chunk
conv_tabler/   r1   r   r   r   ra      sR   





zLoadWordDoc.load_docN)rU   rV   rW   rX   rR   r   r   ra   r   r   r   r   r\      s    r\   __main__navis_pack_shipzYs3://xcap-storage-dev/Sunbelt/navis_pack_ship/Navis - Questionnaire w Larry Feedback.docxr   )(sysr^   r(   rl   rY   rS   rn   docxr   docx.oxml.exceptionsr   
docx.tabler   docx.text.paragraphr   langchain.promptsr   langchain_openair   r	   configs.configr
   r   +services.ppt_generator.data_classes.projectr   utils.dynamo_dbr   utils.s3_storager   environ	openai_efr   r\   rU   r6   r3   projectsr   r]   ra   r   r   r   r   <module>   s:   vD