o
    gB                     @   s   d Z ddlZddlZddlZddlmZ ddlZddlZddl	m
Z
 ddlmZ G dd dZedkrJe ZejejZeed	d
Zee dS dS )Get items from dynamodb    N)Decimal)Attr)DynamoConnectorc                   @   s  e Zd ZdZdZdd Zdd Zdd Zd9d
dZd:ddZ	de
fddZejejejjdd		d:ddZdefddZdd Zdd Zdd Zdd Zd d! Zed"d# Z	d;d%d&Zd<d(d)Zd9d*d+Z								d=d,d-Z			.d>d/e
d0e
d1e fd2d3Z!		d9d4d5Z"d?d7d8Z#d	S )@DynamoDBr   slides_researchc                 C   s   t  }|j| _| jd| _| jd| _| jd| _| jd| _| jd| _| jd| _| jd| _	| jd| _
| jd	| _| jd
| _| jd| _| jd| _| jd| _d S )NuserTeamcompany_informationLLM_resultscompany_listprojects
projectsV2document_tables	fund_infopress_releases	web_pagesproject_docsproject_structure)r   dynamodbTabler   teamcompany_info_table	llm_tablecompany_list_tabler   r   r   fund_info_tabler   r   r   r   )selfdb_connector r   +/var/www/html/XCapMarket/utils/dynamo_db.py__init__   s   zDynamoDB.__init__c                 C   s   |j S zGet the table name)name)r   tabler   r   r   get_table_name$   s   zDynamoDB.get_table_namec                 C   s   | j |S r!   )r   r   )r   
table_namer   r   r   	get_table(   s   zDynamoDB.get_tableNc                 C   s   |j d d }d}t|j dkr&|dur&|j d d }|j||||id}n2t|j dkrP|du rP|jt||d}d|vsHt|d dkrJdS |d d S |j||id}d	|vr^dS |d	 }|S )
r   r   AttributeNameN      KeyFilterExpressionItemsItem)
key_schemalenget_itemscanr   eq)r   r#   key_idsort_key_idroot_key_namesort_key_nameresponseitemr   r   r   r2   ,   s"   
zDynamoDB.get_itemc                 C   s   g }|du r
|  }|du r|j |d}n|j ||d}||dg  d|v r>| ||||}||dg  d|v s)|S )z>Get all items from dynamodb with an optional filter expressionNr,   )r-   ExpressionAttributeValuesr.   LastEvaluatedKey)r3   extendgetscan_with_backoff)r   r#   filter_expressionexpression_valuesitemsr9   r   r   r   
scan_tableD   s"   zDynamoDB.scan_tablelook_up_valuec                 C   s~   |j d d }ddlm} |j|||d}|dg }d|v r=|j||||d d}||dg  d|v s"|S )zQuery items from dynamodbr   r'   r*   )KeyConditionExpressionr.   r<   )rE   ExclusiveStartKey)r0   boto3.dynamodb.conditionsr+   queryr4   r>   r=   )r   r#   rD   table_partition_keyr+   r9   	documentsr   r   r   query_items[   s    zDynamoDB.query_items   )	max_triesc                 C   sF   |d u r|j |d dS |d u r|j |d |dS |j |d ||dS )Nr<   rF   rF   r-   )rF   r-   r;   )r3   )r   r9   r#   r@   rA   r   r   r   r?   s   s   zDynamoDB.scan_with_backoffpartition_keysc                 C   s2   |j |i}| jj|d}|di |j g }|S )z?get all of the items in the table with the given partition keysRequestItems	Responses)r"   r   batch_get_itemr>   )r   r#   rP   keysr9   rB   r   r   r   rT      s   
zDynamoDB.batch_get_itemc                 C   s   d}d}|d D ]}|d dkr|d }|d dkr|d }q|d }d|v r3|d	ks/|du r3|d }|d	ks;|du rK|d
  dd  dd  }||d
 t|d tr\|d d nd||dS )zEConvert the dynamodb items to the format outputted by the recommendorNr   categoryOverviewr9   
HQ_Foundedcompany_namer"    root_urlzwww..r   logologo_url)rY   company_urlr^   overviewhq_n_founded)split
capitalize
isinstancedict)r   company_info
llm_resultra   rb   resultsrY   r   r   r   convert_for_recommender   s,   z DynamoDB.convert_for_recommenderc                 C   s(   |  | j|}|  | j|}| ||S r   )r2   r   r   rj   )r   r[   rg   rh   r   r   r   get_item_for_recommender   s   z!DynamoDB.get_item_for_recommenderc                 C   s   |  | j|}|S rk   )r2   r   )r   r[   r   r   r   r   get_item_for_fund_recommender   s   z&DynamoDB.get_item_for_fund_recommenderc                    sV   t |tr fdd| D S t |tr fdd|D S t |tr)tt|S |S )Nc                    s   i | ]
\}}|  |qS r   convert_floats_to_decimals.0kvr   r   r   
<dictcomp>   s    z7DynamoDB.convert_floats_to_decimals.<locals>.<dictcomp>c                    s   g | ]}  |qS r   rn   )rq   elementrt   r   r   
<listcomp>   s    z7DynamoDB.convert_floats_to_decimals.<locals>.<listcomp>)re   rf   rB   listfloatr   str)r   r:   r   rt   r   ro      s   


z#DynamoDB.convert_floats_to_decimalsc                 C   sH   |  |}z	|j|d}W |S  ty#   td |j|d}Y |S w )zPut item into collectionr/   
   )ro   put_item	Exceptiontimesleep)r   r#   datar9   r   r   r   upload_to_dynamodb   s   

zDynamoDB.upload_to_dynamodbc                 C   s   d dd | D S )z5Remove non-printable characters from the text fields.rZ   c                 s   s    | ]	}|  r|V  qd S N)isprintable)rq   cr   r   r   	<genexpr>   s    z*DynamoDB._sanitize_text.<locals>.<genexpr>)join)textr   r   r   _sanitize_text   s   zDynamoDB._sanitize_text   c                    sF   j |}g }|D ]} fdd| D }|| q
tdt|dD ]{}	||	|	d  }
d}||krz!| }|
D ]}|j|d q=W d   n1 sPw   Y  W nI tj	j
y } ztd| d|d	  d
| d |d	7 }t| W Y d}~nd}~w ty } ztd|  W Y d}~n	d}~ww ||ks5q%dS )ag  
        Batch upload items into the DynamoDB table with retry logic.

        :param table_name: Name of the DynamoDB table.
        :param data_list: A list of dictionaries, each representing an item to upload.
        :param max_retries: Number of retries for unprocessed items.
        :param retry_delay: Time in seconds to wait before retrying.
        c                    s*   i | ]\}}|t |tr |n|qS r   )re   rz   r   rp   rt   r   r   ru      s    z5DynamoDB.batch_upload_to_dynamodb.<locals>.<dictcomp>r      r{   NzDynamoDB error: z, retrying r)   /z...zUnexpected error: )r   r   rB   appendranger1   batch_writerr}   botocore
exceptionsClientErrorprintr   r   r~   )r   r%   	data_listmax_retriesretry_delayr#   cleaned_data_listr:   sanitized_itemibatch_chunkretriesbatcher   rt   r   batch_upload_to_dynamodb   sB   

z!DynamoDB.batch_upload_to_dynamodbFc              
   C   s   |  | j|}|dur$| jjd|id|t tj ddd |S | jj||dt tj dt tj ddd	 dS )
z Get or update item in collectionNr[   z7set scrapped = :s, modified_by = :m, modified_date = :dz%Y-%m-%d %H:%M:%S)z:sz:mz:d)r+   UpdateExpressionr;   r   )r[   scrappedsize
created_bycreated_datemodified_bymodified_dater{   )	r2   r   update_itemsocketgethostnamedatetimenowstrftimer}   )r   r[   r   r9   r   r   r   create_or_update_company_list   s4   	

z&DynamoDB.create_or_update_company_listc                 C   s   g }|du r|  }n|j |d}|d }d|v rXz|du r'|j |d d}n	|j |d |d}||d 7 }W n tyS   td |j |d d}||d 7 }Y nw d|v s|S )z-For a given table, get all items in the tableNr,   r.   r<   rN   rO   r|   )r3   r~   r   r   )r   r#   r@   rB   r9   r   r   r   get_all_table_items  s2   

zDynamoDB.get_all_table_itemsc                 C   s   g }| d}	|p
i }||d< |r|r|	d| d7 }	||d< |	|d}
|r*||
d< |j di |
}||dg  d	|v rZ|d	 |
d
< |j di |
}||dg  d	|v s?|S )a  
        Query items from DynamoDB based on partition key, optionally sort key, and with an optional filter expression.
        Args:
            table: DynamoDB table object.
            partition_key_name: The name of the partition key.
            partition_key_value: The value of the partition key.
            sort_key_name: (Optional) The name of the sort key.
            sort_key_value: (Optional) The value of the sort key.
            filter_expression: (Optional) Additional filter expression.
            expression_values: (Optional) Expression attribute values for the filter expression.

        Returns:
            List of items matching the query.
        z = :partition_keyz:partition_keyz AND z = :sort_keyz	:sort_key)rE   r;   r-   r.   r<   rF   Nr   rH   r=   r>   )r   r#   partition_key_namepartition_key_valuer8   sort_key_valuer@   rA   rB   key_conditionquery_paramsr9   r   r   r   query_table>  s(   
zDynamoDB.query_table
project_id
index_namer   returnc                 C   s   g }|j || d| d| |id}||dg  d|v rC|j || d| d| |i|d d}||dg  d|v s!|S )zo
        Query items from DynamoDB based on index name, key condition, and optional expression values.
        z = ::)	IndexNamerE   r;   r.   r<   )r   rE   r;   rF   r   )r   r#   r   r   rA   sort_keyrB   r9   r   r   r   query_indexu  s"   	zDynamoDB.query_indexc                 C   s   |j ||||dS )zUpdate item in collection)r+   r   r;   ReturnValues)r   )r   r#   keyupdate_expressionrA   return_valuesr   r   r   r     s   zDynamoDB.update_itemd   c                 C   sp   g }t dt||D ]+}||||  }| jj|jd|iid}d|v r5|j|d v r5||d |j  q
|S )a/  
        Batch get items from DynamoDB table using a list of keys.

        Args:
            table: DynamoDB table object
            keys: List of key dictionaries
            chunk_size: Size of each batch request (max 100)

        Returns:
            List of items retrieved from DynamoDB
        r   KeysrQ   rS   )r   r1   r   rT   r"   r=   )r   r#   rU   
chunk_sizerB   r   chunkr9   r   r   r   batch_get_items  s   zDynamoDB.batch_get_itemsr   )NN)r   rL   )F)NNNN)Nr   )r   )$__name__
__module____qualname____doc__slides_research_tabler    r$   r&   r2   rC   rz   rK   backoffon_exceptionexpor   r   r   r?   rf   rT   rj   rl   rm   ro   r   staticmethodr   r   r   r   r   rx   r   r   r   r   r   r   r   r      sR    





.
#$
<

r   __main__r   greenjunkremoval)r   r   r   r   decimalr   r   botocore.exceptionsr   rG   r   utils.dynamo_connectorr   r   r   dbr   r   r   r#   r   rB   r   r   r   r   r   <module>   s&       ,