o
    g[M                     @   s  d Z ddlZddlZddlZddlZddlZddlZddlmZ ddl	m
Z
 ejd ddlmZ ddlmZmZ ddlmZmZmZ dd	lmZmZ dd
lmZ ddlmZ ddlmZ ddlm Z  ddl!m"Z" ddl#m$Z$ ddl%m&Z& ddl'm(Z( e  ej)d Z*ej)d Z+e Z,G dd dZ-de.de/fddZ0e1dkrddl2Z2e22 a3dD ] Z4dZ5dZ6e-e4e5e6dZ7e8e79  e22 Z:e;e:t3  qdS dS )z#Class to create the company profile    N)load_dotenvDocument.)pic_file_extension)LLM_queriespe_LLM_queries)CompanyInfoFundInfoLogo)
Leadership
LLMResults)load_sic_codes)ChromaDB)DynamoDB)LLMChat)
Compressor)GoogleSearch)SeleniumDriverChrome)
parsed_urlOPENAI_API_KEYBRAND_FETCH_API_KEYc                	   @   s   e Zd ZdZi ZdZdZdZg ZdZ	dZ
dZg Z	d/dedededdfdd	Zd0d
dZdd Zdd Zdd ZdefddZdd Zdd Zdd Zdd Zdd Zd1dd Zd!d" Zd#d$ Zd%d& Zd0d'efd(d)Zd*e fd+d,Z!d*e fd-d.Z"dS )2CompanyProfileMakerz2Create the company profile for a given company urlNFcompany_urlstock_ticker
is_pe_shopreturnc                 C   s6   t || _ | j j| _| j j| _|| _|| _t | _dS )z3Create the company profile for a given company url.N)	r   domaincompany_domainurlr   r   pe_shopr   llm_chat)selfr   r   r    r#   D/var/www/html/XCapMarket/services/company_profile/company_profile.py__init__4   s   


zCompanyProfileMaker.__init__c                    s   t d| j t|  |  |  I dH  t| || 	 I dH }|d | _
t|  |  I dH  t d| j dS )z+Main function to create the company profilez%%s: Starting company profile creationN   z%s: Company profile created)logginginfor   asynciogather
get_imagesget_brand_from_urlllm_runsproduct_to_sicmap_leadership
leadershipupload_to_dynamodbupload_to_chromadb)r"   sic_coderesults_sic_leadershipr#   r#   r$   main?   s   


zCompanyProfileMaker.mainc                    s    j stjntj} fdd|D }tj| I dH   |I dH } |I dH }|D ]}|d D ]}||d d|d< q3q-tj fdd|D  I dH }|D ]}|	d qR| _
dS )	zRun the llm queryc                    s   g | ]}  |d qS )search)search_google).0	llm_queryr"   r#   r$   
<listcomp>\   s    z0CompanyProfileMaker.llm_runs.<locals>.<listcomp>Nsearch_resultslink 	page_textc                    s   g | ]}  |qS r#   )ask_llm)r8   queryr:   r#   r$   r;   m   s    )r    r   queriesr   r)   r*   get_urls_from_resultsscrape_urlsgetpopllm_results)r"   llm_queriessearch_taskssearch_urlsurl_text_mapr9   search_resultr#   r:   r$   r-   U   s&   


zCompanyProfileMaker.llm_runsc                    s   dd |D S )z$Get the urls from the search resultsc                 S   s"   h | ]}|d  D ]}|d qqS )r<   r=   r#   )r8   resultr   r#   r#   r$   	<setcomp>w   s   " z<CompanyProfileMaker.get_urls_from_results.<locals>.<setcomp>r#   )r"   resultsr#   r#   r$   rC   u   s   z)CompanyProfileMaker.get_urls_from_resultsc                    sz   t  jd| jj d|d  ||dd| jdI dH }|s7t  jd| jj d|d  |d| jdI dH }||d< |S )	zSearch google for a given query[z] r6   check_domainF)domain_checkdomain_name_to_checkNr<   )r   r7   r   r   rE   r   )r"   r9   search_typer<   r#   r#   r$   r7   y   s    
z!CompanyProfileMaker.search_googler9   c                    s   t dd |d D s|ddstd| j d|d  t }d	d
 |d D }|| |j|d ddI dH }ddd |D }| j	||d I dH }||d< |S )z+Ask the LLM the questions for the llm_queryc                 s   s    | ]}|d  V  qdS )r?   Nr#   r8   rL   r#   r#   r$   	<genexpr>   s    
z.CompanyProfileMaker.ask_llm.<locals>.<genexpr>r<   allow_emptyTzNo search results for z - vector_db_queryc                 S   s2   g | ]}|d  rt |d  |d |d ddqS )r?   r=   title)sourcerY   )page_contentmetadatar   rU   r#   r#   r$   r;      s    z/CompanyProfileMaker.ask_llm.<locals>.<listcomp>   )kN
c                 s   s    | ]}|j V  qd S N)r[   )r8   docr#   r#   r$   rV      s    	LLM_queryresponse)
anyrE   
ValueErrorr   r   compress	aretrievejoinr!   openai_request)r"   r9   	retriever	documentsrO   combined_contentllm_responser#   r#   r$   r@      s(   

zCompanyProfileMaker.ask_llmc                 C   s2   || j v r
| j | S t }||}|| j |< |S )+Scrape data from a given url asynchronously)visited_websitesr   get_url_text)r"   url_to_scrapedriverr?   r#   r#   r$   
scrape_url   s   



zCompanyProfileMaker.scrape_urlc                    s(   ddl m} t|}||I dH }|S )rn   r   )crawl_pagesN)utils.webscrape.crawl_pagesrt   set)r"   urls_to_scrapert   rO   r#   r#   r$   rD      s
   zCompanyProfileMaker.scrape_urlsc                    s   ddt  d}d| jdd  }ztj||dd}|jd	kr.| jd
d  | _W dS W n tj	j
y;   Y dS w | }d|v rH|d n| j| _d|v rZ| |d I dH nd| _| |d I dH | _dS )a#  
        Fetches brand information from a given URL using the brandfetch API

        Args:
            url (str): The URL to fetch the text from.

        Returns:
            brand (dict):
                company_name (str): The name of the company
                logos (dict):
                    logo_url (str): The url of the logo
                    logo_extension (int): The file extension of the logo
                    logo_dark (bool): If the logo is dark or not
                linkedin (str): The linkedin url of the company
        zapplication/jsonzBearer )acceptAuthorizationz$https://api.brandfetch.io/v2/brands/z//r&      )headerstimeout   r   r   Nnamelogoslinks)BRAND_FETCH_APIr   splitrequestsrE   status_coder   rY   company_name
exceptionsReadTimeoutjsonfind_logo_from_logoslogofind_linkedin_from_reqcompany_linkedin)r"   request_headerrequest_urlreqr#   r#   r$   r,      s*   
z&CompanyProfileMaker.get_brand_from_urlc           
         s   t |dkrtd dS d}d}d}|D ]3}|d D ],}|d }|d dkr/t|d	  d nd
}|d dk}	t |dksA||k rG|}|}|	}qqt|||dS )z%Find the logo from the logos returnedr   zNO LOGO FOUNDFr>   formatssrcthemedarkformat
   )logo_urllogo_extension	logo_dark)lenprintr   r   )
r"   r   final_logo_urlfinal_logo_extensionfinal_logo_darkr   fr   r   r   r#   r#   r$   r      s4   z(CompanyProfileMaker.find_logo_from_logosc                    s*   |D ]}|d dkr|d g  S qg S )z)Find the linkedin from the links returnedr~   linkedinr   r#   )r"   r   r=   r#   r#   r$   r     s   z*CompanyProfileMaker.find_linkedin_from_reqc              	      s   | j r	dddS |du r"|du rt| jdn|}| j|I dH }zttt| d | _ttt| d | _	W n t
tfyK   d| _d| _	Y nw | j| j	dS )zlGet the SIC code for the company,
        if running outside of the class, product_info is required
        FundPrivate Equity)industrysectorNProductsr   r   )r    find_llm_responserG   r!   r.   sic_dictintfloatr   r   KeyErrorre   )r"   r3   product_infor#   r#   r$   r.   "  s*   
z"CompanyProfileMaker.product_to_sicc                    s   t  }|d| jj ddd| jdI dH }t|dk r8|d| jj ddd| jI dH }td	| j| j d
d |D | _	dS )zGet the images for the companyrP   z ] products | solutions | companyimagesT2   N   Fz"%s: Not enough images found for %sc                 S   s   g | ]
}d |d ddqS )i  imageUrlimg)countr   typer#   )r8   rM   r#   r#   r$   r;   W  s    z2CompanyProfileMaker.get_images.<locals>.<listcomp>)
r   r7   r   r   r   r   r'   warningr   r   )r"   google_searchr<   r#   r#   r$   r+   ?  s0   

zCompanyProfileMaker.get_imagesc                    s  t  }| jr9t| j| jd}t| j| j| j| j| j	d| j
| jd| jdur(| jnd|jd}||j|  dS t| j| j| j| j| j	d| j
| jd| jdurR| jndd
}||j|  t| j| j| jdurl| jndd}||jt|  |j| jdd dS )z&Upload the company profile to dynamodb)root_urlLLM_resultsN)r   r   r   r   r   business_modelr   r   stock_exchanger   r   )
r   r   r   r   r   r   r   r   r   r   )r   r   r0   T)scrapped)r   r    r   r   rG   r
   r   r   r   r   r   r   r   r   r1   fund_info_table
model_dumpr	   company_info_tabler0   	llm_tabler   loadsmodel_dump_jsoncreate_or_update_company_list)r"   dbllm_info	fund_infocompany_infor#   r#   r$   r1   \  sR   z&CompanyProfileMaker.upload_to_dynamodbc                    s   t  }| jr*| jt| jdt| jd t| jd dddd}||j|d d
S | jt| jd	t| jd| jd
ur>| jnd| jd
urG| jnd| j	d
urOdnd| j	d
urX| j	nd
d}||j
|d ||j|d d
S )z&Upload the company profile to chromadbOverviewzInvestment ThesisAUMr   r   F)r   documentr   r   publicr   r   Nr>   T)r   productoverviewr   r   r   r   r   r   )r   r    r   r   rG   add_itemfund_collectionr   r   r   product_collectionoverview_collection)r"   	chroma_dbchroma_itemr#   r#   r$   r2     s0   




z&CompanyProfileMaker.upload_to_chromadbcompany_leadershipc                    s   |du r#| j du rtdt| j d}|du sd|v r#dddigi}t }tdI dH  ||I dH }|ddsD||I dH }g }|d D ]}|t	d	i | qJd|iS )
z
        Map the leadership to linkedin and convert to structured data.
        If running outside of the lcass, company leadership is required"
        Nz*LLM results are required to map leadershipr   z	not foundr0   r~   g?Fr#   )
rG   re   r   r   r)   sleepr/   rE   appendr   )r"   r   r0   r!   new_leadershipleaderr#   r#   r$   r/     s    
z"CompanyProfileMaker.map_leadershipsupplied_company_infoc                 C   sn   |    | j|_| jr| jn|j|_| jr%|js| jg|_n|j| j |  t }||j	|
  |S )
        Should be used only if the brand is not found in the company_info
        The function needs a supplied company info input
        r,   r   r   r   r   extendupdate_modifyr   r1   r   r   r"   r   r   r#   r#   r$   non_async_load_brand  s   z(CompanyProfileMaker.non_async_load_brandc                    sv   |   I dH  | j|_| jr| jn|j|_| jr)|js"| jg|_n|j| j |  t }||j	|
  |S )r   Nr   r   r#   r#   r$   
load_brand  s   zCompanyProfileMaker.load_brand)NFr`   )NN)#__name__
__module____qualname____doc__ro   r   r   r   rG   r   r   r0   r   strboolr%   r5   r-   rC   r7   dictr@   rs   rD   r,   r   r   r.   r+   r1   r2   r/   r	   r   r   r#   r#   r#   r$   r   &   sL    

 (+
9(r   	responsescategory_to_findc                 C   s.   d}| D ]}|d |kr||d d 7 }q|S )z,Get the specific item from the LLM responsesr>   categoryrc   r_   r#   )r   r   result_returnrc   r#   r#   r$   r     s   r   __main__)zhttps://www.softworksgroup.com/F)r   r   )<r   r)   r   r'   ossysr   dotenvr   langchain_core.documentsr   pathr   configs.configr   services.company_profiler   r   2services.company_profile.data_classes.company_infor	   r
   r   1services.company_profile.data_classes.llm_resultsr   r   utilsr   utils.chroma_dbr   utils.dynamo_dbr   utils.llm_company_profiler   utils.query.retriverr   utils.search_googler   utils.selenium_driver_chromer   utils.url_parserr   environr   r   r   r   listr   r   r   time
start_timer   COMPANY_STOCK_TIKCER
IS_PE_SHOPcompany_profile_makerrunr5   end_timer   r#   r#   r#   r$   <module>   sZ    

   m