o
    ñ¢øgc  ã                   @   sf   d dl Z d dlmZ d dlmZ d dlmZ G dd„ dƒZedkr1dZ	ee	ƒZ
e
 ¡ Zeeƒ dS dS )	é    N)ÚAmazonTextractPDFLoader)ÚProject)ÚClientConfigc                   @   s&   e Zd ZdZdefdd„Zdd„ ZdS )ÚLoadPDFz Load a pdf and extract the text.Údoc_pathc                 C   s(   || _ tj |¡| _d| _|  ¡ | _d S )Nz_+_)r   ÚosÚpathÚbasenameÚ	file_nameÚdelimiter_textÚload_docÚdocument_text_chunked)Úselfr   © r   ú</var/www/html/XCapMarket/utils/document_loader/pdf_loader.pyÚ__init__   s   zLoadPDF.__init__c                 C   sJ   t | jƒ}| ¡ }g }|D ]}|j}d | d¡¡}| |ddœ¡ q|S )zLoad the docÚ
Útext)r   Útype)r   r   ÚloadÚpage_contentÚjoinÚsplitÚappend)r   ÚdocÚpagesr   ÚpageÚ	chunk_strr   r   r   r      s   
zLoadPDF.load_docN)Ú__name__Ú
__module__Ú__qualname__Ú__doc__Ústrr   r   r   r   r   r   r      s    r   Ú__main__z€s3://xcap-storage-dev/cab33c2f-7058-4a57-af87-88a1e4bc234d/linkedin-1a8b646e-1002-4733-bfc7-3120af611d01/fina_sample_reports.pdf)r   Ú$langchain_community.document_loadersr   Ú+services.ppt_generator.data_classes.projectr   Úutils.client_checkr   r   r   r   Ú
doc_loaderr   Útext_chunksÚprintr   r   r   r   Ú<module>   s   ü