
    dh#                         S SK r S SKrS SKrS SKJr  S SKJrJrJ	r	  S SK
Jr  S SKJr  S SKJr  \ R                   " \5      rSr " S S	\5      rg)
    N)BytesIO)ListOptionalSequence)ElementTree)Document)
BaseLoader@   c                   F   \ rS rSrSr S SS.S\S\S\\   S\4S	 jjjr	      S!S\\
\      S\\
\      S\\   S\S\S\S\
\   4S jjrS\S\S\
\   SS
4S jrS\\   S\S\S\
\   4S jrS\S\S\S\\   4S jrS\S\4S jrS\S\4S jr\S\S\4S j5       rSrg
)"
QuipLoader   zWLoad `Quip` pages.

Port of https://github.com/quip/quip-api/tree/master/samples/baqup
F)allow_dangerous_xml_parsingapi_urlaccess_tokenrequest_timeoutr   c                     SSK Jn  U" X!US9U l        U(       d  [	        S5      eg! [         a    [        S5      ef = f)a]  
Args:
    api_url: https://platform.quip.com
    access_token: token of access quip API. Please refer:
        https://quip.com/dev/automation/documentation/current#section/Authentication/Get-Access-to-Quip's-APIs
    request_timeout: timeout of request, default 60s.
    allow_dangerous_xml_parsing: Allow dangerous XML parsing, defaults to False
r   )
QuipClientz?`quip_api` package not found, please run `pip install quip_api`)r   base_urlr   ac  The quip client uses the built-in XML parser which may causesecurity issues when parsing XML data in some cases. Please see https://docs.python.org/3/library/xml.html#xml-vulnerabilities For more information, set `allow_dangerous_xml_parsing` as True if you are sure that your distribution of the standard library is not vulnerable to XML vulnerabilities.N)quip_api.quipr   ImportErrorquip_client
ValueError)selfr   r   r   r   r   s         a/var/www/html/shao/venv/lib/python3.13/site-packages/langchain_community/document_loaders/quip.py__init__QuipLoader.__init__   s[     	0 &%
 +<  +  	Q 	s   ' =N
folder_ids
thread_idsmax_docsinclude_all_foldersinclude_commentsinclude_imagesreturnc                    U(       d  U(       d  U(       d  [        S5      eU=(       d    / nU(       a  U H  nU R                  USU5        M     U(       aR  U R                  R                  5       nSU;   a  U R                  US   SU5        SU;   a  U R                  US   SU5        [	        [        USU 5      5      nU R                  X&U5      $ )a  
Args:
    :param folder_ids: List of specific folder IDs to load, defaults to None
    :param thread_ids: List of specific thread IDs to load, defaults to None
    :param max_docs: Maximum number of docs to retrieve in total, defaults 1000
    :param include_all_folders: Include all folders that your access_token
           can access, but doesn't include your private folder
    :param include_comments: Include comments, defaults to False
    :param include_images: Include images, defaults to False
z_Must specify at least one among `folder_ids`, `thread_ids` or set `include_all`_folders as Truer   group_folder_idsshared_folder_idsN)r   get_thread_ids_by_folder_idr   get_authenticated_userlistsetprocess_threads)	r   r   r   r   r    r!   r"   	folder_idusers	            r   loadQuipLoader.load=   s    & *5H7 
  %2
'	00AzJ ( ##::<D!T)00+,a #d*00,-q* #j(345
##J@PQQ    r,   depthc           	         SSK JnJn   U R                  R	                  U5      nUS   R                  SSU-  5      n[        R                  " SU SU 35        US    H@  n	SU	;   a  U R                  U	S   US-   U5        M$  SU	;   d  M,  UR                  U	S   5        MB     g	! U ac  nUR
                  S:X  a   [        R                  " SU SU SU 35        O)[        R                  " SU SU SUR
                   35         S	nAg	S	nAfU a3  n[        R                  " SU SU S
UR
                   35         S	nAg	S	nAff = f)z4Get thread ids by folder id and update in thread_idsr   )	HTTPError	QuipErrori  zdepth z!, Skipped over restricted folder z, z, Skipped over folder z due to unknown error Nz due to HTTP error foldertitlez	Folder %sz, Processing folder childrenr,      	thread_id)r   r3   r4   r   
get_foldercodeloggingwarninggetinfor'   append)
r   r,   r1   r   r3   r4   r5   er6   childs
             r   r'   &QuipLoader.get_thread_ids_by_folder_idj   s_    	7	%%00;F& x $$WkI.EFveW$8@AJ'Ee#00+&	: %!!%"45 (+  
	vv}UG#DYKrRSQTU UG#9) E,,-FF85  	OO5i[ A%%&VVH. 	s$   B# #E)AD	E)D>>Einclude_messagesc                 l    / nU H+  nU R                  XRU5      nUc  M  UR                  U5        M-     U$ )z2Process a list of thread into a list of documents.)process_threadr@   )r   r   r"   rD   docsr9   docs          r   r+   QuipLoader.process_threads   s>     #I%%iAQRCC  $ r0   r9   c           
      t   U R                   R                  U5      nUS   S   nUS   S   nUS   S   nUS   S   n[        R                  U5      n[        R                  SU SU SU S	U 35        S
U;   an   U R                   R                  US
   5      n	UUUUS.nSnU(       a  U R                  U	5      nU(       a  US-   U R                  U5      -   n[        US
   U-   US9$ g ! [        R                  R                  R                   a(  n
[        R                  SU SU SU
 35         S n
A
g S n
A
ff = f)Nthreadidr6   linkupdated_useczprocessing thread z title z link z update_ts htmlzError parsing thread  z, skipping, )r6   	update_tsrL   source z/n)page_contentmetadata)r   
get_threadr   _sanitize_titleloggerr?   parse_document_htmlxmletreecElementTree
ParseErrorerrorprocess_thread_imagesprocess_thread_messagesr   )r   r9   r"   rD   rK   r6   rM   rQ   sanitized_titletreerA   rU   texts                r   rF   QuipLoader.process_thread   sr    !!,,Y78$T*	x )h'8$^4	$44U; 7?2C D6YK1	

 V'';;F6NK )&	H D11$7d{T%A%A)%LL#F^d2!  / 99))44 4UG1YK|TUSVWXs   9C' '(D7D22D7rb   c                    Sn SSK Jn  SSKJn  UR	                  S5       H  nUR                  S5      nU(       a  UR                  S5      (       d  M3  UR                  S	5      u    pxn	U R                  R                  X5      n
 UR                  [        U
R                  5       5      5      nUS
-   UR                  U5      -   nM     U$ ! [         a    [        S5      ef = f! [         a  n[        R!                  SU 35        UeS nAff = f)NrS   r   )Image)pytesseractzg`Pillow or pytesseract` package not found, please run `pip install Pillow` or `pip install pytesseract`imgsrcz/blob/
z!failed to convert image to text, )PILrf   rg   r   iterr>   
startswithsplitr   get_blobopenr   readimage_to_stringOSErrorrX   r^   )r   rb   rc   rf   rg   rh   ri   _r9   blob_idblob_responseimagerA   s                r   r_    QuipLoader.process_thread_images   s   	!/ 99U#C''%.CcnnW55'*yy~$AqW ,,55iIM

7=+=+=+?#@Ad{[%@%@%GG $ '  	D 	   @DEs#   C
 ?C#
C #
D-DDc                     S n/ n U R                   R                  XSS9nUR                  U5        U(       a  US   S   S-
  nOOMA  UR                  5         U Vs/ sH  oUS   PM	     nnSR	                  U5      $ s  snf )Nd   )max_created_useccountcreated_usecr8   rc   rk   )r   get_messagesextendreversejoin)r   r9   r|   messageschunkmessagetextss          r   r`   "QuipLoader.process_thread_messages   s    $$11C 2 E OOE"#(9^#<q#@   	089W9yy :s   A;r6   c                     [         R                  " SSU 5      n[         R                  " SSU5      n[        U5      [        :  a	  US [         nU$ )Nz\srP   z(?u)[^- \w.]rS   )resublen_MAXIMUM_TITLE_LENGTH)r6   ra   s     r   rW   QuipLoader._sanitize_title   sH    &&U3&&"oF"77-.D/DEOr0   )r   )<   )NNi  FFF)__name__
__module____qualname____firstlineno____doc__strr   intboolr   r   r   r.   r'   r   r+   rF   r   r_   r`   staticmethodrW   __static_attributes__ r0   r   r   r      s    *,	$ -2$$ $ "#	$ &*$P +/*."&$)!&$+RT#Y'+R T#Y'+R 3-	+R
 "+R +R +R 
h+RZ#6#6%(#66:3i#6	#6J	"3-	9=	QU		h	**.2*FJ*	(	*X+ # 6     $ s s  r0   r   )r<   r   xml.etree.cElementTreerZ   ior   typingr   r   r   xml.etree.ElementTreer   langchain_core.documentsr   )langchain_community.document_loaders.baser	   	getLoggerr   rX   r   r   r   r0   r   <module>r      sD     	   + + - - @			8	$ h hr0   