
    dhP5                        S SK r S SKrS SKrS SKrS SKJr  S SKJrJrJ	r	J
r
JrJrJr  S SKrS SKJr  S SKJr  S SKJrJr  S SKJr  SrS	rS
rSrSrSrSrSrSr \RB                  " \"5      r#\" SSSS9 " S S\\5      5       r$g)    N)Path)AnyDictListMappingOptionalSequenceUnion)
deprecated)Document)	BaseModelmodel_validator)
BaseLoaderz#{http://www.w3.org/1999/xhtml}tablexpathidsourcename	structuretagprojectsz#https://api.docugami.com/v1preview1z0.0.24z1.0z!docugami_langchain.DocugamiLoader)sinceremovalalternative_importc                   P   \ rS rSr% Sr\r\\S'    \	R                  R                  S5      r\\   \S'    Sr\\S'    Sr\\S	'    S
r\\S'    Sr\\S'    Sr\\S'    Sr\\S'    Sr\\S'    Sr\\S'    Sr\\   \S'    Sr\\\      \S'    \\\\\4         \S'    Sr\\S'    \" SS9\S\ \\!4   S\!4S j5       5       r"  S,S\#S \\   S!\\$   S\%\&   4S" jjr'S\S\%\    4S# jr(S\S\%\    4S$ jr)S%\ S\ 4S& jr*  S,S'\S\S \\   S(\\$   S\%\&   4
S) jjr+S\%\&   4S* jr,S+r-g)-DocugamiLoader   z\Load from `Docugami`.

To use, you should have the ``dgml-utils`` python package installed.
apiDOCUGAMI_API_KEYaccess_tokeni   max_text_length    min_text_lengthi   max_metadata_lengthFinclude_xml_tagsr   parent_hierarchy_levelsdoc_idparent_id_keysub_chunk_tablesTwhitespace_normalize_textN	docset_iddocument_ids
file_paths(include_project_metadata_in_doc_metadatabefore)modevaluesreturnc                 P   UR                  S5      (       a!  UR                  S5      (       a  [        S5      eUR                  S5      (       d!  UR                  S5      (       d  [        S5      eUR                  S5      (       a!  UR                  S5      (       d  [        S5      eU$ )zValidate that either local file paths are given, or remote API docset ID.

Args:
    values: The values to validate.

Returns:
    The validated values.
r,   r*   z7Cannot specify both file_paths and remote API docset_idz6Must specify either file_paths or remote API docset_idr   z7Must specify access token if using remote API docset_id)get
ValueError)clsr0   s     e/var/www/html/shao/venv/lib/python3.13/site-packages/langchain_community/document_loaders/docugami.pyvalidate_local_or_remote'DocugamiLoader.validate_local_or_remoteT   s     ::l##

;(?(?VWWzz,''

;0G0GUVV::k""6::n+E+EVWW    contentdocument_nameadditional_doc_metadatac           
      \  ^ ^^  SSK Jn   SSKJn  SSKJn  SUS[        4UUU 4S	 jjnUR                  [        R                  " U5      5      nUR                  5       n	U" U	T R                  T R                  T R                  T R                  T R                   T R"                  S
9n
0 nU
 H  nU" U5      nUR$                  R'                  [(        5      nU(       d  M3  XU'   UR*                  (       d  MJ  U" UR*                  5      nUR$                  R'                  [(        5      nU(       d  M  UR,                  (       d  M  UUR$                  T R.                  '   XU'   M     [1        UR3                  5       5      $ ! [         a    [        S5      ef = f! [         a    [        S5      ef = f)z6Parse a single DGML document into a list of Documents.r   etreePCould not import lxml python package. Please install it with `pip install lxml`.)Chunk)
get_chunkszaCould not import from dgml-utils python package. Please install it with `pip install dgml-utils`.dg_chunkr1   c                   > [         R                  " U R                  R                  5       5      R	                  5       n[
        U R                  [        U[        T[        T[        U R                  [        U R                  0nU R                  nT(       a"  TR                  (       a  UR                  T5        [!        US TR"                   US9$ )N)page_contentmetadata)hashlibmd5textencode	hexdigest	XPATH_KEYr   ID_KEYDOCUMENT_NAME_KEYDOCUMENT_SOURCE_KEYSTRUCTURE_KEYr   TAG_KEYr   r-   updater   r    )rC   
_hashed_idrF   rI   r<   r;   selfs       r6   _build_framework_chunk:DocugamiLoader._parse_dgml.<locals>._build_framework_chunk   s     X]]%9%9%;<FFHJ8>>
!=#]x11H ==D&@@OO$;<!"8D$8$89! r9   )r"   r    r)   r(   r$   r%   )lxmlr?   ImportErrordgml_utils.modelsrA   dgml_utils.segmentationrB   r   parseioBytesIOgetrootr"   r    r)   r(   r$   r%   rF   r3   rM   parentrE   r'   listr0   )rT   r:   r;   r<   r?   rA   rB   rU   treeroot	dg_chunksframework_chunksrC   framework_chunkchunk_idframework_parent_chunk	parent_ids   ` ``             r6   _parse_dgmlDocugamiLoader._parse_dgmlj   s   	"	/:	U 	x 	 	. {{2::g./||~ 00 00&*&D&D!22!22$($@$@
	 13!H4X>O&//33F;Hx-<*???-CHOO-T* 6 ? ? C CF KI y%;%H%H%HGP001C1CD6L3 " $++-..A  	= 	  	C 	s   E< F <FF+c                 ^   U R                    SU S3n/ nU(       a  [        R                  " USSU R                   30S9nUR                  (       a7  UR                  5       nUR                  US   5        UR                  SS5      nO[        S	U S
UR                   S35      eU(       a  M  U$ )z1Gets all document details for the given docset ID	/docsets/z
/documentsAuthorizationBearer )headers	documentsnextNFailed to download 
 (status: ))	r   requestsr3   r   okjsonextend	Exceptionstatus_code)rT   r*   urlall_documentsresponsedatas         r6   _document_details_for_docset_id.DocugamiLoader._document_details_for_docset_id   s    
)I;j9||(GD4E4E3F*GHH {{}}$$T+%67hhvt,)#j9M9M8NaP  c r9   c                 `   U R                    SU 3n/ nU(       a  [        R                  " SUSSU R                   300 S9nUR                  (       a7  UR                  5       nUR                  US   5        UR                  SS5      nO[        S	U S
UR                   S35      eU(       a  M  U$ )z0Gets all project details for the given docset IDz/projects?docset.id=GETrm   rn   ro   r~   r   rq   Nrr   rs   rt   )
r   ru   requestr   rv   rw   rx   r3   ry   rz   )rT   r*   r{   all_projectsr}   r~   s         r6   _project_details_for_docset_id-DocugamiLoader._project_details_for_docset_id   s    
.yk:''(GD4E4E3F*GH	H {{}}##D$45hhvt,)#j9M9M8NaP  c  r9   projectc           	         UR                  [        5      nU R                   SU S3n/ n0 nU(       a  [        R                  " SUSSU R
                   300 S9nUR                  (       a7  UR                  5       nUR                  US   5        UR                  SS	5      nO.UR                  S
:X  a  U$ [        SU SUR                   S35      eU(       a  M  U GH  nUR                  S5      n	UR                  S5      n
UR                  S5      nU	S:X  d  M?  U
(       d  MH  U(       d  MQ  U[           n0 n[        R                  " SU
 S3SSU R
                   300 S9nUR                  (       a   SSKJn  UR                  [        R                   " UR"                  5      5      nUR%                  5       nUR&                  nUR)                  SUS9nU Hp  nUR)                  SUS9S   R*                  nSR-                  UR)                  SUS9S   R/                  5       5      R1                  5       nUS	U R2                   UU'   Mr     XU'   GMu  [        SU
 S3S-   5      e   U$ ! [         a    [        S5      ef = f)z#Gets project metadata for all filesz
/projects/z/artifacts/latestr   rm   rn   r   	artifactsrq   Ni  rr   rs   rt   r   r{   documentzreport-values.xmlz/contentr   r>   r@   z
//pr:Entry)
namespacesz./pr:Heading z
./pr:Valuez	/content z (status: {response.status_code}))r3   rM   r   ru   r   r   rv   rw   rx   rz   ry   rW   r?   rX   r[   r\   r]   r:   r^   nsmapr   rI   joinitertextstripr#   )rT   r   
project_idr{   all_artifactsper_file_metadatar}   r~   artifactartifact_nameartifact_urlartifact_docr&   rF   r?   artifact_treeartifact_rootnsentriesentryheadingvalues                         r6   _metadata_for_project$DocugamiLoader._metadata_for_project   s   [[(

*ZL0AB"$''(GD4E4E3F*GH	H {{}}$$T+%67hhvt,%%,(()#j9M9M8NaP  c& &H$LL0M#<<.L#<<
3L 33%f-!# $++#nH-,8I8I7J.KL	 ;;. %*KK

8;K;K0L$MM$1$9$9$;M&,,B+11,21NG!("'++n+"LQ"O"T"T #!KKKDQGPPR!%'  -22LD4L4L,M) ") 19f-#-l^9E<= K &T ! - ' )I s   1I%%I;document_idadditional_metadatac                    U R                    SU SU S3n[        R                  " SUSSU R                   300 S9nUR                  (       a  U R                  UR                  UUS9$ [        S	U S
UR                   S35      e)zLoad chunks for a document.rl   z/documents/z/dgmlr   rm   rn   r   )r:   r;   r<   rr   rs   rt   )	r   ru   r   r   rv   ri   r:   ry   rz   )rT   r   r*   r;   r   r{   r}   s          r6   _load_chunks_for_document(DocugamiLoader._load_chunks_for_document(  s     
)I;k+eL##$0A0A/B&CD	
 ;;## ((+(; $   %cU*X5I5I4J!L r9   c           	         / nU R                   (       Ga9  U R                  (       Ga'  U R                  U R                  5      nU R                  (       a)  U Vs/ sH  o3[           U R                  ;   d  M  UPM     nnU R                  U R                  5      n0 nU(       aV  U R                  (       aE  U H?  nU R                  U5      nU H%  nX;  a  Xx   XX'   M  XX   R                  Xx   5        M'     MA     U HP  n	U	[           n
U	R                  [        5      nUR                  U
5      nXR                  U
U R                  UUS9-  nMR     U$ U R                  (       a]  U R                   HM  n[        U5      n[        US5       nXR                  UR!                  5       UR"                  S9-  nSSS5        MO     U$ s  snf ! , (       d  f       Mg  = f)zLoad documents.)r   r*   r;   r   rb)r:   r;   N)r   r*   r   r+   rM   r   r-   r   rR   r3   rN   r   r,   r   openri   readr   )rT   chunks_document_detailsd_project_detailscombined_project_metadatar   rF   file_iddocr&   doc_namedoc_metadatapathfiles                  r6   loadDocugamiLoader.loadD  s   !# $ D DT^^ T  0%0!fIARAR4RA0 " %  $BB4>>R9;%D$Q$Q  0G#99'BH#+"CAIAR5>5>EEhFWX	 $,  0 )V77#458<<VD88 &"nn"*(4	 9  	 )(  __Dz$%.. $		&*ii /  F &% ( K%> &%s   F>4F>+G
G	 )NN).__name__
__module____qualname____firstlineno____doc__DEFAULT_API_ENDPOINTr   str__annotations__osenvironr3   r   r   r    intr"   r#   r$   boolr%   r'   r(   r)   r*   r+   r	   r
   r   r-   r   classmethodr   r   r7   bytesr   r   r   ri   r   r   r   r   r   __static_attributes__r   r9   r6   r   r      s   
 $C#+"$**..1C"DL(3-D/OS,OSS""/"d"8#$S$K!M3!)"d"9&*t*'  $Ix}#,,0L(8C=)0/%c	"2344&59,d9V(#d38n    $. (,59	I/I/  }I/ "*'!2	I/
 
hI/V d * T
 .E!T E!d E!V (,15   }	
 &g. 
h8-d8n -r9   r   )%rG   r\   loggingr   pathlibr   typingr   r   r   r   r   r	   r
   ru   langchain_core._api.deprecationr   langchain_core.documentsr   pydanticr   r   )langchain_community.document_loaders.baser   
TABLE_NAMErL   rM   rO   rN   rP   rQ   PROJECTS_KEYr   	getLoggerr   loggerr   r   r9   r6   <module>r      s     	  	  F F F  6 - / @2
		  
< 			8	$ 
:
NZ N
Nr9   