
    dh                         S SK r S SKrS SKrS SKJr  S SKJr  S SKJrJ	r	J
r
Jr  S SKJr  S SKJr  S SKJr   " S S	\\5      rg)
    N)ABC)Path)IteratorListSetTuple)Document)BaseBlobParser)Blobc                       \ rS rSrSrS\S\\   4S jrS\S\\   4S jr	S\
R                  S\S\\\\\4      4S	 jrS
\S\
R                  S\\   S\\   S\\   4
S jrSrg)
VsdxParser   zParser for vsdx files.blobreturnc                 $    U R                  U5      $ )zParse a vsdx file.)
lazy_parse)selfr   s     i/var/www/html/shao/venv/lib/python3.13/site-packages/langchain_community/document_loaders/parsers/vsdx.pyparseVsdxParser.parse   s    t$$    c              #   x  #    UR                  5        n[        R                  " US5       nU R                  X1R                  5      nSSS5        SSS5        W VVVs/ sH  u  pVn[        UUR                  UUS.S9PM      snnn Sh  vN   g! , (       d  f       NN= f! , (       d  f       NW= fs  snnnf  N.7f)zgRetrieve the contents of pages from a .vsdx file
and insert them into documents, one document per page.rN)sourcepage	page_name)page_contentmetadata)as_bytes_iozipfileZipFileget_pages_contentr   r	   )r   r   pdf_file_objzfilepagespage_numberr   r   s           r   r   VsdxParser.lazy_parse   s      <s3u..ukkB 4   9>

 9>4 )"kk'!* 9>

 
	
 
	
 43  

 
	
sK   B:B BB B:$B1B:	B8
B:
B	B  
B.*B:r$   r   c                 "    SSK nSUR                  5       ;  a  [        SR	                  U5      5        gSUR                  5       ;  a  [        SR	                  U5      5        gSUR                  5       ;  a  [        S	R	                  U5      5        gUR                  UR                  S5      5      nUR                  UR                  S5      5      nUR                  UR                  S5      5      n[        US
   S   [        5      (       a(  US
   S    Vs/ sH  owS   R                  5       PM     nnOUS
   S   S   R                  5       /n[        US   S   [        5      (       a  US   S    Vs/ sH  nSUS   -   PM     n	nOSUS   S   S   -   /n	US   S   S   S   S[        U5       n
U
 Vs/ sH  oR                  5       PM     n
nU
 Vs/ sH$  nXR                  UR                  5       5         PM&     nn/ nU H  nUR                  U5      n[        R                  " UR                  U5      5      n[        R                  " SU5      n[        U5      S:  d  Ma  SR!                  U5      nSSSSSSS.nUR#                  5        H  u  nnUR%                  UU5      nM     UR'                  UUS.5        M     U Vs/ sHj  nS[)        U5      R*                   S3UR                  5       ;   d  M0  UUR                  UR                  S[)        U5      R*                   S35      5      S .PMl     nn/ n[-        [/        X5      5       H  u  nu  nnU R1                  XUU5      nSR!                  U Vs/ sH  nUS!   U;   d  M  US"   PM     snU Vs/ sH  nUS!   U:X  d  M  US"   PM     sn-   5      nUR'                  UUU45        M     U$ ! [         a    [        S5      ef = fs  snf s  snf s  snf s  snf s  snf s  snf s  snf )#aT  Get the content of the pages of a vsdx file.

Attributes:
    zfile (zipfile.ZipFile): The vsdx file under zip format.
    source (str): The path of the vsdx file.

Returns:
    list[tuple[int, str, str]]: A list of tuples containing the page number,
    the name of the page and the content of the page
    for each page of the vsdx file.
r   NzfThe xmltodict library is required to parse vsdx files. Please install it with `pip install xmltodict`.zvisio/pages/pages.xmlz'WARNING - No pages.xml file found in {}z visio/pages/_rels/pages.xml.relsz,WARNING - No pages.xml.rels file found in {}zdocProps/app.xmlz%WARNING - No app.xml file found in {}PagesPagez@NameRelationshipsRelationshipzvisio/pages/@Target
PropertiesTitlesOfPartsz	vt:vectorzvt:lpstrz("#text"\s*:\s*"([^\\"]*(?:\\.[^\\"]*)*)"
	-'   é   ô)z\nz\tz\u2013z\u2019z\u00e9rz\u00f4me)r   r   zvisio/pages/_rels/z	.xml.rels)pathcontentr   r   )	xmltodictImportErrornamelistprintformatr   read
isinstanceliststriplenindexjsondumpsrefindalljoinitemsreplaceappendr   stem	enumeratezipget_relationships)r   r$   r   r8   pagesxml_contentappxml_contentpagesxmlrels_contentreldisordered_namesdisordered_pathsordered_namesnameordered_pathsdisordered_pagesr6   r7   string_contentsamplesr   map_symboleskeyvalue	page_pathpagexml_relsordered_pagesr&   r   relationshipspage_s                                r   r"   VsdxParser.get_pages_content(   s   	 #%..*::;BB6JK-U^^5EE@GGOPU^^%559@@HI!*<S1T!U(uzz:L/MN%.__JJ9:&
 &w/7>>0@0I&0Q+0QG""$0Q  +
 !)&1':@@B+ *?;NKTRR 0@P+PC Y/P  + &7G	RS+ $2,#?#P$

$-,-$/ 3@@-$-@ &
% 33DJJLAB% 	 
 !Djj&G!ZZ	(@ANjj;^G 7|a#yy1"" $!%  #/"4"4"6JC#/#7#7U#CL #7 !''|(TU) "< +	
 +	#DO$8$8#9Cu~~GWW!$??JJ!3DO4H4H3IST + 	 	
 57.7-/
*K*$	 !22]LM  99 "2!1V}5 *E.)!1 "2!1V}, *E.)!1	L   +y,!GH%/
( W  	B 	.++ A
<	
(
sG   O O.5O36O8*O=.P9>P:P

	P
P)	PO+r   filelistr_   c           	      "  ^ [        T5      R                  n[        T5      R                  nUSU S3-  n[        U5      UR	                  5       ;  a
  [        5       $ [        U4S jU 5       5      n[        US   S   [        5      (       a  US   S    V	s/ sH  n	U	S   PM
     n
n	OUS   S   S   /n
[        U
 Vs/ sH  n[        Xk-  5      PM     sn5      R                  U5      nU H  n	XR                  XX45      -  nM     U$ s  sn	f s  snf )zGet the relationships of a page and the relationships of its relationships,
etc... recursively.
Pages are based on other pages (ex: background page),
so we need to get all the relationships to get all the content of a single page.
z_rels/z.relsc              3   @   >#    U H  oS    T:X  d  M  US   v   M     g7f)r6   r7   N ).0rb   r   s     r   	<genexpr>/VsdxParser.get_relationships.<locals>.<genexpr>   s%      $
*6-4:OE),s   r+   r,   r-   )r   rV   parentstrr:   setnextr>   r?   intersectionrN   )r   r   r$   rd   r_   	name_pathparent_path	rels_pathpagexml_rels_contentrR   targetstargetra   s    `           r   rN   VsdxParser.get_relationships   s4    JOO	4j''F9+U";;	y>!115L# $
*6$
  
 *?;NKTRR 0@PPC IP  G
 ,O<^LYWXG5<=W6S%&W=

,x
  	 !C),B,BH- M !
 ! >s   D<Drg   N)__name__
__module____qualname____firstlineno____doc__r   r   r	   r   r   r    r!   rl   r   r   intr"   dictr   rN   __static_attributes__rg   r   r   r   r      s     %$ %8H#5 %
t 
(: 
(|__|.1|	eCcM"	#||)) ) s)	)
 4j) 
S)r   r   )rC   rE   r    abcr   pathlibr   typingr   r   r   r   %langchain_community.docstore.documentr	   )langchain_community.document_loaders.baser
   1langchain_community.document_loaders.blob_loadersr   r   rg   r   r   <module>r      s3     	    - - : D BB Br   