
    dhO                         S SK r S SKrS SKrS SKJr  S SKJrJrJrJ	r	J
r
  S SKrS SKJr  S SKJr  \R                   " \5      r " S S\5      rg)    N)Path)AnyDictIteratorPatternUnion)Document)
BaseLoaderc                      \ rS rSr% Sr\R                  " S\R                  5      r\	\
S'   \R                  " S\R                  5      r\	\
S'   \R                  " S5      r\	\
S'   \R                  " S	\R                  5      r\	\
S
'   \R                  " S\R                  5      r\	\
S'   \R                  " S\R                  5      r\	\
S'     S#S\\\4   S\S\4S jjrS\\\4   S\R.                  S\4S jrS\S\\\4   S\4S jrS\S\4S jrS\S\4S jrS\S\4S jrS\S\4S jr S\S\4S jr!S\"\#   4S  jr$S!r%g")$ObsidianLoader   z%Load `Obsidian` files from directory.z^---\n(.*?)\n---\nFRONT_MATTER_REGEXz	{{(.*?)}}TEMPLATE_VARIABLE_REGEXz[^\S\/]#([a-zA-Z_]+[-_/\w]*)	TAG_REGEXz^\s*(\w+)::\s*(.*)$DATAVIEW_LINE_REGEXz\[(\w+)::\s*(.*)\]DATAVIEW_INLINE_BRACKET_REGEXz\((\w+)::\s*(.*)\)DATAVIEW_INLINE_PAREN_REGEXpathencodingcollect_metadatac                 (    Xl         X l        X0l        g)zInitialize with a path.

Args:
    path: Path to the directory containing the Obsidian files.
    encoding: Charset encoding, defaults to "UTF-8"
    collect_metadata: Whether to collect metadata from the front matter.
        Defaults to True.
N)	file_pathr   r   )selfr   r   r   s       e/var/www/html/shao/venv/lib/python3.13/site-packages/langchain_community/document_loaders/obsidian.py__init__ObsidianLoader.__init__   s       0    placeholdersmatchreturnc                 J    S[        U5       S3nUR                  S5      X'   U$ )z/Replace a template variable with a placeholder.__TEMPLATE_VAR___   )lengroup)r   r   r   placeholders       r   _replace_template_var$ObsidianLoader._replace_template_var/   s.     (L(9':"=$)KKN!r   objc                    [        U[        5      (       a1  UR                  5        H  u  p4UR                  USU S35      nM     U$ [        U[        5      (       a.  UR                  5        H  u  pTU R                  XB5      X'   M     U$ [        U[        5      (       a'  [        U5       H  u  pgU R                  Xr5      X'   M     U$ )zIRestore template variables replaced with placeholders to original values.z{{z}})
isinstancestritemsreplacedict_restore_template_varslist	enumerate)r   r*   r   r'   valuekeyiitems           r   r1   %ObsidianLoader._restore_template_vars7   s    c3&2&8&8&:"kk+eWD/AB '; 
 T""!iik
66uK *
 
 T""$S>44TH *
r   contentc                 >   U R                   (       d  0 $ U R                  R                  U5      nU(       d  0 $ 0 n[        R                  " U R
                  U5      nU R                  R                  XBR                  S5      5      n [        R                  " U5      nU R                  Xc5      nSU;   a/  [        US   [        5      (       a  US   R                  S5      US'   U$ ! [        R                  R                    a    ["        R%                  S5        0 s $ f = f)zEParse front matter metadata from the content and return it as a dict.r$   tagsz, z Encountered non-yaml frontmatter)r   r   search	functoolspartialr(   r   subr&   yaml	safe_loadr1   r,   r-   splitparserParserErrorloggerwarning)r   r9   r   r   replace_template_varfront_matter_textfront_matters          r   _parse_front_matter"ObsidianLoader._parse_front_matterD   s    $$I''..w7I')(00&& 
 !88<< ++a.
	>>*;<L66|RL %*\&5I3*O*O'3F';'A'A$'GV${{&& 	NN=>I	s   AC$ $5DDmetadatac                     0 nUR                  5        H6  u  p4[        U5      [        [        [        1;   a  XBU'   M)  [        U5      X#'   M8     U$ )z4Convert a dictionary to a compatible with langchain.)r.   typer-   intfloat)r   rL   resultr5   r4   s        r   !_to_langchain_compatible_metadata0ObsidianLoader._to_langchain_compatible_metadatab   sF    "..*JCE{sC//#s!%j	 +
 r   c                     U R                   (       d
  [        5       $ U R                  R                  U5      nU(       d
  [        5       $ U Vs1 sH  o3iM     sn$ s  snf )z0Return a set of all tags in within the document.)r   setr   findall)r   r9   r   tags       r   _parse_document_tags#ObsidianLoader._parse_document_tagsl   sH    $$5L&&w/5L$%uu%%%s   
Ac                 v   U R                   (       d  0 $ 0 U R                  R                  U5       Vs0 sH  nUS   US   _M     snEU R                  R                  U5       Vs0 sH  nUS   US   _M     snEU R                  R                  U5       Vs0 sH  nUS   US   _M     snE$ s  snf s  snf s  snf )zOParse obsidian dataview plugin fields from the content and return it
as a dict.r   r$   )r   r   rV   r   r   )r   r9   r   s      r   _parse_dataview_fields%ObsidianLoader._parse_dataview_fieldsw   s     $$I
 "55==gFFE a%("F
 "==EEgNNE a%("N
 "??GGPPE a%("P
 	
s   B,$B1B6c                 `    U R                   (       d  U$ U R                  R                  SU5      $ )z4Remove front matter metadata from the given content. )r   r   r?   )r   r9   s     r   _remove_front_matter#ObsidianLoader._remove_front_matter   s)    $$N&&**2w77r   c           
   #   @  #    [        [        U R                  5      R                  S5      5      nU GHT  n[	        X R
                  S9 nUR                  5       nS S S 5        U R                  W5      nU R                  U5      nU R                  U5      nU R                  U5      n[        UR                  5      [        U5      UR                  5       R                  UR                  5       R                  UR                  5       R                   S.U R#                  U5      EUEnU(       d  UR%                  S5      (       a9  SR'                  U[)        UR%                  S/ 5      =(       d    / 5      -  5      US'   [+        XHS9v   GMW     g ! , (       d  f       GN9= f7f)Nz**/*.md)r   )sourcer   createdlast_modifiedlast_accessedr;   ,)page_contentrL   )r2   r   r   globopenr   readrJ   rX   r[   r_   r-   namestatst_ctimest_mtimest_atimerR   getjoinrU   r	   )	r   pathsr   ftextrI   r;   dataview_fieldsrL   s	            r   	lazy_loadObsidianLoader.lazy_load   sL    T$..)..y9:Dd]]3qvvx 4  33D9L,,T2D"99$?O,,T2Ddii.D	99;//!%!5!5!%!5!5 88F "H |''//#&883|//;ArBB$  @@/ 33s   AF	FD2F
F	F)r   r   r   N)zUTF-8T)&__name__
__module____qualname____firstlineno____doc__recompileDOTALLr   r   __annotations__r   r   	MULTILINEr   r   r   r   r-   r   boolr   r   Matchr(   r   r1   r0   rJ   rR   rU   rX   r[   r_   r   r	   rv   __static_attributes__ r   r   r   r      s   /"$**-BBII"NN')zz,		'JWJ$CDIwD#%::.Dbll#SS-/ZZr||.!7  ,.::r||,   !%	1CI1 1 	1$ cN3588	# T#s(^ PS 3 4 <$ 4 	&C 	&C 	&
c 
d 
*8C 8C 8A8H- Ar   r   )r=   loggingr}   pathlibr   typingr   r   r   r   r   r@   langchain_core.documentsr	   )langchain_community.document_loaders.baser
   	getLoggerrx   rE   r   r   r   r   <module>r      s@      	  6 6  - @			8	$\AZ \Ar   