
    dh$                         S r SSKrSSKrSSKJr  SSKJr  SSKJr  SSK	J
r
JrJrJrJrJr  SSKJr  SSKJr  \R(                  " \5      r " S	 S
\5      rg)zDocument loader for EverNote ENEX export files.

This module provides functionality to securely load and parse EverNote notebook
export files (``.enex`` format) into LangChain Document objects.
    N)	b64decode)Path)strptime)AnyDictIteratorListOptionalUnion)Document)
BaseLoaderc                       \ rS rSrSrSS\\\4   S\4S jjr	S\
\   4S jrS\
\   4S jr\S	\S\4S
 j5       r\S\S\4S j5       r\SS\S\\   S\4S jj5       r\S\S\
\\\4      4S j5       rSrg)EverNoteLoader   a?  Document loader for EverNote ENEX export files.

Loads EverNote notebook export files (``.enex`` format) into LangChain Documents.
Extracts plain text content from HTML and preserves note metadata including
titles, timestamps, and attachments. Uses secure XML parsing to prevent
vulnerabilities.

The loader supports two modes:
- Single document: Concatenates all notes into one Document (default)
- Multiple documents: Creates separate Documents for each note

`Instructions for creating ENEX files <https://help.evernote.com/hc/en-us/articles/209005557-Export-notes-and-notebooks-as-ENEX-or-HTML>`__

Example:

.. code-block:: python

    from langchain_community.document_loaders import EverNoteLoader

    # Load all notes as a single document
    loader = EverNoteLoader("my_notebook.enex")
    documents = loader.load()

    # Load each note as a separate document:
    # documents = [ document1, document2, ... ]
    loader = EverNoteLoader("my_notebook.enex", load_single_document=False)
    documents = loader.load()

    # Lazy loading for large files
    for doc in loader.lazy_load():
        print(f"Title: {doc.metadata.get('title', 'Untitled')}")
        print(f"Content: {doc.page_content[:100]}...")

Note:
    Requires the ``lxml`` and ``html2text`` packages to be installed.
    Install with: ``pip install lxml html2text``
	file_pathload_single_documentc                 0    [        U5      U l        X l        g)a\  Initialize the EverNote loader.

Args:
    file_path: Path to the EverNote export file (``.enex`` extension).
    load_single_document: Whether to concatenate all notes into a single
        Document. If ``True``, only the ``source`` metadata is preserved.
        If ``False``, each note becomes a separate Document with its own
        metadata.
N)strr   r   )selfr   r   s      e/var/www/html/shao/venv/lib/python3.13/site-packages/langchain_community/document_loaders/evernote.py__init__EverNoteLoader.__init__<   s     Y$8!    returnc              #     #    U R                  U R                  5       H`  nUR                  S5      c  M  [        US   0 UR	                  5        VVs0 sH  u  p#US;  d  M  X#_M     snnESU R                  0ES9v   Mb     gs  snnf 7f)a  Lazily load documents from the EverNote export file.

Lazy loading allows processing large EverNote files without
loading everything into memory at once. This method yields Documents
one by one by parsning the XML. Each document represents a note in the EverNote
export, containing the note's content as ``page_content`` and metadata including
``title``, ``created/updated`` ``timestamps``, and other note attributes.

Yields:
    Document: A Document object for each note in the export file.
contentN)r   content-rawresourcesourcepage_contentmetadata)_parse_note_xmlr   getr   items)r   notekeyvalues       r   
_lazy_loadEverNoteLoader._lazy_loadI   s      ((8Dxx	".!%i /3jjl.:
"*PP 'CJ.: $T^^4
 
 9
s   /B	B	B!B'"B	c              #   
  #    U R                   (       d  U R                  5        Sh  vN   g[        SR                  U R                  5        Vs/ sH  oR                  PM     sn5      SU R
                  0S9v   g NTs  snf 7f)a  Load documents from EverNote export file.

Depending on the ``load_single_document`` setting, either yields individual
Documents for each note or a single Document containing all notes.

Yields:
    Document: Either individual note Documents or a single combined Document.
N r   r    )r   r)   r   joinr!   r   )r   documents     r   	lazy_loadEverNoteLoader.lazy_loadc   sq      (((((WW;???;LM;Lx**;LM #DNN3	  ) Ns!   %BA<'BA>#B>Br   c                      SSK nUR                  U 5      R                  5       $ ! [         a  n[        S5      UeSnAff = f)a@  Parse HTML content from EverNote into plain text.

Converts HTML content to plain text using the ``html2text`` library.
Strips whitespace from the result.

Args:
    content: HTML content string from EverNote.

Returns:
    Plain text version of the content.

Raises:
    ImportError: If ``html2text`` is not installed.
r   NzCould not import `html2text`. Although it is not a required package to use LangChain, using the EverNote loader requires `html2text`. Please install `html2text` via `pip install html2text` and try again.)	html2textstripImportError)r   r2   es      r   _parse_contentEverNoteLoader._parse_contentv   sL     		&&w/5577 	X 		s   "% 
A ;A r   c                 >   0 nU  H  nUR                   S:X  ai  UR                  (       a  [        UR                  5      OSXR                   '   [        R                  " XR                      5      R                  5       US'   M|  UR                  XR                   '   M     U$ )a+  Parse resource elements from EverNote XML.

Extracts resource information like attachments, images, etc.
Base64 decodes data elements and generates MD5 hashes.

Args:
    resource: List of XML elements representing a resource.

Returns:
    Dictionary containing resource metadata and decoded data.
datar   hash)tagtextr   hashlibmd5	hexdigest)r   rsc_dictelems      r   _parse_resourceEverNoteLoader._parse_resource   sv     $&Dxx6!=AYYYtyy%9C"#*;;x/A#B#L#L#N %)YY"  r   Nr&   prefixc                   ^ 0 n/ nS[         S[         4U4S jjnU  GH%  nUR                  S:X  a<  [        R                  UR                  5      X%R                  '   UR                  US'   MP  UR                  S:X  a&  UR                  [        R                  U5      5        M  UR                  S:X  d  UR                  S:X  a$  [        UR                  S	5      X%R                  '   M  UR                  S
:X  a3  [        R                  XUR                  5      nUR                  U5        GM  UR                  X%R                  '   GM(     [        U5      S:  a  X2S'   UR                  5        VVs0 sH  u  pxU" U5      U_M     snn$ s  snnf )aQ  Parse a note element from EverNote XML.

Extracts note content, metadata, resources, and attributes.
Handles nested note-attributes recursively with prefixes.

Args:
    note: List of XML elements representing a note.
    prefix: Optional prefix for nested attribute names.

Returns:
    Dictionary containing note content and metadata.
element_tagr   c                    > Tc  U $ T SU  3$ )N. )rF   rD   s    r   
add_prefix.EverNoteLoader._parse_note.<locals>.add_prefix   s    ~""XQ{m,,r   r   r   r   createdupdatedz%Y%m%dT%H%M%SZznote-attributesr   )r   r;   r   r6   r<   appendrB   r   _parse_noteupdatelenr%   )	r&   rD   	note_dict	resourcesrJ   rA   additional_attributesr'   r(   s	    `       r   rO   EverNoteLoader._parse_note   s?    %'			-C 	-C 	-
 Dxx9$&4&C&CDII&N	((#+/99	-(Z'  !?!?!EFY&$((i*?&.tyy:J&K	((#..(6(B(B(()%   !67&*ii	((# " y>A$-j!9B9JK9J:3
3&9JKKKs   ,Fxml_filec           	   #      #     SSK Jn  UR                  U SSSSSS9nU H.  u  pEUR                  S	:X  d  M  [        R                  U5      v   M0     g! [         a  n[        R	                  S5        UeSnAff = f7f)
av  Parse EverNote XML file securely.

Uses ``lxml`` with secure parsing configuration to prevent XML vulnerabilities
including XXE attacks, XML bombs, and malformed XML exploitation.

Args:
    xml_file: Path to the EverNote export XML file.

Yields:
    Dictionary containing parsed note data for each note in the file.

Raises:
    ImportError: If ``lxml`` is not installed.
r   )etreezCould not import `lxml`. Although it is not a required package to use LangChain, using the EverNote loader requires `lxml`. Please install `lxml` via `pip install lxml` and try again.Nzutf-8FT)encodingresolve_entities
no_networkrecover	huge_treer&   )	lxmlrX   r4   loggererror	iterparser;   r   rO   )rV   rX   r5   contextactionrA   s         r   r#   EverNoteLoader._parse_note_xml   s      	" //" " 
 $LFxx6!$0066 $#  	LL?
 G	s,   A<A )A<A<
A9A44A99A<)r   r   )T)N)__name__
__module____qualname____firstlineno____doc__r   r   r   boolr   r   r   r)   r/   staticmethodr6   listdictrB   r	   r
   rO   r   r   r#   __static_attributes__rI   r   r   r   r      s    $L9%T	"2 9$ 9HX. 48H- &    4 $ 4  . )L$ )L )L )L )LV $7# $7(4S>*B $7 $7r   r   )ri   r=   loggingbase64r   pathlibr   timer   typingr   r   r   r	   r
   r   langchain_core.documentsr   )langchain_community.document_loaders.baser   	getLoggerre   r_   r   rI   r   r   <module>rw      sE         = = - @			8	$e7Z e7r   