
    dh                         S SK r S SKrS SKJr  S SKJrJrJrJrJ	r	  S SK
Jr  S SKJr  S SKJr  \ R                   " \5      r " S S\5      rg)	    N)Path)AnyIteratorListMappingOptional)Document)
BaseLoader)BibtexparserWrapperc                       \ rS rSrSrSSSSSS.S\S	\\   S
\\   S\\   S\	S\4S jjr
S\\\4   S\\   4S jrS\\   4S jrSrg)BibtexLoader   a  Load a `bibtex` file.

Each document represents one entry from the bibtex file.

If a PDF file is present in the `file` bibtex field, the original PDF
is loaded into the document text. If no such file entry is present,
the `abstract` field is used instead.
Ni  Fz
[^:]+\.pdf)parsermax_docsmax_content_charsload_extra_metadatafile_pattern	file_pathr   r   r   r   r   c                    Xl         U=(       d
    [        5       U l        X0l        X@l        XPl        [        R                  " U5      U l        g)a  Initialize the BibtexLoader.

Args:
    file_path: Path to the bibtex file.
    parser: The parser to use. If None, a default parser is used.
    max_docs: Max number of associated documents to load. Use -1 means
                   no limit.
    max_content_chars: Maximum number of characters to load from the PDF.
    load_extra_metadata: Whether to load extra metadata from the PDF.
    file_pattern: Regex pattern to match the file name in the bibtex.
N)	r   r   r   r   r   r   recompile
file_regex)selfr   r   r   r   r   r   s          c/var/www/html/shao/venv/lib/python3.13/site-packages/langchain_community/document_loaders/bibtex.py__init__BibtexLoader.__init__   s:    * #5 3 5 !2#6 **\2    entryreturnc                    SS K n[        U R                  5      R                  nU R                  R                  UR                  SS5      5      nU(       d  g / nU H8  n UR                  X6-  5       nUR                  S U 5       5        S S S 5        M:     SR                  U5      =(       d    UR                  SS5      n	U R                  (       a  U	S U R                   n	U R                  R                  XR                   S9n
[#        U	U
S9$ ! , (       d  f       M  = f! [         a   n[        R                  U5         S nAM  S nAff = f)	Nr   file c              3   >   #    U H  oR                  5       v   M     g 7f)N)get_text).0pages     r   	<genexpr>+BibtexLoader._load_entry.<locals>.<genexpr>@   s      ?QTQs   
abstract)
load_extra)page_contentmetadata)fitzr   r   parentr   findallgetopenextendFileNotFoundErrorloggerdebugjoinr   r   get_metadatar   r	   )r   r   r.   
parent_dir
file_namestexts	file_namefecontentr-   s              r   _load_entryBibtexLoader._load_entry4   s   $..)00
__,,UYYvr-BC
#I YYz56!LL ?Q ?? 76 $ ))E"?eii
B&?!!6 6 67G;;++E>V>V+W 
 	
 76$  Q s6   D 2DD 
D	D D  
E
*EE
c              #      #     SSK nU R                  R                  U R                  5      nU R
                  (       a  USU R
                   nU H!  nU R                  U5      nU(       d  M  Uv   M#     g! [         a    [        S5      ef = f7f)zLoad bibtex file using bibtexparser and get the article texts plus the
article metadata.
See https://bibtexparser.readthedocs.io/en/master/

Returns:
    a list of documents with the document.page_content in text format
r   NzGPyMuPDF package not found, please install it with `pip install pymupdf`)r.   ImportErrorr   load_bibtex_entriesr   r   r@   )r   r.   entriesr   docs        r   	lazy_loadBibtexLoader.lazy_loadL   s     	 ++11$..A==o.GE""5)Cs	   	( 	s"   BA5 A B,	B5BB)r   r   r   r   r   r   )__name__
__module____qualname____firstlineno____doc__strr   r   intboolr   r   r   r	   r@   r   rG   __static_attributes__ r   r   r   r      s     15"&+0$))33 ,-	3
 3-3 $C=3 "3 38
c!2 
x7I 
08H- r   r   )loggingr   pathlibr   typingr   r   r   r   r   langchain_core.documentsr	   )langchain_community.document_loaders.baser
   $langchain_community.utilities.bibtexr   	getLoggerrI   r5   r   rR   r   r   <module>rZ      s;     	  9 9 - @ D			8	$T: Tr   