
    dhj"                     r    S SK r S SKJr  S SKJr  S SKJrJrJrJ	r	J
r
Jr  S SKJr  S SKJr   " S S\5      rg)	    N)PathLike)Path)AnyCallableDictIteratorOptionalUnion)Document)
BaseLoaderc                       \ rS rSrSr     SS\\\4   S\S\\   S\\	   S\\
\\/\4      S	\	S
\	4S jjrS\\   4S jrS\S\S\\   4S jrS\S\4S jrS\\\4   S\S\\\4   4S jrS\SS4S jrSrg)
JSONLoader   a  
Load a `JSON` file using a `jq` schema.

Setup:
    .. code-block:: bash

        pip install -U jq

Instantiate:
    .. code-block:: python

        from langchain_community.document_loaders import JSONLoader
        import json
        from pathlib import Path

        file_path='./sample_quiz.json'
        data = json.loads(Path(file_path).read_text())
        loader = JSONLoader(
                 file_path=file_path,
                 jq_schema='.quiz',
                 text_content=False)

Load:
    .. code-block:: python

        docs = loader.load()
        print(docs[0].page_content[:100])
        print(docs[0].metadata)

    .. code-block:: python

        {"sport": {"q1": {"question": "Which one is correct team name in
        NBA?", "options": ["New York Bulls"
        {'source': '/sample_quiz
        .json', 'seq_num': 1}

Async load:
    .. code-block:: python

        docs = await loader.aload()
        print(docs[0].page_content[:100])
        print(docs[0].metadata)

    .. code-block:: python

        {"sport": {"q1": {"question": "Which one is correct team name in
        NBA?", "options": ["New York Bulls"
        {'source': '/sample_quizg
        .json', 'seq_num': 1}

Lazy load:
    .. code-block:: python

        docs = []
        docs_lazy = loader.lazy_load()

        # async variant:
        # docs_lazy = await loader.alazy_load()

        for doc in docs_lazy:
            docs.append(doc)
        print(docs[0].page_content[:100])
        print(docs[0].metadata)

    .. code-block:: python

        {"sport": {"q1": {"question": "Which one is correct team name in
        NBA?", "options": ["New York Bulls"
        {'source': '/sample_quiz
        .json', 'seq_num': 1}
N	file_path	jq_schemacontent_keyis_content_key_jq_parsablemetadata_functext_content
json_linesc                      SSK nXl         [        U5      R                  5       U l        UR                  U5      U l        X@l        X0l        XPl	        X`l
        Xpl        g! [         a    [        S5      ef = f)a  Initialize the JSONLoader.

Args:
    file_path (Union[str, PathLike]): The path to the JSON or JSON Lines file.
    jq_schema (str): The jq schema to use to extract the data or text from
        the JSON.
    content_key (str): The key to use to extract the content from
        the JSON if the jq_schema results to a list of objects (dict).
        If is_content_key_jq_parsable is True, this has to be a jq compatible
        schema. If is_content_key_jq_parsable is False, this should be a simple
        string key.
    is_content_key_jq_parsable (bool): A flag to determine if
        content_key is parsable by jq or not. If True, content_key is
        treated as a jq schema and compiled accordingly. If False or if
        content_key is None, content_key is used as a simple string.
        Default is False.
    metadata_func (Callable[Dict, Dict]): A function that takes in the JSON
        object extracted by the jq_schema and the default metadata and returns
        a dict of the updated metadata.
    text_content (bool): Boolean flag to indicate whether the content is in
        string format, default to True.
    json_lines (bool): Boolean flag to indicate whether the input is in
        JSON Lines format.
r   Nz=jq package not found, please install it with `pip install jq`)jqImportErrorr   resolver   compile
_jq_schema_is_content_key_jq_parsable_content_key_metadata_func_text_content_json_lines)	selfr   r   r   r   r   r   r   r   s	            h/var/www/html/shao/venv/lib/python3.13/site-packages/langchain_community/document_loaders/json_loader.py__init__JSONLoader.__init__T   su    D	G i002**Y/+E('+)%  	O 	s   
A A5returnc              #     #    SnU R                   (       af  U R                  R                  SS9 nU H=  nUR                  5       nU(       d  M  U R	                  X15       H  nUv   US-  nM     M?     SSS5        gU R	                  U R                  R                  SS9U5       H  nUv   US-  nM     g! , (       d  f       g= f7f)z-Load and return documents from the JSON file.r   z	utf-8-sig)encoding   N)r!   r   openstrip_parse	read_text)r"   indexflinedocs        r#   	lazy_loadJSONLoader.lazy_load   s     $$k$:aD::<Dt#';;t#;C"%I!QJE $<  ;: {{((+(> 	
	 ;:s#   -CB6%B63AC6
C Ccontentr.   c              #   P  #    U R                   R                  [        R                  " U5      5      nU R                  b  U R                  U5        [        X2S-   5       HC  u  pEU R                  US9nU R                  U[        U R                  5      US9n[        XgS9v   ME     g7f)z#Convert given content to documents.Nr)   )sample)r6   sourceseq_num)page_contentmetadata)r   inputjsonloadsr   _validate_content_key	enumerate	_get_text_get_metadatastrr   r   )r"   r4   r.   datair6   textr:   s           r#   r,   JSONLoader._parse   s     $$TZZ%89
 (&&t,"43IA>>>0D))c$..&91 * H @@ 4s   B$B&r6   c                 .   U R                   be  U R                  (       aE  U R                  R                  U R                   5      nUR	                  U5      R                  5       nOXR                      nOUnU R                  (       a0  [        U[        5      (       d  Ub  [        S[        U5       S35      e[        U[        5      (       a  U$ [        U[        [        45      (       a  U(       a  [        R                  " U5      $ S$ Ub  [        U5      $ S$ )zConvert sample to string formatz%Expected page_content is string, got z instead.                     Set `text_content=False` if the desired input for                     `page_content` is not a string )r   r   r   r   r;   firstr    
isinstancerB   
ValueErrortypedictlistr<   dumps)r"   r6   compiled_content_keyr4   s       r#   r@   JSONLoader._get_text   s    (//'+wwt7H7H'I$.44V<BBD !2!23Gj#&>&>7CV7W G3 4  %%N$..*14::g&9r9#*#63w<>B>    additional_fieldsc                     U R                   b@  U R                  X5      n[        U[        5      (       d  [        S[	        U5       S35      eU$ U$ )z
Return a metadata dictionary base on the existence of metadata_func
:param sample: single data payload
:param additional_fields: key-word arguments to be added as metadata values
:return:
zUExpected the metadata_func to return a dict but got                                 ``)r   rJ   rM   rK   rL   )r"   r6   rS   results       r#   rA   JSONLoader._get_metadata   s\     *((CFfd++ ""&v,q2  M$$rR   rC   c                    UR                  5       n[        U[        5      (       d  [        S[	        U5       S35      eU R
                  (       d5  UR                  U R                  5      c  [        SU R                   S35      eU R
                  (       a]  U R                  R                  U R                  5      R                  U5      R                  5       c  [        SU R                   S35      egg)zCheck if a content key is validztExpected the jq schema to result in a list of objects (dict),                     so sample must be a dict but got `rU   Nz_Expected the jq schema to result in a list of objects (dict)                     with the key `z ` which should be parsable by jq)rI   rJ   rM   rK   rL   r   getr   r   r   r;   rE   )r"   rC   r6   s      r#   r>    JSONLoader._validate_content_key   s     &$''77;F|nAG  00

4,,-5##'#4#4"5Q8 
 ,, 1 1288@EEGO##'#4#4"55UW  P -rR   )r   r   r   r!   r   r    r   r   )NFNTF)__name__
__module____qualname____firstlineno____doc__r
   rB   r   r	   boolr   r   r$   r   r   r2   intr,   r   r@   rA   r>   __static_attributes__ rR   r#   r   r      s   FX &*5:@D! 1&h'1& 1& c]	1&
 %-TN1&  $t); <=1& 1& 1&f8H- $Ac A# A(82D A"? ? ?4%38n%;>%	c3h%(# $ rR   r   )r<   osr   pathlibr   typingr   r   r   r   r	   r
   langchain_core.documentsr   )langchain_community.document_loaders.baser   r   rc   rR   r#   <module>ri      s(       A A - @f frR   