
    dhS                         S SK r S SKJr  S SKJr  S SKJrJrJrJ	r	J
r
JrJr  S SKJr  S SKJr  S SKJr  S SKJrJr   " S	 S
\5      r " S S\5      rg)    N)TextIOWrapper)Path)AnyDictIteratorListOptionalSequenceUnion)Document)
BaseLoader)detect_file_encodings)UnstructuredFileLoadervalidate_unstructured_versionc                       \ rS rSrSr     SSS.S\\\4   S\\   S\	\   S	\\
   S
\\   S\S\	\   4S jjjrS\\   4S jrS\S\\   4S jrSrg)	CSVLoader   a  Load a `CSV` file into a list of Documents.

Each document represents one row of the CSV file. Every row is converted
into a key/value pair and outputted to a new line in the document's
page_content.

The source for each document loaded from csv is set to the value of the
`file_path` argument for all documents by default.
You can override this by setting the `source_column` argument to the
name of a column in the CSV file.
The source of each document will then be set to the value of the column
with the name specified in `source_column`.

Output Example:
    .. code-block:: txt

        column1: value1
        column2: value2
        column3: value3

Instantiate:
    .. code-block:: python

        from langchain_community.document_loaders import CSVLoader

        loader = CSVLoader(file_path='./hw_200.csv',
            csv_args={
            'delimiter': ',',
            'quotechar': '"',
            'fieldnames': ['Index', 'Height', 'Weight']
        })

Load:
    .. code-block:: python

        docs = loader.load()
        print(docs[0].page_content[:100])
        print(docs[0].metadata)

    .. code-block:: python

        Index: Index
        Height: Height(Inches)"
        Weight: "Weight(Pounds)"
        {'source': './hw_200.csv', 'row': 0}

Async load:
    .. code-block:: python

        docs = await loader.aload()
        print(docs[0].page_content[:100])
        print(docs[0].metadata)

    .. code-block:: python

        Index: Index
        Height: Height(Inches)"
        Weight: "Weight(Pounds)"
        {'source': './hw_200.csv', 'row': 0}

Lazy load:
    .. code-block:: python

        docs = []
        docs_lazy = loader.lazy_load()

        # async variant:
        # docs_lazy = await loader.alazy_load()

        for doc in docs_lazy:
            docs.append(doc)
        print(docs[0].page_content[:100])
        print(docs[0].metadata)

    .. code-block:: python

        Index: Index
        Height: Height(Inches)"
        Weight: "Weight(Pounds)"
        {'source': './hw_200.csv', 'row': 0}
N )content_columns	file_pathsource_columnmetadata_columnscsv_argsencodingautodetect_encodingr   c                l    Xl         X l        X0l        XPl        U=(       d    0 U l        X`l        Xpl        g)a  

Args:
    file_path: The path to the CSV file.
    source_column: The name of the column in the CSV file to use as the source.
      Optional. Defaults to None.
    metadata_columns: A sequence of column names to use as metadata. Optional.
    csv_args: A dictionary of arguments to pass to the csv.DictReader.
      Optional. Defaults to None.
    encoding: The encoding of the CSV file. Optional. Defaults to None.
    autodetect_encoding: Whether to try to autodetect the file encoding.
    content_columns: A sequence of column names to use for the document content.
        If not present, use all columns that are not part of the metadata.
N)r   r   r   r   r   r   r   )selfr   r   r   r   r   r   r   s           g/var/www/html/shao/venv/lib/python3.13/site-packages/langchain_community/document_loaders/csv_loader.py__init__CSVLoader.__init__c   s1    2 #* 0  B#6 .    returnc              #     #     [        U R                  SU R                  S9 nU R                  U5       S h  vN   S S S 5        g  N! , (       d  f       g = f! [         a  nU R
                  (       a  [        U R                  5      nU Hn  n [        U R                  SUR                  S9 nU R                  U5       S h  vN     S S S 5           S nAg ! , (       d  f       M\  = f! [         a     Ml  f = f    S nAg [        SU R                   35      UeS nAf[         a  n[        SU R                   35      UeS nAff = f7f)N )newliner   zError loading )	openr   r   _CSVLoader__read_fileUnicodeDecodeErrorr   r   RuntimeError	Exception)r   csvfileedetected_encodingsr   s        r   	lazy_loadCSVLoader.lazy_load   s    	Idnnb4==IW++G444 JI4 JI! 	M''%:4>>%J" 2H!! NNBARAR$'+'7'7'@@@!	   
 . ! ! !3 #^DNN3C#DE1L 	I/?@AqH	Is   E	A AAAA E	A
AA E	A 
E#+DC*.CC
C
C*DE	
C'"C*%D'C**
C84D7C88D=E	DE(EEE	r+   c              #     ^ #    [         R                  " U40 T R                  D6n[        U5       H  u  p4 T R                  b  UT R                     O[        T R                  5      nSR                  U 4S jUR                  5        5       5      nXSS.nT R                   H
  n XH   Xx'   M     [        XgS9v   M     g ! [         a    [        ST R                   S35      ef = f! [         a    [        SU S35      ef = f7f)NzSource column 'z' not found in CSV file.
c           	   3     >#    U H  u  pTR                   (       a  UTR                   ;   d  M(  OUTR                  ;  d  M;  Ub  UR                  5       OU S[        U[        5      (       a  UR                  5       O?[        U[
        5      (       a)  SR                  [        [        R                  U5      5      OU 3v   M     g 7f)Nz: ,)r   r   strip
isinstancestrlistjoinmap).0kvr   s      r   	<genexpr>(CSVLoader.__read_file.<locals>.<genexpr>   s        (DA ++ --- .$"7"77!-QWWYQ7r!!S)) GGI "!T** #cii"34:  (s   &CC BC)sourcerowzMetadata column ')page_contentmetadata)csv
DictReaderr   	enumerater   r6   r   KeyError
ValueErrorr8   itemsr   r   )	r   r+   
csv_readerir@   r?   contentrB   cols	   `        r   __read_fileCSVLoader.__read_file   s    ^^G=t}}=

+FA	 ))5 **+T^^,  ii    IIK  G #)3H,,X$'HHM -
 CCA ,   %d&8&8%99QR .   X$'8=U%VWWXs4   2D1B>'<D$C%*D>$C""D%C??D)r   r   r   r   r   r   r   )Nr   NNF)__name__
__module____qualname____firstlineno____doc__r   r6   r   r	   r
   r   boolr   r   r   r.   r   r'   __static_attributes__r   r!   r   r   r      s    Pj (,*,#'"&$)/ *,/d#/  }/ #3-	/
 4./ 3-/ "/ "#/BI8H- I*"D= "DXh5G "Dr!   r   c                   P   ^  \ rS rSrSr S
S\S\S\4U 4S jjjrS\4S jr	S	r
U =r$ )UnstructuredCSVLoader   aH  Load `CSV` files using `Unstructured`.

Like other
Unstructured loaders, UnstructuredCSVLoader can be used in both
"single" and "elements" mode. If you use the loader in "elements"
mode, the CSV file will be a single Unstructured Table element.
If you use the loader in "elements" mode, an HTML representation
of the table will be available in the "text_as_html" key in the
document metadata.

Examples
--------
from langchain_community.document_loaders.csv_loader import UnstructuredCSVLoader

loader = UnstructuredCSVLoader("stanley-cups.csv", mode="elements")
docs = loader.load()
r   modeunstructured_kwargsc                 <   > [        SS9  [        TU ]  " SXS.UD6  g)z

Args:
    file_path: The path to the CSV file.
    mode: The mode to use when loading the CSV file.
      Optional. Defaults to "single".
    **unstructured_kwargs: Keyword arguments to pass to unstructured.
z0.6.8)min_unstructured_version)r   rY   Nr   )r   superr   )r   r   rY   rZ   	__class__s       r   r   UnstructuredCSVLoader.__init__   s#     	&wGO9O;NOr!   r"   c                 J    SSK Jn  U" SSU R                  0U R                  D6$ )Nr   )partition_csvfilenamer   )unstructured.partition.csvra   r   rZ   )r   ra   s     r   _get_elements#UnstructuredCSVLoader._get_elements   s"    <QdnnQ8P8PQQr!   r   )single)rO   rP   rQ   rR   rS   r6   r   r   r   rd   rU   __classcell__)r^   s   @r   rW   rW      sH    & +3PP$'PKNP PRt R Rr!   rW   )rC   ior   pathlibr   typingr   r   r   r   r	   r
   r   langchain_core.documentsr   )langchain_community.document_loaders.baser   ,langchain_community.document_loaders.helpersr   1langchain_community.document_loaders.unstructuredr   r   r   rW   r   r!   r   <module>ro      sE    
   G G G - @ NkD
 kD\$R2 $Rr!   