
    dh                         S SK r S SKJrJrJrJrJrJr  S SKJ	r	  S SK
JrJr  \ R                  " \5      r " S S\5      rg)    N)AnyCallableDictIteratorListOptional)Document)	BaseModelmodel_validatorc                       \ rS rSr% SrSr\\S'   Sr\\S'   Sr	\
\S'   S	r\\\/\4      \S
'   \\S'   \" SS9\S\S\4S j5       5       rS\\   4S jrS\\   4S jrSrg	)TensorflowDatasets
   a  Access to the TensorFlow Datasets.

The Current implementation can work only with datasets that fit in a memory.

`TensorFlow Datasets` is a collection of datasets ready to use, with TensorFlow
or other Python ML frameworks, such as Jax. All datasets are exposed
as `tf.data.Datasets`.
To get started see the Guide: https://www.tensorflow.org/datasets/overview and
the list of datasets: https://www.tensorflow.org/datasets/catalog/
                                           overview#all_datasets

You have to provide the sample_to_document_function: a function that
   a sample from the dataset-specific format to the Document.

Attributes:
    dataset_name: the name of the dataset to load
    split_name: the name of the split to load. Defaults to "train".
    load_max_docs: a limit to the number of loaded documents. Defaults to 100.
    sample_to_document_function: a function that converts a dataset sample
      to a Document

Example:
    .. code-block:: python

        from langchain_community.utilities import TensorflowDatasets

        def mlqaen_example_to_document(example: dict) -> Document:
            return Document(
                page_content=decode_to_str(example["context"]),
                metadata={
                    "id": decode_to_str(example["id"]),
                    "title": decode_to_str(example["title"]),
                    "question": decode_to_str(example["question"]),
                    "answer": decode_to_str(example["answers"]["text"][0]),
                },
            )

        tsds_client = TensorflowDatasets(
                dataset_name="mlqa/en",
                split_name="train",
                load_max_docs=MAX_DOCS,
                sample_to_document_function=mlqaen_example_to_document,
            )

 dataset_nametrain
split_named   load_max_docsNsample_to_document_functiondatasetbefore)modevaluesreturnc                      SSK n SSKnUS   c  [        S5      eUR	                  US   US   S	9US
'   U$ ! [         a    [        S5      ef = f! [         a    [        S5      ef = f)z7Validate that the python package exists in environment.r   Nz\Could not import tensorflow python package. Please install it with `pip install tensorflow`.znCould not import tensorflow_datasets python package. Please install it with `pip install tensorflow-datasets`.r   zmsample_to_document_function is None. Please provide a function that converts a dataset sample to  a Document.r   r   )splitr   )
tensorflowImportErrortensorflow_datasets
ValueErrorload)clsr   r   r   s       i/var/www/html/shao/venv/lib/python3.13/site-packages/langchain_community/utilities/tensorflow_datasets.pyvalidate_environment'TensorflowDatasets.validate_environment?   s    		& /08  
 044>"&*> 5 
y -  	C 	  	L 	s   7 A AA&c                 b   ^  U 4S jT R                   R                  T R                  5       5       $ )zIDownload a selected dataset lazily.

Returns: an iterator of Documents.

c              3   b   >#    U H%  nTR                   c  M  TR                  U5      v   M'     g 7f)N)r   ).0sselfs     r#   	<genexpr>/TensorflowDatasets.lazy_load.<locals>.<genexpr>c   s1      
:// 0D,,Q//:s   //)r   taker   r*   s   `r#   	lazy_loadTensorflowDatasets.lazy_load]   s*    
\\&&t'9'9:
 	
    c                 4    [        U R                  5       5      $ )z=Download a selected dataset.

Returns: a list of Documents.

)listr/   r.   s    r#   r!   TensorflowDatasets.loadi   s     DNN$%%r1    )__name__
__module____qualname____firstlineno____doc__r   str__annotations__r   r   intr   r   r   r   r	   r   r   classmethodr$   r   r/   r   r!   __static_attributes__r5   r1   r#   r   r   
   s    ,\ L#JM3HL(D683C*D!ELL(#$ 3   $8

8H- 

&d8n &r1   r   )loggingtypingr   r   r   r   r   r   langchain_core.documentsr	   pydanticr
   r   	getLoggerr6   loggerr   r5   r1   r#   <module>rF      s2     @ @ - /			8	$e& e&r1   