
    dh`                    b    S SK Jr  S SKJrJrJrJrJr  \(       a
  S SKJ	r	J
r
Jr   " S S5      rg)    )annotations)TYPE_CHECKINGAnyIterableListOptional)	DataFrameRowSparkSessionc                      \ rS rSrSr      S           SS jjr\ S       SS jj5       rSS jrSS jr	SS jr
SSS	 jjrSS
 jrSS jrSS jrSSS jjrSSS jjrSSS jjrSrg)SparkSQL	   z;SparkSQL is a utility class for interacting with Spark SQL.Nc                    SSK Jn  U(       a  UOUR                  R	                  5       U l        Ub%  U R
                  R                  R                  U5        Ub%  U R
                  R                  R                  U5        [        U R                  5       5      U l        U(       a  [        U5      O	[        5       U l        U R                  (       a/  U R                  U R                  -
  nU(       a  [        SU S35      eU(       a  [        U5      O	[        5       U l        U R                  (       a/  U R                  U R                  -
  nU(       a  [        SU S35      eU R                  5       n	U	(       a  [        U	5      OU R                  U l        [#        U[$        5      (       d  ['        S5      eX`l        g! [         a    [        S5      ef = f)	aQ  Initialize a SparkSQL object.

Args:
    spark_session: A SparkSession object.
      If not provided, one will be created.
    catalog: The catalog to use.
      If not provided, the default catalog will be used.
    schema: The schema to use.
      If not provided, the default schema will be used.
    ignore_tables: A list of tables to ignore.
      If not provided, all tables will be used.
    include_tables: A list of tables to include.
      If not provided, all tables will be used.
    sample_rows_in_table_info: The number of rows to include in the table info.
      Defaults to 3.
r   r   Fpyspark is not installed. Please install it with `pip install pyspark`Nzinclude_tables  not found in databasezignore_tables z,sample_rows_in_table_info must be an integer)pyspark.sqlr   ImportErrorbuildergetOrCreate_sparkcatalogsetCurrentCatalogsetCurrentDatabaseset_get_all_table_names_all_tables_include_tables
ValueError_ignore_tablesget_usable_table_names_usable_tables
isinstanceint	TypeError_sample_rows_in_table_info)
selfspark_sessionr   schemaignore_tablesinclude_tablessample_rows_in_table_infor   missing_tablesusable_tabless
             _/var/www/html/shao/venv/lib/python3.13/site-packages/langchain_community/utilities/spark_sql.py__init__SparkSQL.__init__   s   2	0 +M0D0D0P0P0R 	 KK11':KK226:t88:;6Ds>2#%!11D4D4DDN %n%55KL  5Bc-0su!0043C3CCN $^$44JK  3354Ac-0tGWGW3S99JKK*C'E  	X 	s   F: :Gc                     SSK Jn  UR                  R	                  U5      R                  5       nU " U40 UD6$ ! [         a    [        S5      ef = f)zjCreating a remote Spark Session via Spark connect.
For example: SparkSQL.from_uri("sc://localhost:15002")
r   r   r   )r   r   r   r   remoter   )clsdatabase_uriengine_argskwargsr   sparks         r/   from_uriSparkSQL.from_uriK   s\    	0 $$++L9EEG5#F##  	X 	s	   : Ac                    U R                   (       a  U R                   $ [        U R                  U R                  -
  5      $ )zGet names of tables available.)r   sortedr   r    )r'   s    r/   r!   SparkSQL.get_usable_table_names\   s3    '''d&&)<)<<==    c                    U R                   R                  S5      R                  S5      R                  5       n[	        [        S U5      5      $ )NzSHOW TABLES	tableNamec                    U R                   $ N)r@   )rows    r/   <lambda>/SparkSQL._get_all_table_names.<locals>.<lambda>e   s    CMMr>   )r   sqlselectcollectlistmap)r'   rowss     r/   r   SparkSQL._get_all_table_namesc   s;    {{}-44[AIIKC14899r>   c                    U R                   R                  SU 35      R                  5       S   R                  nUR	                  S5      nUS U S-   $ )NzSHOW CREATE TABLE r   USING;)r   rF   rH   createtab_stmtfind)r'   table	statementusing_clause_indexs       r/   _get_create_table_stmtSparkSQL._get_create_table_stmtg   sX    KKOO089AACAFUU 	 '^^G4,,-33r>   c                r   U R                  5       nUb2  [        U5      R                  U5      nU(       a  [        SU S35      eUn/ nU HX  nU R	                  U5      nU R
                  (       a"  US-  nUSU R                  U5       S3-  nUS-  nUR                  U5        MZ     SR                  U5      nU$ )Nztable_names r   z

/*
z*/z

)	r!   r   
differencer   rU   r&   _get_sample_spark_rowsappendjoin)r'   table_namesall_table_namesr-   tables
table_name
table_info	final_strs           r/   get_table_infoSparkSQL.get_table_infoo   s    557" -88IN </??U!VWW)O)J44Z@J..h&
4#>#>z#J"K2NN
d"
MM*% * KK'	r>   c                   SU SU R                    3nU R                  R                  U5      nSR                  [	        [        S UR                  R                  5      5      5      n U R                  U5      nSR                  U Vs/ sH  nSR                  U5      PM     sn5      nU R                    SU SU SU 3$ s  snf ! [         a    Sn N*f = f)	NzSELECT * FROM z LIMIT 	c                    U R                   $ rB   )name)fs    r/   rD   1SparkSQL._get_sample_spark_rows.<locals>.<lambda>   s    166r>   rX    z rows from z table:
)
r&   r   rF   r\   rI   rJ   r)   fields_get_dataframe_results	Exception)r'   rR   querydfcolumns_strsample_rowsrC   sample_rows_strs           r/   rZ   SparkSQL._get_sample_spark_rows   s     wt/N/N.OP[[__U#iiS)9299;K;K%L MN	!55b9K"ii;(O;C3;(OPO
 ../{5'm2!	
	 )P 	! O	!s$   ( C C "C  C CCc                l    [        [        [        UR                  5       R	                  5       5      5      $ rB   )tuplerJ   strasDictvalues)r'   rC   s     r/   _convert_row_as_tupleSparkSQL._convert_row_as_tuple   s#    Scjjl113455r>   c                \    [        [        U R                  UR                  5       5      5      $ rB   )rI   rJ   rz   rH   )r'   rp   s     r/   rm   SparkSQL._get_dataframe_results   s     C22BJJLABBr>   c                    U R                   R                  U5      nUS:X  a  UR                  S5      n[        U R	                  U5      5      $ )None   )r   rF   limitrw   rm   )r'   commandfetchrp   s       r/   runSparkSQL.run   s>    [[__W%E>!B4..r233r>   c                `     U R                  U5      $ ! [         a  n SU 3s SnA$ SnAff = f)a6  Get information about specified tables.

Follows best practices as specified in: Rajkumar et al, 2022
(https://arxiv.org/abs/2204.00498)

If `sample_rows_in_table_info`, the specified number of sample rows will be
appended to each table description. This can increase performance as
demonstrated in the paper.
Error: N)rc   r   )r'   r]   es      r/   get_table_info_no_throw SparkSQL.get_table_info_no_throw   s7    	!&&{33 	!*QC= 	!    
-(--c                `     U R                  X5      $ ! [         a  n SU 3s SnA$ SnAff = f)a
  Execute a SQL command and return a string representing the results.

If the statement returns rows, a string of the results is returned.
If the statement returns no rows, an empty string is returned.

If the statement throws an error, the error message is returned.
r   N)r   rn   )r'   r   r   r   s       r/   run_no_throwSparkSQL.run_no_throw   s5    	!88G++ 	!*QC= 	!r   )r   r    r   r&   r   r"   )NNNNN   )r(   zOptional[SparkSession]r   Optional[str]r)   r   r*   Optional[List[str]]r+   r   r,   r$   rB   )r5   rw   r6   zOptional[dict]r7   r   returnr   )r   zIterable[str])rR   rw   r   rw   )r]   r   r   rw   )rC   r
   r   rv   )rp   r	   r   rI   )all)r   rw   r   rw   r   rw   )__name__
__module____qualname____firstlineno____doc__r0   classmethodr9   r!   r   rU   rc   rZ   rz   rm   r   r   r   __static_attributes__ r>   r/   r   r   	   s    E 15!% $-1.2)*=D-=D =D 	=D
 +=D ,=D $'=D~ >B$$-;$NQ$	$ $ >:4$
"6C4! ! !r>   r   N)
__future__r   typingr   r   r   r   r   r   r	   r
   r   r   r   r>   r/   <module>r      s#    " ? ?88q! q!r>   