
    dh                        S SK Jr  S SKrS SKrS SKrS SKrS SKrS SKrS SKJ	r	  S SK
Jr  S SKJrJrJrJrJrJrJrJrJr  S SKJr  S SKJr  S SKJr  \(       a  S S	KJr  S S
KJr    " S S\5      r!g)    )annotationsN)contextmanager)StringIO)	TYPE_CHECKINGAnyDict	GeneratorIterableListOptionalTupleType)
Embeddings)VectorStore)Document)
connection)cursorc                     \ rS rSrSr " S S\\R                  5      r " S S5      r	SSSS	.             S&S
 jjr
 " S S5      rS'S jrS'S jr  S(       S)S jjr S*       S+S jjrS,S jr S*       S-S jjr        S.S jr\     S/                   S0S jj5       r  S(       S1S jjr S2       S3S jjrS4S jr S5       S6S jjr S5       S7S jjr S5       S8S jjr S5       S9S jjr S*     S:S jjr        S;S jrS<S jrS'S jrS'S  jr S=S! jr!S=S" jr"      S>S# jr#S?S$ jr$S%r%g)@Yellowbrick!   zYellowbrick as a vector database.
Example:
    .. code-block:: python
        from langchain_community.vectorstores import Yellowbrick
        from langchain_community.embeddings.openai import OpenAIEmbeddings
        ...
c                       \ rS rSrSrSrSrSrg)Yellowbrick.IndexType*   z<Enumerator for the supported Index types within Yellowbrick.nonelsh N)__name__
__module____qualname____firstlineno____doc__NONELSH__static_attributes__r       d/var/www/html/shao/venv/lib/python3.13/site-packages/langchain_community/vectorstores/yellowbrick.py	IndexTyper   *   s    Jr%   r'   c                  >    \ rS rSrSr  S   SS jjrS	S
S jjrSrg)Yellowbrick.IndexParams0   z/Parameters for configuring a Yellowbrick index.Nc                j    Uc  [         R                  R                  nXl        U=(       d    0 U l        g N)r   r'   r"   
index_typeparams)selfr-   r.   s      r&   __init__ Yellowbrick.IndexParams.__init__3   s+    
 !(2277
(O ,BDKr%   c                8    U R                   R                  X5      $ r,   )r.   get)r/   keydefaults      r&   	get_param!Yellowbrick.IndexParams.get_param=   s    ;;??300r%   )r-   r.   NN)r-   z!Optional['Yellowbrick.IndexType']r.   zOptional[Dict[str, Any]]r,   )r4   strr5   r   returnr   )r   r   r   r    r!   r0   r6   r$   r   r%   r&   IndexParamsr)   0   s/    = =A/3	'9	' -	'	1 	1r%   r;   NF)schemaloggerdropc                  SSK Jn  UR                  5         U(       a  XPl        O[        R
                  " [        5      U l        U R                  R                  [        R                  5        [        R                  " 5       nUR                  [        R                  5        [        R                  " S5      n	UR                  U	5        U R                  R                  U5        [        U[        5      (       d  U R                  R!                  S5        gSU l        SU l        SU l        X l        [*        R-                  X R                  5      U l        [0        R2                  " U R.                  R4                  5        X@l        X0l        Xl        SU l        U R?                  5         U R.                  RA                  5        n
U(       ah  U RC                  U R8                  U R6                  U
S	9  U RC                  U R8                  U R&                  -   U R6                  U
S	9  U RE                  U
5        U RG                  U
5        U RI                  U
5        SSS5        g! , (       d  f       g= f)
zInitialize with yellowbrick client.
Args:
    embedding: Embedding operator
    connection_string: Format 'postgres://username:password@host:port/database'
    table: Table used to store / retrieve embeddings from
r   )extrasz)%(asctime)s - %(levelname)s - %(message)sz+embeddings input must be Embeddings object.N
_lsh_index_lsh_hyperplane_content)tabler<   r   )%psycopg2r@   register_uuidr=   logging	getLoggerr   setLevelERRORStreamHandlerDEBUG	FormattersetFormatter
addHandler
isinstancer   errorLSH_INDEX_TABLELSH_HYPERPLANE_TABLECONTENT_TABLEconnection_stringr   DatabaseConnectionr   atexitregisterclose_connection_schema_table
_embedding_max_embedding_len_check_database_utf8
get_cursorr>   _drop_lsh_index_tables_create_schema_create_table)r/   	embeddingrU   rD   r<   r=   r>   r@   handler	formatterr   s              r&   r0   Yellowbrick.__init__@   s     	$ K!++H5DKKK  /++-GW]]+))*UVI  +KK""7+)Z00KKKL$0):!",!2%889JKKX889#"&!!#__'')V		DLL	P		++(:(::<<!  
 ++F3'v& *))s    BI
I)c                     ^  \ rS rSr% SrS\S'   SrS\S'   S\S'         SU 4S	 jjrSS
 jrSS jr	\
SS j5       r\
SS j5       rSrU =r$ )Yellowbrick.DatabaseConnection~   Nr9   _connection_stringzOptional['PgConnection']_connectionlogging.Logger_loggerc                   > U R                   c4  [        TU ]	  U 5      U l         XR                   l        X R                   l        U R                   $ r,   )	_instancesuper__new__rj   rm   )clsrU   r=   	__class__s      r&   rq   &Yellowbrick.DatabaseConnection.__new__   s=     }}$ % 43D0(.%== r%   c                    U R                   (       a>  U R                   R                  (       d"  U R                   R                  5         S U l         g g g r,   )rk   closedclose)r/   s    r&   rY   /Yellowbrick.DatabaseConnection.close_connection   s<    (8(8(?(?  &&(#'  )@r%   c                    SS K nU R                  (       a  U R                  R                  (       a1  UR                  U R                  5      U l        SU R                  l        U R                  $ )Nr   F)rE   rk   rv   connectrj   
autocommit)r/   rE   s     r&   get_connection-Yellowbrick.DatabaseConnection.get_connection   sO    ##t'7'7'>'>#+#3#3D4K4K#L .3  +###r%   c              #     #    SSK Jn  U R                  5       n Uv   UR                  5         g ! U a;  nUR	                  5         U R
                  R                  SSS9  [        S5      UeS nAff = f7f)Nr   )DatabaseErrorz2Database error occurred, rolling back transaction.T)exc_infozDatabase transaction failed.)rE   r   r|   commitrollbackrm   rQ   RuntimeError)r/   r   connes       r&   get_managed_connection5Yellowbrick.DatabaseConnection.get_managed_connection   st     .&&(D	
  ! J""HSW #  ##ABIJs$   A3/ A3A06A++A00A3c              #     #    U R                  5        nUR                  5       n Uv   UR                  5          S S S 5        g ! UR                  5         f = f! , (       d  f       g = f7fr,   )r   r   rw   )r/   r   r   s      r&   r_   )Yellowbrick.DatabaseConnection.get_cursor   sN     ,,.$# LLLN /.
 LLN /.s1   A)AAA	A)AA
A&"A))rk   )rU   r9   r=   rl   r:   z 'Yellowbrick.DatabaseConnection'r:   None)r:   z'PgConnection')r:   z%Generator['PgConnection', None, None])r:   z!Generator['PgCursor', None, None])r   r   r   r    ro   __annotations__rk   rq   rY   r|   r   r   r_   r$   __classcell__)rs   s   @r&   rV   rh   ~   sn    	04-4	!$'	!1?	!-	!	(
	$ 
	 
	 
	# 
	#r%   rV   c                    SSK Jn  U R                  (       aG  UR                  UR	                  S5      R                  UR                  U R                  5      S95        gg)z.
Helper function: create schema if not exists
r   sqlzE
                    CREATE SCHEMA IF NOT EXISTS {s}
                )sN)rE   r   rZ   executeSQLformat
Identifier)r/   r   r   s      r&   ra   Yellowbrick._create_schema   sQ     	!<<NN &nnT\\2   r%   c           	     6   SSK Jn  U R                  (       a  U R                  4OSnUR                  " / UQU R                  U R
                  -   P76 nUR                  U R                  U R
                  -   S-   5      nUR                  UR                  S5      R                  UUS95        U R                  (       a  U R                  4OSnUR                  " / UQU R                  P76 nUR                  " / UQU R                  U R
                  -   P76 nUR                  U R                  U R
                  -   S-   5      nUR                  U R                  U R
                  -   S-   5      n	UR                  UR                  S	5      R                  UUUU	S
95        g)z-
Helper function: create table if not exists
r   r   r   
_pk_doc_ida0  
                CREATE TABLE IF NOT EXISTS {t} (
                doc_id UUID NOT NULL,
                text VARCHAR(60000) NOT NULL,
                metadata VARCHAR(1024) NOT NULL,
                CONSTRAINT {c} PRIMARY KEY (doc_id))
                DISTRIBUTE ON (doc_id) SORT ON (doc_id)
            tc_pk_doc_id_embedding_id
_fk_doc_ida  
                CREATE TABLE IF NOT EXISTS {t1} (
                doc_id UUID NOT NULL,
                embedding_id SMALLINT NOT NULL,
                embedding FLOAT NOT NULL,
                CONSTRAINT {c1} PRIMARY KEY (doc_id, embedding_id),
                CONSTRAINT {c2} FOREIGN KEY (doc_id) REFERENCES {t2}(doc_id))
                DISTRIBUTE ON (doc_id) SORT ON (doc_id)
            t1t2c1c2N)	rE   r   rZ   r   r[   rT   r   r   r   )
r/   r   r   schema_prefixr   r   r   r   r   r   s
             r&   rb   Yellowbrick._create_table   sm    	!+/<<RNNLML4;;9K9K+KLNN4;;););;lJKGG	 f  	
  ,0<<R^^8]8DKK8^^M]MDKK$:L:L,LM^^KK$,,,/HH
 ^^DKK$*<*<<|KLGG
 f	  	
r%   c                    Uc4  U R                   R                  5        nU R                  X1US9  SSS5        gU R                  X1US9  g! , (       d  f       g= f)zp
Helper function: Drop data. If a cursor is provided, use it;
otherwise, obtain a new cursor for the operation.
N)r<   )r   r_   _drop_table)r/   rD   r<   r   s       r&   r>   Yellowbrick.drop   sT     >++-  v > .- V6: .-s   A		
Ac                    SSK Jn  U(       a  UR                  X25      nOUR                  U5      nUR                  S5      R	                  U5      nUR                  U5        g)z9
Executes the drop table command using the given cursor.
r   r   z1
        DROP TABLE IF EXISTS {} CASCADE
        N)rE   r   r   r   r   r   )r/   r   rD   r<   r   
table_namedrop_table_querys          r&   r   Yellowbrick._drop_table  sW     	!6J.J77
 &
	 	
 	'(r%   c                   U R                   R                  5        nSnUR                  U5        UR                  5       S   nSSS5        WR	                  5       S:X  d  UR	                  5       S:X  a  g[        S5      e! , (       d  f       NB= f)z5
Helper function: Test the database is UTF-8 encoded
z
                SELECT pg_encoding_to_char(encoding)
                FROM pg_database
                WHERE datname = current_database();
            r   Nutf8zutf-8TzDatabase encoding is not UTF-8)r   r_   r   fetchonelower	Exception)r/   r   queryencodings       r&   r^    Yellowbrick._check_database_utf8  sx     __'')VE
 NN5!(+H * >>v%)9W)D<== *)s   'A>>
Bc           
        Sn[        U5      nU R                  R                  [        U5      5      n/ nU(       d  U Vs/ sH  n0 PM     nnUR                  S5      =(       d    [        R                  5       nU R                  R                  5        n	[        5       n
[        5       n[        R                  " U
SS[        R                  S9n[        R                  " USS[        R                  S9nSn[        U5       H  u  nn[        [        R                  " 5       5      nUR!                  U5        UR#                  UU[$        R&                  " X/   5      /5        [        X_   5       H  u  nnUR#                  UUU/5        M     US-  nX:  d  M  U R)                  XU5        U
R+                  S5        U
R-                  S5        UR+                  S5        UR-                  S5        SnM     US:  a  U R)                  XU5        S S S 5        UR.                  [        R0                  R2                  :X  a&  U R5                  U[        R6                  " W5      5        U$ s  snf ! , (       d  f       Nc= f)Ni'  index_params	")	delimiter	quotecharquotingr      )listr\   embed_documentsr3   r   r;   r   r_   r   csvwriterQUOTE_MINIMAL	enumerater9   uuiduuid4appendwriterowjsondumps_copy_to_dbseektruncater-   r'   r#   _update_indexUUID)r/   texts	metadataskwargs
batch_size
embeddingsresults_r   r   
content_ioembeddings_iocontent_writerembeddings_writercurrent_batch_sizeitextdoc_uuidembedding_idrc   s                       r&   	add_textsYellowbrick.add_texts3  s    
U__44T%[A
%*+UUI+zz.1N[5L5L5N__'')V!J$JM ZZdc3CTCTN !$

cFWFW! "#$U+4tzz|,x(''4IL9Q(RS/8/G+L)%..,	/RS 0H #a'"%3$$VGOOA&''*!&&q)!**1-)*&% ,( "A%  ]CA *D ""k&;&;&?&??|TYYx-@AS , *)s   I C>I%A4I%%
I3c                "   UR                  S5        UR                  S5        SSKJn  U R                  (       a  U R                  4OSnUR                  " / UQU R
                  U R                  -   P76 nUR                  S5      R                  US9nUR                  Xr5        U R                  (       a  U R                  4OSnUR                  " / UQU R
                  P76 nUR                  S5      R                  US9nUR                  X5        g )Nr   r   r   z
            COPY {table} (doc_id, text, metadata) FROM 
            STDIN WITH (FORMAT CSV, DELIMITER E'\t', QUOTE '"')
        )rD   z
            COPY {table} (doc_id, embedding_id, embedding) FROM 
            STDIN WITH (FORMAT CSV, DELIMITER E'\t', QUOTE '"')
        )
r   rE   r   rZ   r   r[   rT   r   r   copy_expert)	r/   r   r   r   r   r   rD   content_copy_queryembeddings_copy_querys	            r&   r   Yellowbrick._copy_to_dbj  s     	1 +/<<RPPt{{T=O=O/OP WW

 &u&
 	 	-:+/<<R;;t{{; #!

 &u&
 	 	0@r%   c                B    U " UUUUUS9n	U	R                   " SXS.UD6  U	$ )aO  Add texts to the vectorstore index.
Args:
    texts: Iterable of strings to add to the vectorstore.
    metadatas: Optional list of metadatas associated with the texts.
    connection_string: URI to Yellowbrick instance
    embedding: Embedding function
    table: table to store embeddings
    kwargs: vectorstore specific parameters
)rc   rU   rD   r<   r>   )r   r   r   )r   )
rr   r   rc   r   rU   rD   r<   r>   r   vsss
             r&   
from_textsYellowbrick.from_texts  s8    * /
 	AEA&A
r%   c                  ^ SSK Jm  U(       a  TR                  S5      nObUbT  [        U4S jU 5       5      nTR                  S5      R	                  U5      nTR                  S5      R                  US9nO[        S	5      eU R                  (       a  U R                  4OS
nU R                  R                  5        nTR                  " / UQU R                  U R                  -   P76 n	TR                  S5      R                  XS9n
UR                  U
5        TR                  " / UQU R                  P76 n	TR                  S5      R                  XS9n
UR                  U
5        U R                  " XR                  U R                  -   /UQ76 (       aZ  TR                  " / UQU R                  U R                  -   P76 n	TR                  S5      R                  XS9n
UR                  U
5        SSS5        g! , (       d  f       g= f)z`Delete vectors by uuids.

Args:
    ids: List of ids to delete, where each id is a uuid string.
r   r   z'
                WHERE 1=1
            Nc              3  D   >#    U H  nTR                  U5      v   M     g 7fr,   )Literal).0idr   s     r&   	<genexpr>%Yellowbrick.delete.<locals>.<genexpr>  s     8Cb#++b//Cs    z, z5
                WHERE doc_id IN ({ids})
            )idsz*Either ids or delete_all must be provided.r   zDELETE FROM {table} {where_sql})rD   	where_sql)rE   r   r   tuplejoinr   
ValueErrorrZ   r   r_   r   r[   rT   r   _table_existsrR   )r/   r   
delete_allr   r   uuidsids_formattedr   r   table_identifierr   r   s              @r&   deleteYellowbrick.delete  s    	!I
 _8C88EGGDM..u5M f!  	  IJJ+/<<R__'')V"~~    $d.@.@ @  GG=>EE& F E NN5!"~~J}JdkkJGG=>EE& F E NN5!!!d&:&::=J  $'>> $"$$(KK$2F2F$F$   ABII* J  u%1 *4 5 *)4 s   =D1G77
Hc                    SSK Jn  UR                  U5      nUR                  U5      nUR                  UR	                  S5      R                  UUS95        UR                  5       S   S:  $ )z.
Checks if a table exists in the given schema
r   r   z
                SELECT COUNT(*)
                FROM sys.table t INNER JOIN sys.schema s ON t.schema_id = s.schema_id
                WHERE s.name = {schema} AND t.name = {table_name}
            )r<   r   )rE   r   r   r   r   r   r   )r/   r   r   r<   r   s        r&   r   Yellowbrick._table_exists  sp     	!V$[[,
GG f%  	
  #a''r%   c                    SS K nSR                  [        [        U5      5      nUR	                  UR                  5       5      nUR                  5       n[        R                  " US S S9nU$ )Nr   ,   )bytes)	hashlibr   mapr9   sha1encodedigestr   r   )r/   vectorr  
vector_strhash_objecthash_digestvector_uuids          r&   _generate_vector_uuid!Yellowbrick._generate_vector_uuid  sZ    XXc#v./
ll:#4#4#67!((*iik#2&67r%   c                   SSK Jn  SSKJn  UR	                  S5      =(       d    [
        R                  5       nU R                  R                  5        nSU R                  -   nU R                  U5      n	UR                  S5      R                  UR                  U5      5      n
UR                  U
5        [        U5       VVs/ sH  u  p[!        U	5      X4PM     nnnUR                  S5      R                  UR                  U5      5      nU" X~U5        UR                  U5      nU R"                  (       a  U R"                  4OSnUR                  " / UQU R                  P76 nUR                  " / UQU R                  U R$                  -   P76 nUR&                  [
        R(                  R*                  :X  a  U R                  S	-   nU R-                  UUU5        U R"                  (       a  U R"                  4OSnUR                  " / UQU R                  U R.                  -   P76 nUR                  U5      nUR                  S
5      R                  UUUUUUR1                  UR3                  SS5      5      S9nUR                  UU45        UR5                  5       nOCUR                  S5      R                  UUUS9nUR                  UU45        UR5                  5       nSSS5        / nW HH  n[6        R8                  " US   5      =(       d    0 n[;        US   US9nUR=                  UUS   45        MJ     U$ s  snnf ! , (       d  f       Nf= f)aR  Perform a similarity search with Yellowbrick with vector

Args:
    embedding (List[float]): query embedding
    k (int, optional): Top K neighbors to retrieve. Defaults to 4.

    NOTE: Please do not let end-user fill this and always be aware
          of SQL injection.

Returns:
    List[Document, float]: List of Documents and scores
r   r   )execute_valuesr   tmp_z 
                CREATE TEMPORARY TABLE {} (
                doc_id UUID,
                embedding_id SMALLINT,
                embedding FLOAT)
                ON COMMIT DROP
                DISTRIBUTE REPLICATE
            z:INSERT INTO {} (doc_id, embedding_id, embedding) VALUES %sr   	_tmp_hasha/  
                    WITH index_docs AS (
                    SELECT
                        t1.doc_id,
                        SUM(ABS(t1.hash-t2.hash)) as hamming_distance
                    FROM
                        {lsh_index} t1
                    INNER JOIN
                        {input_hash_table} t2
                    ON t1.hash_index = t2.hash_index
                    GROUP BY t1.doc_id
                    HAVING hamming_distance <= {hamming_distance}
                    )
                    SELECT
                        text,
                        metadata,
                       SUM(v1.embedding * v2.embedding) /
                        (SQRT(SUM(v1.embedding * v1.embedding)) *
                       SQRT(SUM(v2.embedding * v2.embedding))) AS score
                    FROM
                        {v1} v1
                    INNER JOIN
                        {v2} v2
                    ON v1.embedding_id = v2.embedding_id
                    INNER JOIN
                        {v3} v3
                    ON v2.doc_id = v3.doc_id
                    INNER JOIN
                        index_docs v4
                    ON v2.doc_id = v4.doc_id
                    GROUP BY v3.doc_id, v3.text, v3.metadata
                    ORDER BY score DESC
                    LIMIT %s
                hamming_distance)v1v2v3	lsh_indexinput_hash_tabler  a  
                    SELECT 
                        text,
                        metadata,
                        score
                    FROM
                        (SELECT
                            v2.doc_id doc_id,
                            SUM(v1.embedding * v2.embedding) /
                            (SQRT(SUM(v1.embedding * v1.embedding)) *
                            SQRT(SUM(v2.embedding * v2.embedding))) AS score
                        FROM
                            {v1} v1
                        INNER JOIN
                            {v2} v2
                        ON v1.embedding_id = v2.embedding_id
                        GROUP BY v2.doc_id
                        ORDER BY score DESC LIMIT %s
                        ) v4
                    INNER JOIN
                        {v3} v3
                    ON v4.doc_id = v3.doc_id
                    ORDER BY score DESC
                )r  r  r  Nr   )page_contentmetadata   )rE   r   psycopg2.extrasr  r3   r   r;   r   r_   r[   r  r   r   r   r   r   r9   rZ   rT   r-   r'   r#   _generate_tmp_lsh_hashesrR   r   r6   fetchallr   loadsr   r   )r/   rc   kr   r   r  r   r   tmp_embeddings_table
tmp_doc_idcreate_table_queryr   embedding_value
data_inputinsert_queryr  r   r  r  tmp_hash_tabler  r  	sql_queryr   	documentsresultr  docs                               r&   &similarity_search_with_score_by_vector2Yellowbrick.similarity_search_with_score_by_vector  s#    	!2zz.1N[5L5L5N__'')V#)DKK#7 33I>J!$	" fS^^$89:  NN-. 6?y5I5I1L Z,@5I   77LfS^^$89:  6< 45B/3||T\\OM<<<BQQd>P>P0PQB&&+*?*?*C*CC!%{!:--(" 48<<RNN "$(KK$2F2F$F	 $'>>.#A GG!#F &'%5%([[$../A1E&  	G Z D !//+GG2 &  3 < y1$/ //+u *x 35	Fzz&),2Hq	HECc6!9-. 
 i *)s    A/L>L8HL>8L>>
Mc                    U R                   R                  U5      nU R                  " SXBS.UD6nU VVs/ sH  u  pgUPM	     snn$ s  snnf )a%  Perform a similarity search with Yellowbrick

Args:
    query (str): query string
    k (int, optional): Top K neighbors to retrieve. Defaults to 4.

    NOTE: Please do not let end-user fill this and always be aware
          of SQL injection.

Returns:
    List[Document]: List of Documents
rc   r  r   r\   embed_queryr*  )r/   r   r  r   rc   r'  r)  r   s           r&   similarity_searchYellowbrick.similarity_search  sT     OO//6	?? 

(.
	 #,,)),,,s   Ac                d    U R                   R                  U5      nU R                  " SXBS.UD6nU$ )a2  Perform a similarity search with Yellowbrick

Args:
    query (str): query string
    k (int, optional): Top K neighbors to retrieve. Defaults to 4.

    NOTE: Please do not let end-user fill this and always be aware
          of SQL injection.

Returns:
    List[Document]: List of (Document, similarity)
r-  r   r.  )r/   r   r  r   rc   r'  s         r&   similarity_search_with_score(Yellowbrick.similarity_search_with_score  s@     OO//6	?? 

(.
	 r%   c                b    U R                   " SXS.UD6nU VVs/ sH  u  pVUPM	     snn$ s  snnf )a?  Perform a similarity search with Yellowbrick by vectors

Args:
    embedding (List[float]): query embedding
    k (int, optional): Top K neighbors to retrieve. Defaults to 4.

    NOTE: Please do not let end-user fill this and always be aware
          of SQL injection.

Returns:
    List[Document]: List of documents
r-  r   )r*  )r/   rc   r  r   r'  r)  r   s          r&   similarity_search_by_vector'Yellowbrick.similarity_search_by_vector  s@     ?? 

(.
	 #,,)),,,s   +c                   SSK Jn  U R                  (       a  U R                  4OSnUR                  " / UQU R                  U R
                  -   P76 nUR                  " / UQU R                  U R                  -   P76 nUR                  " / UQU R                  P76 nUR                  S5      R                  U5      nU(       a6  UR                  S5      R                  UR                  [        U5      5      S9OUR                  S5      n	UR                  S5      n
UR                  S	5      R                  UUUU	U
S
9nUR                  U5        g)zAdd hashes to LSH indexr   r   r   zINSERT INTO {}zWHERE e.doc_id = {doc_id})doc_id zGROUP BY 1, 2av  
            {query_prefix}
            SELECT
                e.doc_id as doc_id,
                h.id as hash_index,
                CASE WHEN SUM(e.embedding * h.hyperplane) > 0 THEN 1 ELSE 0 END as hash
            FROM {embedding_table} e
            INNER JOIN {hyperplanes} h ON e.embedding_id = h.hyperplane_id
            {condition}
            {group_by}
        )query_prefixembedding_tablehyperplanes	conditiongroup_byN)rE   r   rZ   r   r[   rS   rR   r   r   r   r9   r   )r/   r   r9  r   r   lsh_hyperplane_tablelsh_index_table_idembedding_table_idquery_prefix_idr>  r?  input_querys               r&   _update_lsh_hashesYellowbrick._update_lsh_hashes  sC    	!+/<<R"~~  
 
 KK$*C*CC 
 !^^ 

 KK$*>*>>
 !^^H]HDKKH''"23::;MN  GG/077s{{3v;?W7X 	
 77?+gg

 &(.,  
 	& 	{#r%   c                   SSK Jn  U R                  (       a  U R                  4OSnUR                  " / UQU R                  U R
                  -   P76 nUR                  U5      nUR                  U5      nUR                  S5      R                  U5      n	UR                  S5      n
UR                  S5      R                  U	UUU
S9nUR                  U5        g)	zGenerate temp LSHr   r   r   z+CREATE TEMPORARY TABLE {} ON COMMIT DROP ASz
GROUP BY 1a[  
            {query_prefix}
            SELECT
                h.id as hash_index,
                CASE WHEN SUM(e.embedding * h.hyperplane) > 0 THEN 1 ELSE 0 END as hash
            FROM {embedding_table} e
            INNER JOIN {hyperplanes} h ON e.embedding_id = h.hyperplane_id
            {group_by}
            DISTRIBUTE REPLICATE
        )r;  r<  r=  r?  N)	rE   r   rZ   r   r[   rS   r   r   r   )r/   r   tmp_embedding_tabler%  r   r   r@  tmp_embedding_table_idtmp_hash_table_idr;  r?  rD  s               r&   r  $Yellowbrick._generate_tmp_lsh_hashes  s     	!+/<<R"~~  
 
 KK$*C*CC 
 "%0C!DNN>:wwLMTT
 77<(gg	
 &%2,	  
 	" 	{#r%   c                   SSK Jn  U R                  (       a  U R                  4OSnUR                  " / UQU R                  U R
                  -   P76 nUR                  UR                  S5      R                  US95        UR                  5       S   S:  a  gUR                  " / UQU R                  P76 nUR                  UR                  S5      R                  US95        UR                  5       S   nUS-  nUR                  S	5      R                  UR                  U5      UR                  U5      US
9nUR                  U5        g)z4Generate random hyperplanes and store in Yellowbrickr   r   r   zSELECT COUNT(*) FROM {t})r   Nz!SELECT MAX(embedding_id) FROM {t}r   a2  
            WITH parameters AS (
                SELECT {num_hyperplanes} AS num_hyperplanes,
                    {dims_per_hyperplane} AS dims_per_hyperplane
            )
            INSERT INTO {hyperplanes_table} (id, hyperplane_id, hyperplane)
                SELECT id, hyperplane_id, (random() * 2 - 1) AS hyperplane
                FROM
                (SELECT range-1 id FROM sys.rowgenerator
                    WHERE range BETWEEN 1 AND
                    (SELECT num_hyperplanes FROM parameters) AND
                    worker_lid = 0 AND thread_id = 0) a,
                (SELECT range-1 hyperplane_id FROM sys.rowgenerator
                    WHERE range BETWEEN 1 AND
                    (SELECT dims_per_hyperplane FROM parameters) AND
                    worker_lid = 0 AND thread_id = 0) b
        )num_hyperplanesdims_per_hyperplanehyperplanes_table)rE   r   rZ   r   r[   rS   r   r   r   r   r   )	r/   r   rM  r   r   rO  r   num_dimensionsr$  s	            r&   _populate_hyperplanes!Yellowbrick._populate_hyperplanes*  s-    +/<<RNN 

 KK$*C*CC
 	sww9:AADUAVW??Q!#NN7M74;;7swwBCJJQJOP*1-!ww
$ &KK8 #N ;/  
% 	. 	|$r%   c           	     P   SSK Jn  U R                  (       a  U R                  4OSnUR                  " / UQU R                  U R
                  -   P76 nUR                  " / UQU R                  U R                  -   P76 nUR                  U R                  U R
                  -   S-   5      nUR                  U R                  U R
                  -   S-   5      nUR                  UR                  S5      R                  UUUUS95        U R                  (       a  U R                  4OSnUR                  " / UQU R                  U R                  -   P76 nUR                  U R                  U R                  -   S-   5      n	UR                  UR                  S	5      R                  UU	S
95        g)z&Create LSH index and hyperplane tablesr   r   r   r   r   a  
                CREATE TABLE IF NOT EXISTS {t1} (
                doc_id UUID NOT NULL,
                hash_index SMALLINT NOT NULL,
                hash SMALLINT NOT NULL,
                CONSTRAINT {c1} PRIMARY KEY (doc_id, hash_index),
                CONSTRAINT {c2} FOREIGN KEY (doc_id) REFERENCES {t2}(doc_id))
                DISTRIBUTE ON (doc_id) SORT ON (doc_id)
            r   _pk_id_hp_ida2  
                CREATE TABLE IF NOT EXISTS {t} (
                id SMALLINT NOT NULL,
                hyperplane_id SMALLINT NOT NULL,
                hyperplane FLOAT NOT NULL,
                CONSTRAINT {c} PRIMARY KEY (id, hyperplane_id))
                DISTRIBUTE REPLICATE SORT ON (id)
            r   N)rE   r   rZ   r   r[   rR   rT   r   r   r   rS   )
r/   r   r   r   r   r   r   r   r   r   s
             r&   _create_lsh_index_tables$Yellowbrick._create_lsh_index_tablesT  sp    +/<<R^^O]ODKK$:N:N,NO^^M]MDKK$:L:L,LM^^DKK$*>*>>MN^^DKK$*>*>>MNGG
 f	  	
& ,0<<RNNSMS4;;9R9R+RSNN4;;)B)BB^STGG	 f  	
r%   c                    U R                  U R                  U R                  U R                  -   US9  U R                  U R                  U R                  U R                  -   US9  g)zDrop LSH index tables)r<   rD   r   N)r>   rZ   r[   rR   rS   )r/   r   s     r&   r`   "Yellowbrick._drop_lsh_index_tables  s]    		<<t{{T5I5I'IRX 	 	
 			<<++ 9 99 	 	
r%   c                f   UR                   [        R                  R                  :X  ax  U R                  R                  5        nU R                  U5        U R                  U5        U R                  X!R                  SS5      5        U R                  U5        SSS5        gg! , (       d  f       g= f)z"Create index from existing vectorsrM     N)r-   r   r'   r#   r   r_   r`   rU  rQ  r6   rE  r/   r   r   s      r&   create_indexYellowbrick.create_index  s    ""k&;&;&?&??++-++F3--f5**223DcJ ''/ .- @--s   AB""
B0c                    UR                   [        R                  R                  :X  a5  U R                  R                  5        nU R                  U5        SSS5        gg! , (       d  f       g= f)zDrop an indexN)r-   r   r'   r#   r   r_   r`   r[  s      r&   
drop_indexYellowbrick.drop_index  sQ    ""k&;&;&?&??++-++F3 .- @--   A
A-c                    UR                   [        R                  R                  :X  a5  U R                  R                  5        nU R                  X25        SSS5        gg! , (       d  f       g= f)zHUpdate an index with a new or modified embedding in the embeddings tableN)r-   r   r'   r#   r   r_   rE  )r/   r   r9  r   s       r&   r   Yellowbrick._update_index  sS     ""k&;&;&?&??++-''7 .- @--ra  c                   SSK Jn   U R                  R                  5        nU R                  (       a  U R                  4OSnUR
                  " / UQU R                  P76 nUR
                  " / UQU R                  S-   P76 nUR
                  " / UQU R                  U R                  -   P76 nUR                  S5      R                  UUS9nUR                  U5        U R                  U5        UR                  S5      R                  UUS9nUR                  U5        UR                  S5      R                  XeS9n	UR                  U	5        S S S 5        g ! , (       d  f       g = f! [         a  n
[        S	U
 35      U
eS n
A
ff = f)
Nr   r   r   _v1zALTER TABLE {t1} RENAME TO {t2})r   r   z
                    INSERT INTO {t1} (doc_id, embedding_id, embedding) 
                    SELECT id, embedding_id, embedding FROM {t2}
                z
                    INSERT INTO {t1} (doc_id, text, metadata) 
                    SELECT DISTINCT id, text, metadata FROM {t2}
                zFailed to migrate schema: )rE   r   r   r_   rZ   r   r[   rT   r   r   r   rb   r   r   )r/   r   r   r   r   old_embeddingscontentalter_table_queryr$  insert_content_queryr   s              r&   migrate_schema_v1_to_v2#Yellowbrick.migrate_schema_v1_to_v2  s    #	H++-37<<R ^^H]HDKKH
!$!T!Te@S!T.. "$(KK$2D2D$D %(GG,M$N$U$U!% %V %! 01""6*"ww 
 &!%    |,'*ww(
 &G&7 % 34A .--B  	H!;A3?@aG	Hs5   E' D+EE' 
E$ E' $E' '
F1F  F)
rT   rS   rR   r\   r]   rZ   r[   r   rU   r=   )rc   r   rU   r9   rD   r9   r<   Optional[str]r=   zOptional[logging.Logger]r>   boolr:   r   )r   
'PgCursor'r:   r   r8   )rD   r9   r<   rl  r   zOptional['PgCursor']r:   r   r,   )r   rn  rD   r9   r<   rl  r:   r   )r:   rm  )r   zIterable[str]r   Optional[List[dict]]r   r   r:   	List[str])r   rn  r   r   r   r   r:   r   )Nr:  	langchainpublicF)rr   zType[Yellowbrick]r   rp  rc   r   r   ro  rU   r9   rD   r9   r<   r9   r>   rm  r   r   r:   r   )r   zOptional[List[str]]r   zOptional[bool]r   r   r:   r   )rr  )r   rn  r   r9   r<   r9   r:   rm  )r  List[float]r:   	uuid.UUID)   )rc   rs  r  intr   r   r:   List[Tuple[Document, float]])r   r9   r  rv  r   r   r:   List[Document])r   r9   r  rv  r   r   r:   rw  )rc   rs  r  rv  r   r   r:   rx  )r   rn  r9  zOptional[uuid.UUID]r:   r   )r   rn  rH  r9   r%  r9   r:   r   )r   rn  rM  rv  r:   r   )r   r)   r:   r   )r   r)   r9  rt  r:   r   r   )&r   r   r   r    r!   r9   enumEnumr'   r;   r0   rV   ra   rb   r>   r   r^   r   r   classmethodr   r   r   r  r*  r0  r3  r6  rE  r  rQ  rU  r`   r\  r_  r   rj  r$   r   r%   r&   r   r   !   su   C 1 1, !%+/<'<' <' 	<' <' )<' <' 
<'|4# 4#l"1
l !%'+	;; ; %	;
 
;( !%	)) ) 	)
 
).. +/55 (5 	5
 
5nA A.6AGOA	A8 
 +/!#   (	
      
 @ $(%); ; #; 	;
 
;| BJ( (.1(;>(	(0 01V$V),V<?V	%Vr $%-- -03-	-, $% 03	%, 01-$-),-<?-	-. '++$+$ $+$ 
	+$Z"$ "$7:"$LO"$	"$H(%T-
^	
	04838=F8	8&Hr%   r   )"
__future__r   rW   r   ry  r   rG   r   
contextlibr   ior   typingr   r   r   r	   r
   r   r   r   r   langchain_core.embeddingsr   langchain_core.vectorstoresr   %langchain_community.docstore.documentr   psycopg2.extensionsr   PgConnectionr   PgCursorr   r   r%   r&   <module>r     sS    "  
     % 
 
 
 1 3 :>6lH+ lHr%   