
    dh;                        S SK Jr  S SKrS SKJr  S SKJr  S SKJrJ	r	J
r
JrJr  S SKrS SKJr  S SKJr  S SKJr  S S	KJr  S S
KJr  \R2                  " \5      r " S S\5      rg)    )annotationsN)deepcopy)Enum)AnyIterableListOptionalTuple)Document)
Embeddings)run_in_executor)VectorStore)maximal_marginal_relevancec                  v   \ rS rSrSr S           SS jjr\SS j5       r   S           SS jjr\	       S                     SS jj5       r
 " S S	\5      rS
\R                  S4           SS jjrS
\R                  S4           SS jjrS
\R                  S4           S S jjrS
\R                  S4           S!S jjr   S"SS.             S#S jjjr   S$           S%S jjrS&S jrS'S jrS(S)S jjr S(     S)S jjrSrg)*Rockset   aN  `Rockset` vector store.

To use, you should have the `rockset` python package installed. Note that to use
this, the collection being used must already exist in your Rockset instance.
You must also ensure you use a Rockset ingest transformation to apply
`VECTOR_ENFORCE` on the column being used to store `embedding_key` in the
collection.
See: https://rockset.com/blog/introducing-vector-search-on-rockset/ for more details

Everything below assumes `commons` Rockset workspace.

Example:
    .. code-block:: python

        from langchain_community.vectorstores import Rockset
        from langchain_community.embeddings.openai import OpenAIEmbeddings
        import rockset

        # Make sure you use the right host (region) for your Rockset instance
        # and APIKEY has both read-write access to your collection.

        rs = rockset.RocksetClient(host=rockset.Regions.use1a1, api_key="***")
        collection_name = "langchain_demo"
        embeddings = OpenAIEmbeddings()
        vectorstore = Rockset(rs, collection_name, embeddings,
            "description", "description_embedding")

c                2    SSK Jn  [        X5      (       d  [	        S[        U5       35      eXl        X0l        X l        X@l	        XPl
        X`l         U R                  R                  S5        g! [         a    [        S5      ef = f! [         a     gf = f)a  Initialize with Rockset client.
Args:
    client: Rockset client object
    collection: Rockset collection to insert docs / query
    embeddings: Langchain Embeddings object to use to generate
                embedding for given text.
    text_key: column in Rockset collection to use to store the text
    embedding_key: column in Rockset collection to use to store the embedding.
                   Note: We must apply `VECTOR_ENFORCE()` on this column via
                   Rockset ingest transformation.

r   )RocksetClient]Could not import rockset client python package. Please install it with `pip install rockset`.z;client should be an instance of rockset.RocksetClient, got 	langchainN)rocksetr   ImportError
isinstance
ValueErrortype_client_collection_name_embeddings	_text_key_embedding_key
_workspaceset_applicationAttributeError)selfclient
embeddingscollection_nametext_keyembedding_key	workspacer   s           b/var/www/html/shao/venv/lib/python3.13/site-packages/langchain_community/vectorstores/rocksetdb.py__init__Rockset.__init__1   s    *	- &00F|n& 
  /%!+#	LL((5)  	@ 	*  		s   A0 B	 0B	
BBc                    U R                   $ N)r   r$   s    r+   r&   Rockset.embeddingsa   s        Nc                   / n/ n[        U5       H  u  p[        U5      U:X  a  XpR                  U5      -  n/ n0 n
U(       a  [        U5      U:  a  [        X(   5      n
U(       a  [        U5      U:  a  X8   U
S'   XU R                  '   U R
                  R                  U	5      XR                  '   UR                  U
5        M     [        U5      S:  a  XpR                  U5      -  n/ nU$ )a  Run more texts through the embeddings and add to the vectorstore

        Args:
    texts: Iterable of strings to add to the vectorstore.
    metadatas: Optional list of metadatas associated with the texts.
    ids: Optional list of ids to associate with the texts.
    batch_size: Send documents in batches to rockset.

Returns:
    List of ids from adding the texts into the vectorstore.

_idr   )		enumeratelen_write_documents_to_rocksetr   r   r   embed_queryr    append)r$   texts	metadatasids
batch_sizekwargsbatch
stored_idsitextdocs              r+   	add_textsRockset.add_textse   s    ( 
 'GA5zZ'>>uEE
CS^a/y|,s3x!| VE
"&'+'7'7'C'CD'IC##$LL ( u:>::5AAJEr2   c
                    Uc   S5       eU(       d   S5       eU(       d   S5       eU(       d   S5       eU " XBXVU5      nUR                  XX5        U$ )z^Create Rockset wrapper with existing texts.
This is intended as a quicker way to get started.
zRockset Client cannot be NonezCollection name cannot be emptyzText key name cannot be emptyzEmbedding key cannot be empty)rD   )clsr:   	embeddingr;   r%   r'   r(   r)   r<   r=   r>   r   s               r+   
from_textsRockset.from_texts   sc    & !B#BB!A AA888x===}fMR%C<r2   c                  *    \ rS rSrSrSrSrSS jrSrg)	Rockset.DistanceFunction   
COSINE_SIMEUCLIDEAN_DISTDOT_PRODUCTc                &    U R                   S:X  a  gg)NrO   ASCDESC)valuer0   s    r+   order_by!Rockset.DistanceFunction.order_by   s    zz--r2    N)returnstr)	__name__
__module____qualname____firstlineno__rN   rO   rP   rU   __static_attributes__rW   r2   r+   DistanceFunctionrL      s    !
)#	r2   r_      c                `    U R                   " U R                  R                  U5      UUU40 UD6$ )a{  Perform a similarity search with Rockset

Args:
    query (str): Text to look up documents similar to.
    distance_func (DistanceFunction): how to compute distance between two
        vectors in Rockset.
    k (int, optional): Top K neighbors to retrieve. Defaults to 4.
    where_str (Optional[str], optional): Metadata filters supplied as a
        SQL `where` condition string. Defaults to None.
        eg. "price<=70.0 AND brand='Nintendo'"

    NOTE: Please do not let end-user to fill this and always be aware
          of SQL injection.

Returns:
    List[Tuple[Document, float]]: List of documents with their relevance score
)1similarity_search_by_vector_with_relevance_scoresr   r8   r$   querykdistance_func	where_strr>   s         r+   'similarity_search_with_relevance_scores/Rockset.similarity_search_with_relevance_scores   s>    2 EE((/	

 
 	
r2   c                `    U R                   " U R                  R                  U5      UUU40 UD6$ )zQSame as `similarity_search_with_relevance_scores` but
doesn't return the scores.
)similarity_search_by_vectorr   r8   rc   s         r+   similarity_searchRockset.similarity_search   s>     //((/	

 
 	
r2   c                b    U R                   " XX440 UD6nU VVs/ sH  u  pxUPM	     snn$ s  snnf )zRAccepts a query_embedding (vector), and returns documents with
similar embeddings.)rb   )	r$   rH   re   rf   rg   r>   docs_and_scoresrC   _s	            r+   rk   #Rockset.similarity_search_by_vector   s=     PP-
6<
 #22//222s   +c           	     *   SnSU;   a  US   nU R                  XX$U5      n U R                  R                  R                  SU0S9n/ n
UR                   GH
  n0 n[        U[        5      (       d   SR                  [        U5      5      5       eUR                  5        H  u  p-X R                  :X  aC  [        U[        5      (       d*   SR                  U R                  [        U5      5      5       eUnMW  US	:X  a8  [        U[        5      (       d   S
R                  [        U5      5      5       eUnM  US;  d  M  XU'   M     U
R!                  [#        WUS9W45        GM     U
$ ! [         a"  n	[
        R                  SU	5        / s Sn	A	$ Sn	A	ff = f)ztAccepts a query_embedding (vector), and returns documents with
similar embeddings along with their relevance scores.Texclude_embeddingsrd   )sqlz$Exception when querying Rockset: %s
Nz;document should be of type `dict[str,Any]`. But found: `{}`zIpage content stored in column `{}` must be of type `str`. But found: `{}`distzDComputed distance between vectors must of type `float`. But found {})r4   _event_time_meta)page_contentmetadata)_build_query_sqlr   Queriesrd   	Exceptionloggererrorresultsr   dictformatr   itemsr   rY   floatr9   r   )r$   rH   re   rf   rg   r>   rs   q_strquery_responseefinalResultdocumentry   vrx   scores                   r+   rb   9Rockset.similarity_search_by_vector_with_relevance_scores   s    "6)!'(<!=%%a4F
	!\\1177We<L7MN 57&..HHh-- MTTN-
 !(&%a-- 6*fT^^T!W56- $%L&[%a// &'fT!Wo&/ E== #$QK! )" ,J1 /< E  	LL@!DI	s   %E& &
F0FFF)rg   c               t   U R                   R                  U5      nU R                  " U4UUSS.UD6nU V	s/ sH  oR                  U R                     PM     n
n	[        [        R                  " U5      U
UUS9nU H  nX   R                  U R                  	 M     U Vs/ sH  oU   PM	     sn$ s  sn	f s  snf )a  Return docs selected using the maximal marginal relevance.

Maximal marginal relevance optimizes for similarity to query AND diversity
among selected documents.

Args:
    query: Text to look up documents similar to.
    k: Number of Documents to return. Defaults to 4.
    fetch_k: Number of Documents to fetch to pass to MMR algorithm.
    distance_func (DistanceFunction): how to compute distance between two
        vectors in Rockset.
    lambda_mult: Number between 0 and 1 that determines the degree
                of diversity among the results with 0 corresponding
                to maximum diversity and 1 to minimum diversity.
                Defaults to 0.5.
    where_str: where clause for the sql query
Returns:
    List of Documents selected by maximal marginal relevance.
F)re   rg   rs   )lambda_multre   )r   r8   rk   ry   r    r   nparray)r$   rd   re   fetch_kr   rg   r>   query_embeddinginitial_docsrC   r&   selected_indicesrA   s                r+   max_marginal_relevance_search%Rockset.max_marginal_relevance_search0  s    : **66u=77
$	

 
 DPP<Cll4#6#67<
P5HH_%#	
 "A(()<)<= " *::)9AQ)9:: Q ;s   !B0 B5c                T   SR                  [        [        U5      5      nUR                   SU R                   SU S3nU(       a  SU S3OSnU(       a  SU R                   S	3OSnS
U SU SU R
                   SU R                   SU SUR                  5        S[        U5       S3$ )zABuilds Rockset SQL query to query similar vectors to query_vector,(z, [z
]) as distzWHERE 
 z EXCEPT(z),zSELECT * z
FROM .zORDER BY dist z
LIMIT )joinmaprY   rT   r    r!   r   rU   )	r$   r   rf   re   rg   rs   q_embedding_strdistance_strselect_embeddings	            r+   rz   Rockset._build_query_sqlh  s     ((3sO#<=)//0$2E2E1F G* .7fYKr*R	2Dht**+2.# 			!L> *
ooa--. /
 %%'( )
1vh  	r2   c                    U R                   R                  R                  U R                  XR                  S9nUR
                   Vs/ sH  o3R                  PM     sn$ s  snf )N
collectiondatar*   )r   	Documentsadd_documentsr   r!   r   r4   )r$   r?   add_doc_res
doc_statuss       r+   r7   #Rockset._write_documents_to_rockset  sT    ll,,::,,5OO ; 
 2=1A1AB1A:1ABBBs   Ac           
          SSK Jn  U R                  R                  R                  U R                  U Vs/ sH  o2" US9PM
     snU R                  S9  g! [         a    [        S5      ef = fs  snf )z1Delete a list of docs from the Rockset collectionr   )DeleteDocumentsRequestDatar   )idr   N)rockset.modelsr   r   r   r   delete_documentsr   r!   )r$   r<   r   rA   s       r+   delete_textsRockset.delete_texts  sz    	A 	//,,<?@Cq,2C@oo 	0 	
  	@ 	 As   A A.
A+c                     Uc  / nU R                  U5        g! [         a   n[        R                  SU5         S nAgS nAff = f)Nz.Exception when deleting docs from Rockset: %s
FT)r   r|   r}   r~   )r$   r<   r>   r   s       r+   deleteRockset.delete  sH    	{c"
 	  	LLJAN	s    
A>Ac                N   #    [        S U R                  U40 UD6I S h  vN $  N7fr/   )r   r   )r$   r<   r>   s      r+   adeleteRockset.adelete  s%      %T4;;FvFFFFs   %#%)r   r   r    r   r   r!   )commons)r%   r   r&   r   r'   rY   r(   rY   r)   rY   r*   rY   )rX   r   )NN    )r:   zIterable[str]r;   Optional[List[dict]]r<   Optional[List[str]]r=   intr>   r   rX   	List[str])NNr   r   r   Nr   )r:   r   rH   r   r;   r   r%   r   r'   rY   r(   rY   r)   rY   r<   r   r=   r   r>   r   rX   r   )rd   rY   re   r   rf   r_   rg   Optional[str]r>   r   rX   List[Tuple[Document, float]])rd   rY   re   r   rf   r_   rg   r   r>   r   rX   List[Document])rH   List[float]re   r   rf   r_   rg   r   r>   r   rX   r   )rH   r   re   r   rf   r_   rg   r   r>   r   rX   r   )r`      g      ?)rd   rY   re   r   r   r   r   r   rg   r   r>   r   rX   r   )r`   NT)r   r   rf   r_   re   r   rg   r   rs   boolrX   rY   )r?   z
List[dict]rX   r   )r<   r   rX   Noner/   )r<   r   r>   r   rX   zOptional[bool])rZ   r[   r\   r]   __doc__r,   propertyr&   rD   classmethodrI   r   r_   rN   rh   rl   rk   rb   r   rz   r7   r   r   r   r^   rW   r2   r+   r   r      sv   H #.. . 	.
 . . .`     +/#'&& (& !	&
 & & 
&P 
 +/!#'  (	
     !   
 8	4 	 *:*E*E#'

 
 (	

 !
 
 
&
H *:*E*E#'

 
 (	

 !
 
 

, *:*E*E#'33 3 (	3
 !3 3 
3& *:*E*E#'55 5 (	5
 !5 5 
&5t  4; $(4;4; 4; 	4;
 4; !4; 4; 
4;x #'#'$ ( 	
 ! ! 
2C
 	 *.G&G9<G	G Gr2   r   )
__future__r   loggingcopyr   enumr   typingr   r   r   r	   r
   numpyr   langchain_core.documentsr   langchain_core.embeddingsr   langchain_core.runnablesr   langchain_core.vectorstoresr   &langchain_community.vectorstores.utilsr   	getLoggerrZ   r}   r   rW   r2   r+   <module>r      sI    "    7 7  - 0 4 3 M			8	$RGk RGr2   