
    dhN                    j   S SK Jr  S SKrS SKrS SKJr  S SKJr  S SKJ	r	J
r
JrJrJrJrJrJr  S SKrS SKJr  S SKJr  S SKJr  S S	KJrJr  S S
KJr  S SKJr  \R@                  " 5       r!Sr"SS jr#SS jr$SS jr%\\&\\&\'\(\)4   4   r* " S S\5      r+ " S S\5      r, " S S\5      r-SS jr.SS jr/g)    )annotationsN)sha1)Thread)AnyDictIterableListMappingOptionalTupleUnion)Document)
Embeddings)VectorStore)BaseSettingsSettingsConfigDict)	TypedDict)maximal_marginal_relevanceFc                $    U H
  nX ;  d  M
    g   g)z
Check if a string has multiple substrings.
Args:
    s: The string to check
    *args: The substrings to check for in the string

Returns:
    bool: True if all substrings are present in the string, False otherwise
FT )sargsas      b/var/www/html/shao/venv/lib/python3.13/site-packages/langchain_community/vectorstores/starrocks.pyhas_mul_sub_strr      s     :      c                2    [         (       a  [        U 5        gg)zK
Print a debug message if DEBUG is True.
Args:
    s: The message to print
N)DEBUGprint)r   s    r   debug_outputr    &   s     ua r   c                ,   U R                  5       nUR                  U5        UR                  n/ nUR                  5        H5  n0 n[	        U5       H  u  pxX7   S   n	XU	'   M     UR                  U5        M7     [        U5        UR                  5         U$ )z
Get a named result from a query.
Args:
    connection: The connection to the database
    query: The query to execute

Returns:
    List[dict[str, Any]]: The result of the query
r   )cursorexecutedescriptionfetchall	enumerateappendr    close)

connectionqueryr"   columnsresultvalueridxdatumks
             r   get_named_resultr2   0   s      F
NN5  GF"#E*JCQAaD + 	a # 
LLNMr   c                  H    \ rS rSr% S\S'   S\S'   S\S'   S\S	'   S
\S'   Srg)QueryResultL   zList[List[str]]idsz	List[Any]
embeddingsList[Document]	documentszOptional[List[Metadata]]	metadataszOptional[List[float]]	distancesr   N)__name__
__module____qualname____firstlineno____annotations____static_attributes__r   r   r   r4   r4   L   s     	''$$r   r4   c                      \ rS rSr% SrSrS\S'   SrS\S'   S	rS\S
'   Sr	S\S'   SSSSS.r
S\S'   SrS\S'   SrS\S'   S S jr\" SSSSS9rSrg)!StarRocksSettingsT   a  StarRocks client configuration.

Attribute:
    StarRocks_host (str) : An URL to connect to MyScale backend.
                         Defaults to 'localhost'.
    StarRocks_port (int) : URL port to connect with HTTP. Defaults to 8443.
    username (str) : Username to login. Defaults to None.
    password (str) : Password to login. Defaults to None.
    database (str) : Database name to find the table. Defaults to 'default'.
    table (str) : Table name to operate on.
                  Defaults to 'vector_table'.

    column_map (Dict) : Column type map to project column name onto langchain
                        semantics. Must have keys: `text`, `id`, `vector`,
                        must be same size to number of columns. For example:
                        .. code-block:: python

                            {
                                'id': 'text_id',
                                'embedding': 'text_embedding',
                                'document': 'text_plain',
                                'metadata': 'metadata_dictionary_in_json',
                            }

                        Defaults to identity map.
	localhoststrhostiF#  intportrootusername passwordiddocument	embeddingmetadata)rN   rO   rP   rQ   zDict[str, str]
column_mapdefaultdatabase	langchaintablec                    [        X5      $ N)getattr)selfitems     r   __getitem__StarRocksSettings.__getitem__   s    t""r   z.envutf-8
starrocks_ignore)env_fileenv_file_encoding
env_prefixextrar   N)r[   rF   returnr   )r<   r=   r>   r?   __doc__rG   r@   rI   rK   rM   rR   rT   rV   r\   r   model_configrA   r   r   r   rC   rC   T   s    6 D#D#HcHc  	"J  HcE3# &!	Lr   rC   c                    ^  \ rS rSrSr S       SU 4S jjjrSS jr\SS j5       rSS jr	SS jr
   S           SS jjr\    S               SS	 jj5       rSS
 jr S       SS jjr S          S!S jjr  S          S"S jjr S          S#S jjrS$S jr\SS j5       r   S%           S&S jjr     S'               S(S jjrSrU =r$ ))	StarRocks   a  `StarRocks` vector store.

You need a `pymysql` python package, and a valid account
to connect to StarRocks.

Right now StarRocks has only implemented `cosine_similarity` function to
compute distance between two vectors. And there is no vector inside right now,
so we have to iterate all vectors and compute spatial distance.

For more information, please visit
    [StarRocks official site](https://www.starrocks.io/)
    [StarRocks github](https://github.com/StarRocks/starrocks)
c                  >  SSK n SSKJn  XPl        [        TU ]  5         Ub  X l        O[        5       U l        U R                  (       d   eU R                  R                  (       a  U R                  R                  (       d   eU R                  R                  (       a6  U R                  R                  (       a  U R                  R                  (       d   eS H  nX`R                  R                  ;   a  M   e   [        UR                  S5      5      nSU R                  R                   S	U R                  R                   S
U R                  R                  S    SU R                  R                  S    SU R                  R                  S    SU R                  R                  S    S3U l        Xpl        SU l        SU l        Xl        SU l        [+        U R                  5        UR,                  " SU R                  R                  U R                  R                  U R                  R.                  U R                  R0                  U R                  R                  S.UD6U l        [+        U R                  5        [5        U R2                  U R                  5        g! [         a    [        S5      ef = f! [         a    S U l         GNf = f)zStarRocks Wrapper to LangChain

embedding_function (Embeddings):
config (StarRocksSettings): Configuration to StarRocks Client
r   NzVCould not import pymysql python package. Please install it with `pip install pymysql`.)tqdmc                    U $ rX   r   )xkwargss     r   <lambda>$StarRocks.__init__.<locals>.<lambda>   s    Qr   )rN   rP   rO   rQ   testzCREATE TABLE IF NOT EXISTS .z
(    
    rN   z string,
    rO   rP   z array<float>,
    rQ   zf string
) ENGINE = OLAP PRIMARY KEY(id) DISTRIBUTED BY HASH(id)   PROPERTIES ("replication_num" = "1")\)rt   'DESC)rG   rI   userrM   rT   r   )pymysqlImportErrorrl   pgbarsuper__init__configrC   rG   rI   rR   rT   rV   lenembed_queryschemadimBSmust_escapeembedding_function
dist_orderr    connectrK   rM   r)   r2   )	rZ   rP   r}   ro   rx   rl   r1   r   	__class__s	           r   r|   StarRocks.__init__   sQ   		/!J 	 K+-DK{{{{{DKK$4$444{{%%$++*>*>4;;CTCTTT<A..... = )''/0 KK0014;;3D3D2E F	[[D!" #	[[J'( )	[[K() *	[[J'( ) &"+ T[[! "// 
!!!!%%[[))[[))
 
 	T[[!$++6k  	@ 	  	/.DJ	/s   J" J; "J8;KKc                :   ^  SR                  U 4S jU 5       5      $ )NrL   c              3  d   >#    U H&  oTR                   ;   a  TR                   U 3OUv   M(     g 7frX   )r   r   ).0crZ   s     r   	<genexpr>'StarRocks.escape_str.<locals>.<genexpr>   s/     VPU1t/?/?*?$''1#QFPUs   -0)join)rZ   r-   s   ` r   
escape_strStarRocks.escape_str   s    wwVPUVVVr   c                    U R                   $ rX   )r   rZ   s    r   r7   StarRocks.embeddings   s    &&&r   c                   SR                  U5      n[        U5      R                  U R                  R                  S   5      n/ nU Hu  nSR                  [        U5       VVs/ sH6  u  pxXt:w  a  SU R                  [        U5      5       S3OS[        U5       3PM8     snn5      nUR                  SU S35        Mw     SU R                  R                   SU R                  R                   SU S	SR                  U5       S
3	n	U	$ s  snnf )N,rP   ru   zarray<float>()z1
                INSERT INTO
                    rs   z))
                VALUES
                z
                )r   tupleindexr}   rR   r&   r   rF   r'   rT   rV   )
rZ   transaccolumn_namesksembed_tuple_index_datanr/   _ni_strs
             r   _build_insert_sqlStarRocks._build_insert_sql   s   XXl#!,/55KK"";/
 A &/q\ &2	 3 DOOCG45Q7+CG956 &2	A LL1QCq" [[))*!DKK,=,=+>at D%! "	 !s   #<Dc                h    U R                  X5      n[        U5        [        U R                  U5        g rX   )r   r    r2   r)   )rZ   r   r   _insert_querys       r   _insertStarRocks._insert   s(    ..wE]#-8r   c           
        U=(       d8    U Vs/ sH+  n[        UR                  S5      5      R                  5       PM-     snnU R                  R                  n/ nUS   UUS   UUS   U R
                  R                  [        U5      5      0n	U=(       d    U V
s/ sH  n
0 PM     sn
n[        [        R                  U5      XS   '   [        [        U5      [        U	5      -
  5      S:  d   e[        U	R                  5       6 u  p SnU R                  [        U6 S[        U5      S	9 H  n[        XR!                  U R                  R                  S   5         5      U R"                  :X  d   eUR%                  U5        [        U5      U:X  d  Mh  U(       a  UR'                  5         [)        U R*                  X/S
9nUR-                  5         / nM     [        U5      S:  a(  U(       a  UR'                  5         U R+                  X5        U Vs/ sH  oPM     sn$ s  snf s  sn
f s  snf ! [.         a:  n[0        R3                  S[5        U5       S[7        U5       S35        / s SnA$ SnAff = f)ab  Insert more texts through the embeddings and add to the VectorStore.

Args:
    texts: Iterable of strings to add to the VectorStore.
    ids: Optional list of ids to associate with the texts.
    batch_size: Batch size of insertion
    metadata: Optional column data to be inserted

Returns:
    List of ids from adding the texts into the VectorStore.

r^   rN   rO   rP   rQ   r   NzInserting data...)desctotal)targetr   	[91m[1m
[0m [95m[0m)r   encode	hexdigestr}   rR   r   embed_documentslistmapjsondumpsr~   setzipitemsrz   r   r   r'   r   r   r   start	ExceptionloggererrortyperF   )rZ   textsr:   
batch_sizer6   ro   tcolmap_r   r   _keysvaluesvies                   r   	add_textsStarRocks.add_texts  s   * I5I5ad188G,-7795I++((DM3JK $"9"9"I"I$u+"V

 4e!4e"e!4	,/

I,FZ()3w<#l"334999L..01	AZZV#6c)n    **T[[%;%;K%HIJKtxxWWq!w<:-dll'IAGGI G 7|aFFHW+"#s!As##? J "5. $ 	LL?47)3CCF87STI	sC   1HHBH) A=H) 
H$H) $H) )
I-3/I("I-(I-c                8    U " X$40 UD6nUR                  XXcS9  U$ )ac  Create StarRocks wrapper with existing texts

Args:
    embedding_function (Embeddings): Function to extract text embedding
    texts (Iterable[str]): List or tuple of strings to be added
    config (StarRocksSettings, Optional): StarRocks configuration
    text_ids (Optional[Iterable], optional): IDs for the texts.
                                             Defaults to None.
    batch_size (int, optional): Batchsize when transmitting data to StarRocks.
                                Defaults to 32.
    metadata (List[dict], optional): metadata to texts. Defaults to None.
Returns:
    StarRocks Index
)r6   r   r:   )r   )	clsr   rP   r:   r}   text_idsr   ro   ctxs	            r   
from_textsStarRocks.from_texts<  s(    2 ).v.ejV
r   c                   SU R                   R                   SU R                   R                   S3nXR                   R                   SU R                   R                   S3-  nUSU R                   R
                   S3-  nSnS	nUS
X#-  S-   -  S-   -  n/ SQnUSUS   S SUS   S 3-  nUSUS   S S3-  nUS
X#-  S-   -  S-   -  nSU R                   R                   SU R                   R                   3n[        U5        [        U R                  U5      nU H#  nUSUS   S SUS   S 3-  nUSUS   S S3-  nM%     US
X#-  S-   -  S-   -  nU$ )zText representation for StarRocks Vector Store, prints backends, username
    and schemas. Easy to use with `str(StarRocks())`

Returns:
    repr: string to show connection info and data schema
z	[92m[1mrs   z @ :z[0m

z[1musername: z[0m

Table Schema:
      -   
)namer   keyz|[94mr   24sz
[0m|[96m   z[0m|
zDESC FieldTypeKey)	r}   rT   rV   rG   rI   rK   r    r2   r)   )rZ   _reprwidthfieldsr+   q_strrsr.   s           r   __repr__StarRocks.__repr__Y  s    "$++"6"6!7q9J9J8K3OKK$$%Qt{{'7'7&8DD$T[[%9%9$::TUU*+d22)9WQZ,,<WQZ<LMM#GAJs#3:>>*+d22,,-Qt{{/@/@.ABUdoou5Ay7C 00@63PPE'%~Z@@E  	*+d22r   c                   SR                  [        [        U5      5      nU(       a  SU 3nOSnSU R                  R                  S    SU R                  R                  S    SU S	U R                  R                  S
    SU R                  R                  S
    SU R                  R
                   SU R                  R                   SU SU R                   SU S3n[        U5        U$ )Nr   zWHERE rL   z?
            SELECT 
                id as id,
                rO   z as document, 
                rQ   zC as metadata, 
                cosine_similarity_norm(array<float>[z],
                rP   z) as dist,
                z as embedding
            FROM rs   z
            z
            ORDER BY dist z
            LIMIT )	r   r   rF   r}   rR   rT   rV   r   r    )rZ   q_embtopk	where_str	q_emb_strr   s         r   _build_query_sqlStarRocks._build_query_sqls  s    HHSe_-	 ,II ''
34 5''
34 555>K @''45 6''45 6++&&'q):):(; <K ??+ ,&  	Ur   c                \    U R                   " U R                  R                  U5      X#40 UD6$ )aK  Perform a similarity search with StarRocks

Args:
    query (str): query string
    k (int, optional): Top K neighbors to retrieve. Defaults to 4.
    where_str (Optional[str], optional): where condition string.
                                         Defaults to None.

    NOTE: Please do not let end-user to fill this and always be aware
          of SQL injection. When dealing with metadatas, remember to
          use `{self.metadata_column}.attribute` instead of `attribute`
          alone. The default name for it is `metadata`.

Returns:
    List[Document]: List of Documents
)similarity_search_by_vectorr   r   )rZ   r*   r1   r   ro   s        r   similarity_searchStarRocks.similarity_search  s5    & //##//6
HN
 	
r   c                   U R                  XU5      n [        U R                  U5      nU Vs/ sHS  n[        XpR                  R
                  S      [        R                  " XpR                  R
                  S      5      S9PMU     sn$ s  snf ! [         a:  n[        R                  S[        U5       S[        U5       S35        / s SnA$ SnAff = f)ac  Perform a similarity search with StarRocks by vectors

Args:
    query (str): query string
    k (int, optional): Top K neighbors to retrieve. Defaults to 4.
    where_str (Optional[str], optional): where condition string.
                                         Defaults to None.

    NOTE: Please do not let end-user to fill this and always be aware
          of SQL injection. When dealing with metadatas, remember to
          use `{self.metadata_column}.attribute` instead of `attribute`
          alone. The default name for it is `metadata`.

Returns:
    List[Document]: List of (Document, similarity)
rO   rQ   page_contentrQ   r   r   r   N)r   r2   r)   r   r}   rR   r   loadsr   r   r   r   rF   )	rZ   rP   r1   r   ro   r   q_rr.   r   s	            r   r   %StarRocks.similarity_search_by_vector  s    . %%iI>	"4??E:C 
 A	 !";;#9#9*#E!F!ZZ++*@*@*L(MN     	LL?47)3CCF87STI	s/   B AB
B 
B 
C/CCCc                   U R                  U R                  R                  U5      X#5      n [        U R                  U5       Vs/ sHX  n[        X`R                  R                  S      [        R                  " X`R                  R                  S      5      S9US   4PMZ     sn$ s  snf ! [         a:  n[        R                  S[        U5       S[        U5       S35        / s SnA$ SnAff = f)	aK  Perform a similarity search with StarRocks

Args:
    query (str): query string
    k (int, optional): Top K neighbors to retrieve. Defaults to 4.
    where_str (Optional[str], optional): where condition string.
                                         Defaults to None.

    NOTE: Please do not let end-user to fill this and always be aware
          of SQL injection. When dealing with metadatas, remember to
          use `{self.metadata_column}.attribute` instead of `attribute`
          alone. The default name for it is `metadata`.

Returns:
    List[Document]: List of documents
rO   rQ   r   distr   r   r   N)r   r   r   r2   r)   r   r}   rR   r   r   r   r   r   r   rF   )rZ   r*   r1   r   ro   r   r.   r   s           r   'similarity_search_with_relevance_scores1StarRocks.similarity_search_with_relevance_scores  s    & %%##//6
	 *$//5A	 BA %&{{'='=j'I%J!%Akk.D.DZ.P,Q!R fI B	 	 	  	LL?47)3CCF87STI	s0   B+ AB&#B+ &B+ +
C/5/C*$C/*C/c                    [        U R                  SU R                  R                   SU R                  R                   35        g)z
Helper function: Drop data
zDROP TABLE IF EXISTS rs   N)r2   r)   r}   rT   rV   r   s    r   dropStarRocks.drop  s9     	OO#DKK$8$8#94;;;L;L:MN	
r   c                4    U R                   R                  S   $ )NrQ   )r}   rR   r   s    r   metadata_columnStarRocks.metadata_column  s    {{%%j11r   c                   U R                  XS 5      n[        U R                  U5      n[        U Vs/ sH  oS   PM	     snU Vs/ sH2  n[        R
                  " XR                  R                  S      5      PM4     snU Vs/ sH  oU R                  R                  S      PM      snU Vs/ sH2  n[        R
                  " XR                  R                  S      5      PM4     snU Vs/ sH  oS   PM	     snS9n	[        [        R                  " U[        R                  S9U	S   UUS	9n
[        U	5      n[        U5       VVs/ sH  u  pX;   d  M  UPM     nnnU$ s  snf s  snf s  snf s  snf s  snf s  snnf )
NrN   rP   rO   rQ   r   )r6   r7   r9   r:   r;   )dtyper7   )r1   lambda_mult)r   r2   r)   r4   r   r   r}   rR   r   nparrayfloat32_results_to_docsr&   )rZ   rP   r1   fetch_kr  ro   r   r   r.   resultsmmr_selected
candidatesr   selected_resultss                 r   'max_marginal_relevance_search_by_vector1StarRocks.max_marginal_relevance_search_by_vector  s^    %%i$?t6"%&#Q4#&LOLOq

1[[33K@ABC GJJc//
;<cJRUVRUQtzz!KK$:$::$F"GHRUV*-.#Qy#.
 2HHYbjj1L!#	
 &g.
*3J*?U*?$!1CTA*?U' ' KV. Vs)   E8E$
$E)-8E.+E3
E8E8c           	         U R                   c  [        S5      eU R                   R                  U5      nU R                  UUUUUUS9$ )NzBFor MMR search, you must specify an embedding function oncreation.)r  filterwhere_document)r7   
ValueErrorr   r  )	rZ   r*   r1   r  r  r  r  ro   rP   s	            r   max_marginal_relevance_search'StarRocks.max_marginal_relevance_search  s^     ??"T  OO//6	;;#) < 
 	
r   )	r   r}   r)   r   r   r   r   rz   r   rX   )rP   r   r}   Optional[StarRocksSettings]ro   r   re   None)r-   rF   re   rF   )re   r   )r   r   r   Iterable[str]re   rF   )r   r   r   r  re   r  )N    N)r   r  r:   zOptional[List[dict]]r   rH   r6   Optional[Iterable[str]]ro   r   re   	List[str])NNNr  )r   r  rP   r   r:   zOptional[List[Dict[Any, Any]]]r}   r  r   r  r   rH   ro   r   re   ri   )re   rF   )r   List[float]r   rH   r   Optional[str]re   rF   )   N)
r*   rF   r1   rH   r   r  ro   r   re   r8   )
rP   r  r1   rH   r   r  ro   r   re   r8   )
r*   rF   r1   rH   r   r  ro   r   re   List[Tuple[Document, float]])re   r  )r           ?)rP   zlist[float]r1   rH   r  rH   r  floatro   r   re   zlist[Document])   r  r   NN)r*   rF   r1   rH   r  rH   r  r!  r  Optional[Dict[str, str]]r  r#  ro   r   re   r8   )r<   r=   r>   r?   rf   r|   r   propertyr7   r   r   r   classmethodr   r   r   r   r   r   r   r   r  r  rA   __classcell__)r   s   @r   ri   ri      s   " /3B7B7 ,B7 	B7
 
B7 B7HW ' '49 +/'+77 (7 	7
 %7 7 
7r 
 59.2,0  2	
 , *   
 86 IM (+8E	6 BF

 
1>
QT
	
4 #'	## # !	#
 # 
#L BF## #1>#QT#	%#J
 2 2       	 
     
 F  +/37

 
 	

 
 )
 1
 
 

 
r   ri   c                L    [        U 5       VVs/ sH  u  pUPM	     snn$ s  snnf rX   )_results_to_docs_and_scores)r	  docr   s      r   r  r  5  s#    9'BCBFCCBCCCs    c                    [        U S   U S   U S   5       Vs/ sH!  n[        US   US   =(       d    0 S9US   4PM#     sn$ s  snf )Nr9   r:   r;   r   r   r   r   )r   r   )r	  r,   s     r   r(  r(  9  sf     K K K 

F 
vay6!9?	CVAYO
  s   'A)r   rF   r   r   re   bool)r   r   re   r  )r)   r   r*   rF   re   zList[dict[str, Any]])r	  r   re   r8   )r	  r   re   r  )0
__future__r   r   logginghashlibr   	threadingr   typingr   r   r   r	   r
   r   r   r   numpyr  langchain_core.documentsr   langchain_core.embeddingsr   langchain_core.vectorstoresr   pydantic_settingsr   r   typing_extensionsr   &langchain_community.vectorstores.utilsr   	getLoggerr   r   r   r    r2   rF   rH   r!  r+  Metadatar4   rC   ri   r  r(  r   r   r   <module>r:     s    "     M M M  - 0 3 > ' M				 2 3c3t3445%) %3 3lh
 h
VDr   