
    dhY                        S SK Jr  S SKrS SKrS SKJr  S SKJr  S SKJ	r	J
r
JrJrJrJr  S SKJr  S SKJr  S SKJr  S S	KJrJr  \R0                  " 5       rSS
 jr " S S\5      r " S S\5      r " S S\5      rg)    )annotationsN)sha1)Thread)AnyDictIterableListOptionalTuple)Document)
Embeddings)VectorStore)BaseSettingsSettingsConfigDictc                $    U H
  nX ;  d  M
    g   g)z
Check if a string contains multiple substrings.
Args:
    s: string to check.
    *args: substrings to check.

Returns:
    True if all substrings are in the string, False otherwise.
FT )sargsas      `/var/www/html/shao/venv/lib/python3.13/site-packages/langchain_community/vectorstores/myscale.pyhas_mul_sub_strr      s     :      c                      \ rS rSr% SrSrS\S'   SrS\S'   S	rS
\S'   S	r	S
\S'   Sr
S\S'   S	rS\S'   SSSSS.rS\S'   SrS\S'   SrS\S'   SrS\S'   S%S jr\" SS S!S"S#9rS$rg	)&MyScaleSettings!   aJ  MyScale client configuration.

Attribute:
    myscale_host (str) : An URL to connect to MyScale backend.
                         Defaults to 'localhost'.
    myscale_port (int) : URL port to connect with HTTP. Defaults to 8443.
    username (str) : Username to login. Defaults to None.
    password (str) : Password to login. Defaults to None.
    index_type (str): index type string.
    index_param (dict): index build parameter.
    database (str) : Database name to find the table. Defaults to 'default'.
    table (str) : Table name to operate on.
                  Defaults to 'vector_table'.
    metric (str) : Metric to compute distance,
                   supported are ('L2', 'Cosine', 'IP'). Defaults to 'Cosine'.
    column_map (Dict) : Column type map to project column name onto langchain
                        semantics. Must have keys: `text`, `id`, `vector`,
                        must be same size to number of columns. For example:
                        .. code-block:: python

                            {
                                'id': 'text_id',
                                'vector': 'text_embedding',
                                'text': 'text_plain',
                                'metadata': 'metadata_dictionary_in_json',
                            }

                        Defaults to identity map.

	localhoststrhosti   intportNOptional[str]usernamepasswordMSTG
index_typezOptional[Dict[str, str]]index_paramidtextvectormetadata)r'   r(   r)   r*   zDict[str, str]
column_mapdefaultdatabase	langchaintableCosinemetricc                    [        X5      $ N)getattr)selfitems     r   __getitem__MyScaleSettings.__getitem__U   s    t""r   z.envutf-8myscale_ignore)env_fileenv_file_encoding
env_prefixextrar   )r6   r   returnr   )__name__
__module____qualname____firstlineno____doc__r   __annotations__r    r"   r#   r%   r&   r+   r-   r/   r1   r7   r   model_config__static_attributes__r   r   r   r   r   !   s    > D#D#"Hm""Hm"J,0K)0 	"J  HcE3FC# &!	Lr   r   c                    ^  \ rS rSrSr S       SU 4S jjjr\SS j5       rSS jrSS jr	SS jr
   S           SS jjr\    S               SS	 jj5       rSS
 jr S       SS jjr S         S S jjr  S         S!S jjr S         S"S jjrS#S jr  S$       S%S jjr\SS j5       rSrU =r$ )&MyScale`   aq  `MyScale` vector store.

You need a `clickhouse-connect` python package, and a valid account
to connect to MyScale.

MyScale can not only search with simple vector indexes.
It also supports a complex query with multiple conditions,
constraints and even sub-queries.

For more information, please visit
    [myscale official site](https://docs.myscale.com/en/overview/)
c                $	  >  SSK Jn   SSKJn  XPl        [
        TU ]  5         Ub  X l        O[        5       U l        U R                  (       d   eU R                  R                  (       a  U R                  R                  (       d   eU R                  R                  (       aQ  U R                  R                  (       a6  U R                  R                  (       a  U R                  R                  (       d   eS H  nX`R                  R                  ;   a  M   e   U R                  R                  R                  5       S;   d   eU R                  R                  S	;   a  [         R#                  S
5        [%        UR'                  S5      5      nU R                  R(                  (       aQ  SSR+                  U R                  R(                  R-                  5        VVs/ sH  u  phSU SU S3PM     snn5      -   OSn	SU R                  R                   SU R                  R                   SU R                  R                  S    SU R                  R                  S    SU R                  R                  S    SU R                  R                  S    SU R                  R                  S    SU SU R                  R                  S    SU R                  R.                   SU R                  R                   SU	 SU R                  R                  S    S 3n
Xpl        S!U l        S"U l        Xl        U R                  R                  R                  5       S#;   a  S$OS%U l        U" S+U R                  R                  U R                  R                  U R                  R:                  U R                  R<                  S&.UD6U l         U R>                  RA                  S'5        U R>                  RA                  S*5        U R>                  RA                  U
5        g! [         a    [        S5      ef = f! [         a    S U l         GN.f = fs  snnf ! [B         a7  n[         RE                  S(U R>                  RF                   S)35         SnANSnAff = f),zMyScale Wrapper to LangChain

embedding (Embeddings):
config (MyScaleSettings): Configuration to MyScale Client
Other keyword arguments will pass into
    [clickhouse-connect](https://docs.myscale.com/)
r   )
get_clientzlCould not import clickhouse connect python package. Please install it with `pip install clickhouse-connect`.)tqdmc                    U $ r3   r   )xs    r   <lambda>"MyScale.__init__.<locals>.<lambda>   s    1r   N)r'   r)   r(   r*   )IPCOSINEL2)ipcosinel2z_Lower case metric types will be deprecated the future. Please use one of ('IP', 'Cosine', 'L2')ztry this out, ,'= z(
            CREATE TABLE IF NOT EXISTS .z(
                r'   z String,
                r(   r)   z! Array(Float32),
                r*   zP JSON,
                CONSTRAINT cons_vec_len CHECK length(                    z) = z$,
                VECTOR INDEX vidx z                     TYPE z&(                        'metric_type=z,)
            ) ENGINE = MergeTree ORDER BY z	
        \)r_   r[   )rT   rU   ASCDESC)r   r    r"   r#   z"SET allow_experimental_json_type=1zClickhouse version=z6 - There is no allow_experimental_json_type parameter.z$SET allow_experimental_object_type=1r   )$clickhouse_connectrM   ImportErrorrN   pgbarsuper__init__configr   r   r    r+   r-   r/   r1   upperloggerwarninglenembed_queryr&   joinitemsr%   dimBSmust_escape_embeddings
dist_orderr"   r#   clientcommand	Exceptiondebugserver_version)r5   	embeddingrg   kwargsrM   rN   kro   vindex_paramsschema__	__class__s               r   rf   MyScale.__init__n   s   	5	%!J 	 K)+DK{{{{{DKK$4$444KK""$$!!""		
#
 6A..... 6{{!!'')-CCCC;;!77NNG )''78 {{&& 388dkk6M6M6S6S6UV6Udaq1QCq\6UVWW 	
((,(<(<'=Qt{{?P?P>Q R''-. /''/0 1''12 3''
34 5[[++H56d3% @##';;#9#9(#C"D E++001 2&&*kk&8&8%9<. I++/;;+A+A$+G*H I	 &$[[''--/3CCE 	
 ! 
!!!![[))[[))	

 
	KK DE 	BCG$_  	K 	  	%$DJ	%: WF  	LL%dkk&@&@%A BF F 	s:   P P/ QQ P,/QQ
R-R

Rc                    U R                   $ r3   )rr   r5   s    r   
embeddingsMyScale.embeddings   s    r   c                :   ^  SR                  U 4S jU 5       5      $ )Nr]   c              3  d   >#    U H&  oTR                   ;   a  TR                   U 3OUv   M(     g 7fr3   )rq   rp   ).0cr5   s     r   	<genexpr>%MyScale.escape_str.<locals>.<genexpr>   s/     VPU1t/?/?*?$''1#QFPUs   -0)rm   )r5   values   ` r   
escape_strMyScale.escape_str   s    wwVPUVVVr   c                x   SR                  U5      n/ nU HU  nSR                  U Vs/ sH!  nSU R                  [        U5      5       S3PM#     sn5      nUR                  SU S35        MW     SU R                  R
                   SU R                  R                   SU SSR                  U5       S3	nU$ s  snf )	NrZ   r[   ()z8
                INSERT INTO TABLE 
                    r^   z))
                VALUES
                z
                )rm   r   r   appendrg   r-   r/   )r5   transaccolumn_namesks_datan_ni_strs           r   _build_istrMyScale._build_istr   s    XXl#AAFAbAdooc"g67q9AFGALL1QCq" [[))*!DKK,=,=+>at D%! "	  Gs   'B7
c                \    U R                  X5      nU R                  R                  U5        g r3   )r   rt   ru   )r5   r   r   _i_strs       r   _insertMyScale._insert   s$    !!'8F#r   c           	        U=(       d8    U Vs/ sH+  n[        UR                  S5      5      R                  5       PM-     snnU R                  R                  n/ nUS   UUS   UUS   [        U R                  R                  U5      0n	U=(       d    U V
s/ sH  n
0 PM     sn
n[        [        R                  U5      XS   '   [        [        U5      [        U	5      -
  5      S:  d   e[        U	R                  5       6 u  p SnU R                  [        U6 S[        U5      S	9 H  n[        XR                  U R                  R                  S   5         5      U R                   :X  d   eUR#                  U5        [        U5      U:X  d  Mh  U(       a  UR%                  5         ['        U R(                  X/S
9nUR+                  5         / nM     [        U5      S:  a(  U(       a  UR%                  5         U R)                  X5        U Vs/ sH  oPM     sn$ s  snf s  sn
f s  snf ! [,         a:  n[.        R1                  S[3        U5       S[5        U5       S35        / s SnA$ SnAff = f)a_  Run more texts through the embeddings and add to the vectorstore.

Args:
    texts: Iterable of strings to add to the vectorstore.
    ids: Optional list of ids to associate with the texts.
    batch_size: Batch size of insertion
    metadata: Optional column data to be inserted

Returns:
    List of ids from adding the texts into the vectorstore.

r9   r'   r(   r)   r*   r   NzInserting data...)desctotal)targetr   	[91m[1m
[0m [95m[0m)r   encode	hexdigestrg   r+   maprr   rl   jsondumpsrk   setziprn   rd   indexro   r   rm   r   r   startrv   ri   errortyper   )r5   texts	metadatas
batch_sizeidsrz   tcolmap_r   r   r   keysvaluesr|   ies                   r   	add_textsMyScale.add_texts   s   * I5I5ad188G,-7795I++((DM3FOUHs4#3#3#?#?G

 4e!4e"e!4	,/

I,FZ()3w<#l"334999L..01	AZZV#6c)n    1ZZ(>(>x(HIJKtxxWWWq!w<:-dll'IAGGI G 7|aFFHW+"#s!As##= J "5* $ 	LL?47)3CCF87STI	sC   1HHBH% A=H% 	
H H%  H% %
I)//I$I)$I)c                8    U " X$40 UD6nUR                  XXcS9  U$ )a  Create Myscale wrapper with existing texts

Args:
    texts (Iterable[str]): List or tuple of strings to be added
    embedding (Embeddings): Function to extract text embedding
    config (MyScaleSettings, Optional): Myscale configuration
    text_ids (Optional[Iterable], optional): IDs for the texts.
                                             Defaults to None.
    batch_size (int, optional): Batchsize when transmitting data to MyScale.
                                Defaults to 32.
    metadata (List[dict], optional): metadata to texts. Defaults to None.
    Other keyword arguments will pass into
        [clickhouse-connect](https://clickhouse.com/docs/en/integrations/python#clickhouse-connect-driver-api)
Returns:
    MyScale Index
)r   r   r   )r   )	clsr   ry   r   rg   text_idsr   rz   ctxs	            r   
from_textsMyScale.from_texts  s(    6 ).v.ejV
r   c                   SU R                   R                   SU R                   R                   S3nXR                   R                   SU R                   R                   S3-  nUSU R                   R
                   S3-  nUS-  nU R                  R                  S	U R                   R                   SU R                   R                   35      R                  5        H  nUS
US   S SUS   S S3-  nM     US-  nU$ )zText representation for myscale, prints backends, username and schemas.
    Easy to use with `str(Myscale())`

Returns:
    repr: string to show connection info and data schema
z	[92m[1mr^   z @ :z[0m

z[1musername: z[0m

Table Schema:
z4---------------------------------------------------
zDESC z|[94mname24sz
[0m|[96mr   z[0m|
)	rg   r-   r/   r   r    r"   rt   querynamed_results)r5   _reprrs      r   __repr__MyScale.__repr__>  s
    "$++"6"6!7q9J9J8K3OKK$$%Qt{{'7'7&8DD$T[[%9%9$::TUU ""DKK(()4;;+<+<*=>

-/A AfIc?*:1V9S/TE 	 r   c                   SR                  [        [        U5      5      nU(       a  SU 3nOSnSU R                  R                  S    SU R                  R                  S    SU R                  R
                   S	U R                  R                   S
U SU R                  R                  S    SU SU R                   SU S
3nU$ )NrZ   	PREWHERE r]   
            SELECT r(   z, 
                r*   z, dist
            FROM r^   
            
            ORDER BY distance(r)   , []) 
                AS dist 
            LIMIT )rm   r   r   rg   r+   r-   r/   rs   r5   q_embtopk	where_str	q_emb_strq_strs         r   _build_qstrMyScale._build_qstrR  s     HHSe_-	#I;/IIKK**623 4''
34 5++&&'q):):(; <K #{{55h?@I; O) *&  r   c                \    U R                   " U R                  R                  U5      X#40 UD6$ )aI  Perform a similarity search with MyScale

Args:
    query (str): query string
    k (int, optional): Top K neighbors to retrieve. Defaults to 4.
    where_str (Optional[str], optional): where condition string.
                                         Defaults to None.

    NOTE: Please do not let end-user to fill this and always be aware
          of SQL injection. When dealing with metadatas, remember to
          use `{self.metadata_column}.attribute` instead of `attribute`
          alone. The default name for it is `metadata`.

Returns:
    List[Document]: List of Documents
)similarity_search_by_vectorrr   rl   )r5   r   r{   r   rz   s        r   similarity_searchMyScale.similarity_searchf  s5    & //((/
AG
 	
r   c           	        U R                  XU5      n U R                  R                  U5      R                  5        Vs/ sH?  n[	        X`R
                  R                  S      X`R
                  R                  S      S9PMA     sn$ s  snf ! [         a:  n[        R                  S[        U5       S[        U5       S35        / s SnA$ SnAff = f)a  Perform a similarity search with MyScale by vectors

Args:
    query (str): query string
    k (int, optional): Top K neighbors to retrieve. Defaults to 4.
    where_str (Optional[str], optional): where condition string.
                                         Defaults to None.

    NOTE: Please do not let end-user to fill this and always be aware
          of SQL injection. When dealing with metadatas, remember to
          use `{self.metadata_column}.attribute` instead of `attribute`
          alone. The default name for it is `metadata`.

Returns:
    List[Document]: List of (Document, similarity)
r(   r*   page_contentr*   r   r   r   N)r   rt   r   r   r   rg   r+   rv   ri   r   r   r   r5   ry   r{   r   rz   r   r   r   s           r   r   #MyScale.similarity_search_by_vector}  s    .   y9
	 **51??A
 BA	 !";;#9#9&#A!B{{55jAB B    	LL?47)3CCF87STI	s/   +B ABB B 
C/CCCc           	        U R                  U R                  R                  U5      X#5      n U R                  R	                  U5      R                  5        Vs/ sHD  n[        X`R                  R                  S      X`R                  R                  S      S9US   4PMF     sn$ s  snf ! [         a:  n[        R                  S[        U5       S[        U5       S35        / s SnA$ SnAff = f)	  Perform a similarity search with MyScale

Args:
    query (str): query string
    k (int, optional): Top K neighbors to retrieve. Defaults to 4.
    where_str (Optional[str], optional): where condition string.
                                         Defaults to None.

    NOTE: Please do not let end-user to fill this and always be aware
          of SQL injection. When dealing with metadatas, remember to
          use `{self.metadata_column}.attribute` instead of `attribute`
          alone. The default name for it is `metadata`.

Returns:
    List[Document]: List of documents most similar to the query text
    and cosine distance in float for each.
    Lower score represents more similarity.
r(   r*   r   distr   r   r   N)r   rr   rl   rt   r   r   r   rg   r+   rv   ri   r   r   r   r5   r   r{   r   rz   r   r   r   s           r   'similarity_search_with_relevance_scores/MyScale.similarity_search_with_relevance_scores  s    *   !1!1!=!=e!DaS	 **51??A	 BA %&{{'='=f'E%F!";;#9#9*#E!F fI B	 	 	  	LL?47)3CCF87STI	s0   +B* A
B%"B* %B* *
C.4/C)#C.)C.c                    U R                   R                  SU R                  R                   SU R                  R                   35        g)z
Helper function: Drop data
zDROP TABLE IF EXISTS r^   N)rt   ru   rg   r-   r/   r   s    r   dropMyScale.drop  s<     	#DKK$8$8#94;;;L;L:MN	
r   c                z   Uc
  Uc   S5       e/ nU(       ab  [        U5      S:  aS  SR                  U Vs/ sH	  nSU S3PM     sn5      nUR                  U R                  R                  S    SU S35        U(       a  UR                  U5        [        U5      S:  d   eS	R                  U5      nS
U R                  R
                   SU R                  R                   SU 3n U R                  R                  U5        gs  snf ! [         a(  n	[        R                  [        U	5      5         Sn	A	gSn	A	ff = f)zDelete by vector ID or other criteria.

Args:
    ids: List of ids to delete.
    **kwargs: Other keyword arguments that subclasses might use.

Returns:
    Optional[bool]: True if deletion is successful,
    False otherwise, None if not implemented.
NzIYou need to specify where to be deleted! Either with `ids` or `where_str`r   rY   r[   r'   z IN (r   z AND zDELETE FROM r^   z WHERE TF)rk   rm   r   rg   r+   r-   r/   rt   ru   rv   ri   r   r   )
r5   r   r   rz   condsr'   id_listwhere_str_finalqstrr   s
             r   deleteMyScale.delete  s*     KI$5 	
W	
6 3s8a<iiS 9Sr1RDS 9:GLLDKK22489wiqIJLL#5zA~~!,,u-4;;//0$++2C2C1D E$%' 		KK% !:  	LLQ 	s   D'D 
D:D55D:c                4    U R                   R                  S   $ )Nr*   )rg   r+   r   s    r   metadata_columnMyScale.metadata_column  s    {{%%j11r   )rp   rr   rt   rg   ro   rs   rq   rd   r3   )ry   r   rg   Optional[MyScaleSettings]rz   r   r@   None)r@   r   )r   r   r@   r   )r   r   r   Iterable[str]r@   r   )r   r   r   r   r@   r   )N    N)r   r   r   zOptional[List[dict]]r   r   r   Optional[Iterable[str]]rz   r   r@   	List[str])NNNr   )r   r   ry   r   r   zOptional[List[Dict[Any, Any]]]rg   r   r   r   r   r   rz   r   r@   rJ   r@   r   r   List[float]r   r   r   r!   r@   r      N)
r   r   r{   r   r   r!   rz   r   r@   List[Document]
ry   r  r{   r   r   r!   rz   r   r@   r  
r   r   r{   r   r   r!   rz   r   r@   zList[Tuple[Document, float]])r@   r   )NN)r   zOptional[List[str]]r   r!   rz   r   r@   zOptional[bool])rA   rB   rC   rD   rE   rf   propertyr   r   r   r   r   classmethodr   r   r   r   r   r   r   r   r   rH   __classcell__r   s   @r   rJ   rJ   `   sH     -1^%^% *^% 	^%
 
^% ^%@    W$ +/'+66 (6 	6
 %6 6 
6p 
 59,0,0  2	
 * *   
 <* IM (+8E	* BF

 
1>
QT
	
4 #'	"" " !	"
 " 
"J BF## #1>#QT#	%#J
 $(#'$ $ !$ 	$
 
$L 2 2r   rJ   c                     ^  \ rS rSrSrS/ 4         S
U 4S jjjr S       SS jjr  S         SS jjr S         SS jjr\	SS j5       r
S	rU =r$ )MyScaleWithoutJSONi  zkMyScale vector store without metadata column

This is super handy if you are working to a SQL-native table
Nc                4   > [         TU ]  " X40 UD6  X0l        g)a7  Building a myscale vector store without metadata column

embedding (Embeddings): embedding model
config (MyScaleSettings): Configuration to MyScale Client
must_have_cols (List[str]): column names to be included in query
Other keyword arguments will pass into
    [clickhouse-connect](https://docs.myscale.com/)
N)re   rf   must_have_cols)r5   ry   rg   r  rz   r   s        r   rf   MyScaleWithoutJSON.__init__  s     	5f5)7r   c                   SR                  [        [        U5      5      nU(       a  SU 3nOSnSU R                  R                  S    SSR                  U R
                  5       SU R                  R                   SU R                  R                   S	U S
U R                  R                  S    SU SU R                   SU S	3nU$ )NrZ   r   r]   r   r(   z, dist, 
                z
            FROM r^   r   r   r)   r   r   r   )	rm   r   r   rg   r+   r  r-   r/   rs   r   s         r   r   MyScaleWithoutJSON._build_qstr  s     HHSe_-	#I;/IIKK**623 4$--./ 0++&&'q):):(; <K #{{55h?@I; O) *&  r   c                   U R                  XU5      n U R                  R                  U5      R                  5        VVs/ sHB  n[	        X`R
                  R                  S      U R                   Vs0 sH  o"Xb   _M	     snS9PMD     snn$ s  snf s  snnf ! [         a:  n[        R                  S[        U5       S[        U5       S35        / s SnA$ SnAff = f)r   r(   r   r   r   r   N)r   rt   r   r   r   rg   r+   r  rv   ri   r   r   r   r   s           r   r   .MyScaleWithoutJSON.similarity_search_by_vector%  s    .   y9
	 **51??A
 BA	 !";;#9#9&#A!B/3/B/BC/B!g/BC B  D  	LL?47)3CCF87STI	sA   ,B  2B2B?	BB BB 
C!/CCCc                   U R                  U R                  R                  U5      X#5      n U R                  R	                  U5      R                  5        VVs/ sHG  n[        X`R                  R                  S      U R                   Vs0 sH  o"Xb   _M	     snS9US   4PMI     snn$ s  snf s  snnf ! [         a:  n[        R                  S[        U5       S[        U5       S35        / s SnA$ SnAff = f)r   r(   r   r   r   r   r   N)r   rr   rl   rt   r   r   r   rg   r+   r  rv   ri   r   r   r   r   s           r   r   :MyScaleWithoutJSON.similarity_search_with_relevance_scoresI  s    *   !1!1!=!=e!DaS	 **51??A	 BA %&{{'='=f'E%F373F3F!G3FaQT'3F!G fI B	 	 "H		  	LL?47)3CCF87STI	sA   ,B5 2B/B*B/&B5 *B//B5 5
C9?/C4.C94C9c                    g)Nr]   r   r   s    r   r   "MyScaleWithoutJSON.metadata_columnn  s    r   )r  )
ry   r   rg   r   r  r   rz   r   r@   r   r3   r  r  r  r  r  )rA   rB   rC   rD   rE   rf   r   r   r   r	  r   rH   r  r  s   @r   r  r    s     -1$&	88 *8 "	8
 8 
8 8& IM (+8E	. #'	"" " !	"
 " 
"J BF## #1>#QT#	%#J  r   r  )r   r   r   r   r@   bool)
__future__r   r   logginghashlibr   	threadingr   typingr   r   r   r	   r
   r   langchain_core.documentsr   langchain_core.embeddingsr   langchain_core.vectorstoresr   pydantic_settingsr   r   	getLoggerri   r   r   rJ   r  r   r   r   <module>r%     sc    "     = = - 0 3 >				 <l <~V2k V2rw wr   