
    dhQ                        S SK Jr  S SKrS SKrS SKrS SKJr  S SKJrJ	r	J
r
JrJrJrJr  S SKrS SKJr  S SKJr  S SKJr  S SKJr  S S	KJrJr  S S
KJr  S SKJr  SS jr SS jr! " S S\5      r"g)    )annotationsN)Path)AnyCallableDictIterableListOptionalTuple)Document)
Embeddingsguard_import)VectorStore)AddableMixinDocstore)InMemoryDocstore)DistanceStrategyc           	     v    U [         R                  " [         R                  R                  U SSS9SS5      -  n U $ )z!Normalize vectors to unit length.T)axiskeepdimsg-q=N)npcliplinalgnorm)xs    ^/var/www/html/shao/venv/lib/python3.13/site-packages/langchain_community/vectorstores/scann.py	normalizer      s1    T:E4	HHAH    c                     [        S5      $ )z5
Import `scann` if available, otherwise raise error.
scannr    r    r   dependable_scann_importr$      s       r    c                     \ rS rSrSrSS\R                  S4               SS jjr  S           SS jjr  S         SS jjr	  S         SS jjr
SSS	 jjr   S           SS
 jjr   S           S S jjr   S           S!S jjr   S           S"S jjr\   S#               S$S jj5       r\  S           S%S jj5       r\  S           S&S jj5       rS'S(S jjr\ S'SS.           S)S jjj5       rS*S jr   S           S S jjrSrg)+ScaNN!   a  `ScaNN` vector store.

To use, you should have the ``scann`` python package installed.

Example:
    .. code-block:: python

        from langchain_community.embeddings import HuggingFaceEmbeddings
        from langchain_community.vectorstores import ScaNN

        model_name = "sentence-transformers/all-mpnet-base-v2"
        db = ScaNN.from_texts(
            ['foo', 'bar', 'barz', 'qux'],
            HuggingFaceEmbeddings(model_name=model_name))
        db.similarity_search('foo?', k=1)
NFc	                d    Xl         X l        X0l        X@l        Xpl        XPl        X`l        Xl        g)z%Initialize with necessary components.N)	embeddingindexdocstoreindex_to_docstore_iddistance_strategyoverride_relevance_score_fn_normalize_L2_scann_config)	selfr)   r*   r+   r,   relevance_score_fnnormalize_L2r-   scann_configs	            r   __init__ScaNN.__init__3   s1     #
 $8!!2+=())r    c                    [        U R                  [        5      (       d  [        SU R                   S35      e[	        S5      e)NSIf trying to add texts, the underlying docstore should support adding items, which 	 does notz(Updates are not available in ScaNN, yet.)
isinstancer+   r   
ValueErrorNotImplementedError)r1   texts
embeddings	metadatasidskwargss         r   __addScaNN.__addH   sF     $--66''+}}oY@  ""LMMr    c                t    U R                   R                  [        U5      5      nU R                  " X4X#S.UD6$ )a4  Run more texts through the embeddings and add to the vectorstore.

Args:
    texts: Iterable of strings to add to the vectorstore.
    metadatas: Optional list of metadatas associated with the texts.
    ids: Optional list of unique IDs.

Returns:
    List of ids from adding the texts into the vectorstore.
r?   r@   )r)   embed_documentslist_ScaNN__add)r1   r=   r?   r@   rA   r>   s         r   	add_textsScaNN.add_textsW   s5    $ ^^33DK@
zz%TyTVTTr    c                    [        U R                  [        5      (       d  [        SU R                   S35      e[	        U6 u  pVU R
                  " XV4X#S.UD6$ )aY  Run more texts through the embeddings and add to the vectorstore.

Args:
    text_embeddings: Iterable pairs of string and embedding to
        add to the vectorstore.
    metadatas: Optional list of metadatas associated with the texts.
    ids: Optional list of unique IDs.

Returns:
    List of ids from adding the texts into the vectorstore.
r8   r9   rE   )r:   r+   r   r;   ziprH   )r1   text_embeddingsr?   r@   rA   r=   r>   s          r   add_embeddingsScaNN.add_embeddingsl   s`    $ $--66''+}}oY@ 
  1zz%TyTVTTr    c                    [        S5      e)zDelete by vector ID or other criteria.

Args:
    ids: List of ids to delete.
    **kwargs: Other keyword arguments that subclasses might use.

Returns:
    Optional[bool]: True if deletion is successful,
    False otherwise, None if not implemented.
z*Deletions are not available in ScaNN, yet.)r<   )r1   r@   rA   s      r   deleteScaNN.delete   s     ""NOOr    c           
       ^ [         R                  " U/[         R                  S9nU R                  (       a  [	        U5      nU R
                  R                  Xcc  UOU5      u  px/ n	[        US   5       H  u  pUS:X  a  M  U R                  U   nU R                  R                  U5      m[        T[        5      (       d  [        SU ST 35      eUb  UR                  5        VVs0 sH   u  pU[        U[        5      (       d  U/OU_M"     nnn[!        U4S jUR                  5        5       5      (       a  U	R#                  TUS   U
   45        M  M  U	R#                  TUS   U
   45        M     UR%                  S5      nUbv  U R&                  [(        R*                  [(        R,                  4;   a  [.        R0                  O[.        R2                  nU	 VVs/ sH  u  nnU" UU5      (       d  M  UU4PM     n	nnU	SU $ s  snnf s  snnf )	a  Return docs most similar to query.

Args:
    embedding: Embedding vector to look up documents similar to.
    k: Number of Documents to return. Defaults to 4.
    filter (Optional[Dict[str, Any]]): Filter by metadata. Defaults to None.
    fetch_k: (Optional[int]) Number of Documents to fetch before filtering.
              Defaults to 20.
    **kwargs: kwargs to be passed to similarity search. Can include:
        score_threshold: Optional, a floating point value between 0 to 1 to
            filter the resulting set of retrieved docs

Returns:
    List of documents most similar to the query text and L2 distance
    in float for each. Lower score represents more similarity.
dtypeNr   r   zCould not find document for id z, got c              3  b   >#    U H%  u  pTR                   R                  U5      U;   v   M'     g 7fN)metadataget).0keyvaluedocs      r   	<genexpr>?ScaNN.similarity_search_with_score_by_vector.<locals>.<genexpr>   s'     W*#s||'',5s   ,/score_threshold)r   arrayfloat32r/   r   r*   search_batched	enumerater,   r+   searchr:   r   r;   itemsrG   allappendrY   r-   r   MAX_INNER_PRODUCTJACCARDoperatorgele)r1   r)   kfilterfetch_krA   vectorindicesscoresdocsji_idr[   r\   r`   cmpr]   
similaritys                    ` r   &similarity_search_with_score_by_vector,ScaNN.similarity_search_with_score_by_vector   s   0 9+RZZ8v&F**33AW
 gaj)DABw++A.C--&&s+Cc8,, #B3%vcU!STT! '-lln&4
 
5$(?(?%UJ&4   WWWWKKfQil 34 X S&)A,/0! *$ !**%67& ))$668H8P8PQR  [[	  (,'+OCz?3 "j!'+  
 BQx-"s   '&HH1Hc                j    U R                   R                  U5      nU R                  " UU4UUS.UD6nU$ )a  Return docs most similar to query.

Args:
    query: Text to look up documents similar to.
    k: Number of Documents to return. Defaults to 4.
    filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.
    fetch_k: (Optional[int]) Number of Documents to fetch before filtering.
              Defaults to 20.

Returns:
    List of documents most similar to the query text with
    L2 distance in float. Lower score represents more similarity.
ro   rp   )r)   embed_queryrz   )r1   queryrn   ro   rp   rA   r)   rt   s           r   similarity_search_with_score"ScaNN.similarity_search_with_score   sL    * NN..u5	::
 	

 
 r    c                h    U R                   " UU4UUS.UD6nU VVs/ sH  u  pxUPM	     snn$ s  snnf )a  Return docs most similar to embedding vector.

Args:
    embedding: Embedding to look up documents similar to.
    k: Number of Documents to return. Defaults to 4.
    filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.
    fetch_k: (Optional[int]) Number of Documents to fetch before filtering.
              Defaults to 20.

Returns:
    List of Documents most similar to the embedding.
r}   )rz   )	r1   r)   rn   ro   rp   rA   docs_and_scoresr]   _s	            r   similarity_search_by_vector!ScaNN.similarity_search_by_vector   sM    ( EE
 	

 
 #22//222s   .c                d    U R                   " X4X4S.UD6nU VVs/ sH  u  pxUPM	     snn$ s  snnf )a  Return docs most similar to query.

Args:
    query: Text to look up documents similar to.
    k: Number of Documents to return. Defaults to 4.
    filter: (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.
    fetch_k: (Optional[int]) Number of Documents to fetch before filtering.
              Defaults to 20.

Returns:
    List of Documents most similar to the query.
r}   )r   )	r1   r   rn   ro   rp   rA   r   r]   r   s	            r   similarity_searchScaNN.similarity_search  sC    ( ;;
#
8>
 #22//222s   ,c                (   [        S5      nUR                  S[        R                  5      n	UR                  SS 5      n
[        R
                  " U[        R                  S9nU(       a  [        U5      nU
b  UR                  R                  X5      nOU	[        R                  :X  a:  UR                  R                  USS5      R                  5       R                  5       nO9UR                  R                  USS5      R                  5       R                  5       n/ nUc-  U Vs/ sH!  n[        [        R                   " 5       5      PM#     nn[#        U5       H,  u  nnU(       a  XO   O0 nUR%                  ['        UUS95        M.     [)        [#        U5      5      n[+        U5      [+        U5      :w  a#  [-        [+        U5       S	[+        U5       S
35      e[/        [)        [1        UR3                  5       U5      5      5      nU " UUUU4SU0UD6$ s  snf )Nr"   r-   r4   rT      dot_product
squared_l2)page_contentrX   z ids provided for z, documents. Each document should have an id.r3   )r   rY   r   EUCLIDEAN_DISTANCEr   ra   rb   r   scann_ops_pybindcreate_searcherri   builderscore_brute_forcebuildstruuiduuid4rd   rh   r   dictlen	Exceptionr   rL   values)clsr=   r>   r)   r?   r@   r3   rA   r"   r-   r4   rq   r*   	documentsr   rv   textrX   index_to_idr+   s                       r   __fromScaNN.__from+  s    W%"JJ!1!D!D
 zz.$7*BJJ7v&F#**::6PE $4$F$FF**2261mL&&(UW  **2261lK&&(UW 
 	;.34e3tzz|$eC4 'GAt'0y|bHX4(KL ( 9S>*{s9~-{#$$6s9~6F G4 4 
 $D[-?-?-A9)M$NO	

 &
 
 	
 5s   "'Hc                T    UR                  U5      nU R                  " UUU4UUS.UD6$ )a  Construct ScaNN wrapper from raw documents.

This is a user friendly interface that:
    1. Embeds documents.
    2. Creates an in memory docstore
    3. Initializes the ScaNN database

This is intended to be a quick way to get started.

Example:
    .. code-block:: python

        from langchain_community.vectorstores import ScaNN
        from langchain_community.embeddings import OpenAIEmbeddings
        embeddings = OpenAIEmbeddings()
        scann = ScaNN.from_texts(texts, embeddings)
rE   )rF   _ScaNN__from)r   r=   r)   r?   r@   rA   r>   s          r   
from_textsScaNN.from_textsg  sE    4 ..u5
zz
  
 
 	
r    c                    U Vs/ sH  ofS   PM	     nnU Vs/ sH  ofS   PM	     nnU R                   " UUU4UUS.UD6$ s  snf s  snf )a  Construct ScaNN wrapper from raw documents.

This is a user friendly interface that:
    1. Embeds documents.
    2. Creates an in memory docstore
    3. Initializes the ScaNN database

This is intended to be a quick way to get started.

Example:
    .. code-block:: python

        from langchain_community.vectorstores import ScaNN
        from langchain_community.embeddings import OpenAIEmbeddings
        embeddings = OpenAIEmbeddings()
        text_embeddings = embeddings.embed_documents(texts)
        text_embedding_pairs = list(zip(texts, text_embeddings))
        scann = ScaNN.from_embeddings(text_embedding_pairs, embeddings)
r   r   rE   )r   )	r   rM   r)   r?   r@   rA   tr=   r>   s	            r   from_embeddingsScaNN.from_embeddings  si    8  //!1/$34OqdO
4zz
  
 
 	
 04s	   ?Ac                l   [        U5      nUSR                  US9-  nUR                  SSS9  U R                  R	                  [        U5      5        [        USR                  US9-  S5       n[        R                  " U R                  U R                  4U5        SSS5        g! , (       d  f       g= f)zSave ScaNN index, docstore, and index_to_docstore_id to disk.

Args:
    folder_path: folder path to save index, docstore,
        and index_to_docstore_id to.
{index_name}.scann
index_nameTexist_okparents{index_name}.pklwbN)r   formatmkdirr*   	serializer   openpickledumpr+   r,   )r1   folder_pathr   path
scann_pathfs         r   
save_localScaNN.save_local  s     K 077:7NN
$5 	

S_- $+22j2II4PTUKK(A(ABAF QPPs   ..B%%
B3)allow_dangerous_deserializationc                  U(       d  [        S5      e[        U5      nUSR                  US9-  nUR                  SSS9  [	        S5      nUR
                  R                  [        U5      5      n	[        USR                  US9-  S5       n
[        R                  " U
5      u  nnS	S	S	5        U " X)WW40 UD6$ ! , (       d  f       N= f)
a=  Load ScaNN index, docstore, and index_to_docstore_id from disk.

Args:
    folder_path: folder path to load index, docstore,
        and index_to_docstore_id from.
    embedding: Embeddings to use when generating queries
    index_name: for saving with a specific index file name
    allow_dangerous_deserialization: whether to allow deserialization
        of the data which involves loading a pickle file.
        Pickle files can be modified by malicious actors to deliver a
        malicious payload that results in execution of
        arbitrary code on your machine.
aB  The de-serialization relies loading a pickle file. Pickle files can be modified to deliver a malicious payload that results in execution of arbitrary code on your machine.You will need to set `allow_dangerous_deserialization` to `True` to enable deserialization. If you do this, make sure that you trust the source of the data. For example, if you are loading a file that you created, and know that no one else has modified the file, then this is safe to do. Do not set this to `True` if you are loading a file from an untrusted source (e.g., some random site on the internet.).r   r   Tr   r"   r   rbN)r;   r   r   r   r   r   load_searcherr   r   r   load)r   r   r)   r   r   rA   r   r   r"   r*   r   r+   r,   s                r   
load_localScaNN.load_local  s    . /	"  K 077:7NN
$5W%&&44S_E $+22j2II4PTU $ Q 9X/CNvNN QPs   B88
Cc                    U R                   b  U R                   $ U R                  [        R                  :X  a  U R                  $ U R                  [        R
                  :X  a  U R                  $ [        S5      e)a   
The 'correct' relevance function
may differ depending on a few things, including:
- the distance / similarity metric used by the VectorStore
- the scale of your embeddings (OpenAI's are unit normed. Many others are not!)
- embedding dimensionality
- etc.
zJUnknown distance strategy, must be cosine, max_inner_product, or euclidean)r.   r-   r   ri   %_max_inner_product_relevance_score_fnr   _euclidean_relevance_score_fnr;   )r1   s    r   _select_relevance_score_fn ScaNN._select_relevance_score_fn  sr     ++7333 !!%5%G%GG===##'7'J'JJ555  r    c                $   UR                  SS5      nU R                  5       nUc  [        S5      eU R                  " U4UUUS.UD6nU V	V
s/ sH  u  pX" U
5      4PM     nn	n
Ub  U V	Vs/ sH  u  pX:  d  M  X4PM     nn	nU$ s  sn
n	f s  snn	f )z?Return docs and their similarity scores on a scale from 0 to 1.r`   NzLnormalize_score_fn must be provided to ScaNN constructor to normalize scores)rn   ro   rp   )popr   r;   r   )r1   r   rn   ro   rp   rA   r`   r2   r   r]   scoredocs_and_rel_scoresry   s                r   (_similarity_search_with_relevance_scores.ScaNN._similarity_search_with_relevance_scores  s     !**%6=!<<>%9  ;;
	

 
 @O
?NS$U+, 	 
 & (;#':OC0 "!':   #
 #"
#s   B,
B:B)r/   r0   r-   r+   r)   r*   r,   r.   )r)   r   r*   r   r+   r   r,   zDict[int, str]r2   z"Optional[Callable[[float], float]]r3   boolr-   r   r4   zOptional[str])NN)r=   Iterable[str]r>   zIterable[List[float]]r?   Optional[List[dict]]r@   Optional[List[str]]rA   r   return	List[str])
r=   r   r?   r   r@   r   rA   r   r   r   )
rM   z!Iterable[Tuple[str, List[float]]]r?   r   r@   r   rA   r   r   r   rW   )r@   r   rA   r   r   zOptional[bool])   N   )r)   List[float]rn   intro   Optional[Dict[str, Any]]rp   r   rA   r   r   List[Tuple[Document, float]])r   r   rn   r   ro   r   rp   r   rA   r   r   r   )r)   r   rn   r   ro   r   rp   r   rA   r   r   List[Document])r   r   rn   r   ro   r   rp   r   rA   r   r   r   )NNF)r=   r   r>   zList[List[float]]r)   r   r?   r   r@   r   r3   r   rA   r   r   r&   )r=   r   r)   r   r?   r   r@   r   rA   r   r   r&   )rM   zList[Tuple[str, List[float]]]r)   r   r?   r   r@   r   rA   r   r   r&   )r*   )r   r   r   r   r   None)r   r   r)   r   r   r   r   r   rA   r   r   r&   )r   zCallable[[float], float])__name__
__module____qualname____firstlineno____doc__r   r   r5   rH   rI   rN   rQ   rz   r   r   r   classmethodr   r   r   r   r   r   r   __static_attributes__r#   r    r   r&   r&   !   sR   . BF".>.Q.Q&*** * 	*
 -* ?* * ,* $*2 +/#'NN *N (	N
 !N N 
N$ +/#'	UU (U !	U
 U 
U0 +/#'	U:U (U !	U
 U 
U8P" +/>> > )	>
 > > 
&>F +/  )	
   
&D +/33 3 )	3
 3 3 
3@ +/33 3 )	3
 3 3 
32  +/#'"9
9
 &9
 	9

 (9
 !9
 9
 9
 
9
 9
v 
 +/#'!
!
 !
 (	!

 !!
 !
 
!
 !
F 
 +/#'$
6$
 $
 (	$

 !$
 $
 
$
 $
LG$ 
 "	3O 163O3O 3O 	3O *.3O 3O 
3O 3Oj8 +/"#"# "# )	"#
 "# "# 
&"# "#r    r&   )r   
np.ndarrayr   r   )r   r   )#
__future__r   rk   r   r   pathlibr   typingr   r   r   r   r	   r
   r   numpyr   langchain_core.documentsr   langchain_core.embeddingsr   langchain_core.utilsr   langchain_core.vectorstoresr   !langchain_community.docstore.baser   r   &langchain_community.docstore.in_memoryr   &langchain_community.vectorstores.utilsr   r   r$   r&   r#   r    r   <module>r      sM    "     G G G  - 0 - 3 D C C!T#K T#r    