
    dh                         S SK Jr  S SKJrJrJrJrJr  S SKr	S SK
Jr  S SKJr  S SKJr  S SKJr  S SKJr  S S	KJr  S S
KJr   " S S\\5      r " S S\5      rg)    )Enum)AnyDictListOptionalUnionN)CallbackManagerForRetrieverRun)Document)
Embeddings)BaseRetriever)
get_fields)
ConfigDict)maximal_marginal_relevancec                        \ rS rSrSrSrSrSrg)
SearchType   z-Enumerator of the types of search to perform.
similaritymmr N)__name__
__module____qualname____firstlineno____doc__r   r   __static_attributes__r       _/var/www/html/shao/venv/lib/python3.13/site-packages/langchain_community/retrievers/docarray.pyr   r      s    7J
Cr   r   c            
       t   \ rS rSr% SrSr\\S'   \\S'   \	\S'   \	\S'   \
R                  r\
\S'   S	r\\S
'   Sr\\   \S'   \" SS9rS\	S\S\\   4S jrS\R0                  S
\S\\\\	\4   \4      4S jrS\R0                  S\\   4S jrS\R0                  S\\   4S jrS\\\	\4   \4   S\4S jrSrg)DocArrayRetriever   a  `DocArray Document Indices` retriever.

Currently, it supports 5 backends:
InMemoryExactNNIndex, HnswDocumentIndex, QdrantDocumentIndex,
ElasticDocIndex, and WeaviateDocumentIndex.

Args:
    index: One of the above-mentioned index instances
    embeddings: Embedding model to represent text as vectors
    search_field: Field to consider for searching in the documents.
        Should be an embedding/vector/tensor.
    content_field: Field that represents the main content in your document schema.
        Will be used as a `page_content`. Everything else will go into `metadata`.
    search_type: Type of search to perform (similarity / mmr)
    filters: Filters applied for document retrieval.
    top_k: Number of documents to return
Nindex
embeddingssearch_fieldcontent_fieldsearch_type   top_kfiltersT)arbitrary_types_allowedqueryrun_managerreturnc                V   [         R                  " U R                  R                  U5      5      nU R                  [
        R                  :X  a  U R                  U5      nU$ U R                  [
        R                  :X  a  U R                  U5      nU$ [        SU R                   S35      e)zGet documents relevant for a query.

Args:
    query: string to find relevant documents for

Returns:
    List of relevant documents
zSearch type z5 does not exist. Choose either 'similarity' or 'mmr'.)nparrayr"   embed_queryr%   r   r   _similarity_searchr   _mmr_search
ValueError)selfr*   r+   	query_embresultss        r   _get_relevant_documents)DocArrayRetriever._get_relevant_documents5   s     HHT__88?@	z444--i8G  /&&y1G  t//0 17 8 r   r5   c                 p   SSK JnJn  0 nU R                  n[	        U R
                  U5      (       a  U R                  US'   SnO:[	        U R
                  U5      (       a  U R                  US'   OU R                  US'   U R                  (       a  U R
                  R                  5       R                  XS9R                  " S0 UD6R                  US9nU R
                  R                  U5      n[        US	5      (       a  UR                  nUS
U nU$ U R
                  R                  XUS9R                  nU$ )z
Perform a search using the query embedding and return top_k documents.

Args:
    query_emb: Query represented as an embedding
    top_k: Number of documents to return

Returns:
    A list of top_k documents matching the query
r   )ElasticDocIndexWeaviateDocumentIndexwhere_filter r*   filter_query)r*   r#   )limit	documentsN)r*   r#   r?   r   )docarray.indexr:   r;   r#   
isinstancer!   r(   build_queryfindfilterbuildexecute_queryhasattrr@   )	r4   r5   r'   r:   r;   filter_argsr#   r*   docss	            r   _searchDocArrayRetriever._searchQ   s,    	J((djj"788*.,,K'L

O44#'<<K *.,,K'<<

&&(#   	 ' &	'
 U#  ::++E2Dt[))~~<D
  ::??% # i  r   c                     U R                  XR                  S9nU Vs/ sH  o0R                  U5      PM     nnU$ s  snf )z
Perform a similarity search.

Args:
    query_emb: Query represented as an embedding

Returns:
    A list of documents most similar to the query
r5   r'   )rK   r'   _docarray_to_langchain_doc)r4   r5   rJ   docr6   s        r   r1   $DocArrayRetriever._similarity_search   s@     ||izz|BCGH4C22374H Is   ;c           
      2   U R                  USS9n[        UU Vs/ sH;  n[        U[        5      (       a  X0R                     O[        X0R                  5      PM=     snU R                  S9nU Vs/ sH  oPR                  X%   5      PM     nnU$ s  snf s  snf )z
Perform a maximal marginal relevance (mmr) search.

Args:
    query_emb: Query represented as an embedding

Returns:
    A list of diverse documents related to the query
   rN   )k)rK   r   rB   dictr#   getattrr'   rO   )r4   r5   rJ   rP   mmr_selectedidxr6   s          r   r2   DocArrayRetriever._mmr_search   s     ||ir|:1
  	  C c4(( %%&S"3"345  	 jj	
 JVV#2249=V Ws   AB
0BrP   c                    [        U[        5      (       a  UR                  5       O
[        U5      nU R                  U;  a  [        SU R                   S35      e[        [        U[        5      (       a  XR                     O[        XR                  5      S9nU Hm  n[        U[        5      (       a  X   O
[        X5      n[        U[        [        [        [        45      (       d  MN  X@R                  :w  d  M_  XSR                  U'   Mo     U$ )a  
Convert a DocArray document (which also might be a dict)
to a langchain document format.

DocArray document can contain arbitrary fields, so the mapping is done
in the following way:

page_content <-> content_field
metadata <-> all other fields excluding
    tensors and embeddings (so float, int, string)

Args:
    doc: DocArray document

Returns:
    Document in langchain format

Raises:
    ValueError: If the document doesn't contain the content field
z.Document does not contain the content field - .)page_content)rB   rU   keysr   r$   r3   r
   rV   strintfloatboolmetadata)r4   rP   fieldslc_docnamevalues         r   rO   ,DocArrayRetriever._docarray_to_langchain_doc   s    ,  *#t44*S/V+@ASAS@TTUV  #t$$ //0001
 D!+C!6!6CIGC<NE53UD"9::...(-%  r   r   ) r   r   r   r   r   r!   r   __annotations__r   r^   r   r   r%   r'   r_   r(   r   r   model_configr	   r   r
   r7   r.   ndarrayr   r   rK   r1   r2   rO   r   r   r   r   r   r      s   $ E3(33K3E3N!GXc]! $L 4	
 
h8,,,/,	eDcNC'(	),\BJJ 4> RZZ DN 2*eDcNC4G.H *X *r   r   )enumr   typingr   r   r   r   r   numpyr.   langchain_core.callbacksr	   langchain_core.documentsr
   langchain_core.embeddingsr   langchain_core.retrieversr   langchain_core.utils.pydanticr   pydanticr   &langchain_community.vectorstores.utilsr   r^   r   r   r   r   r   <module>ru      sB     3 3  C - 0 3 4  Md z zr   