
    dh                        S SK Jr  S SKJrJrJrJrJrJr  S SK	J
r
  S SKJr  S SKJr  S SKJrJr  SS jr " S S	\5      rg
)    )annotations)AnyCallableDictIterableListOptional)CallbackManagerForRetrieverRun)Document)BaseRetriever)
ConfigDictFieldc                "    U R                  5       $ N)split)texts    [/var/www/html/shao/venv/lib/python3.13/site-packages/langchain_community/retrievers/bm25.pydefault_preprocessing_funcr      s    ::<    c                      \ rS rSr% SrSrS\S'    \" SS9rS\S	'    S
r	S\S'    \
rS\S'    \" SS9r\SSS\
4             SS jj5       r\S\
S.         SS jj5       r      SS jrSrg)BM25Retriever   z'`BM25` retriever without Elasticsearch.Nr   
vectorizerF)reprList[Document]docs   intkCallable[[str], List[str]]preprocess_funcT)arbitrary_types_allowedc                    SSK Jn  U Vs/ sH
  o" U5      PM     n	nU=(       d    0 nU" U	40 UD6n
U=(       d	    S U 5       nU(       a+  [        XU5       VVVs/ sH  u  pn[	        XUS9PM     nnnnO%[        X5       VVs/ sH  u  p[	        XS9PM     nnnU " SXUS.UD6$ ! [         a    [        S5      ef = fs  snf s  snnnf s  snnf )	a  
Create a BM25Retriever from a list of texts.
Args:
    texts: A list of texts to vectorize.
    metadatas: A list of metadata dicts to associate with each text.
    ids: A list of ids to associate with each text.
    bm25_params: Parameters to pass to the BM25 vectorizer.
    preprocess_func: A function to preprocess each text before vectorization.
    **kwargs: Any other arguments to pass to the retriever.

Returns:
    A BM25Retriever instance.
r   )	BM25OkapizHCould not import rank_bm25, please install with `pip install rank_bm25`.c              3  $   #    U H  n0 v   M	     g 7fr    ).0_s     r   	<genexpr>+BM25Retriever.from_texts.<locals>.<genexpr>A   s     !4e"es   page_contentmetadataid)r,   r-   )r   r   r!   r&   )	rank_bm25r$   ImportErrorzipr   )clstexts	metadatasidsbm25_paramsr!   kwargsr$   ttexts_processedr   mir   s                 r   
from_textsBM25Retriever.from_texts   s   .	+ 8==u!?1-u=!'R>+>
4!4e!4	  #5S99GA! a:9  D BEUAVAVa4AV    
!o
QW
 	
'  	 	 >

s   B& B?CC&B<)r6   r!   c          	     V    [        S U 5       6 u  pVnU R                  " SUUUUUS.UD6$ )aW  
Create a BM25Retriever from a list of Documents.
Args:
    documents: A list of Documents to vectorize.
    bm25_params: Parameters to pass to the BM25 vectorizer.
    preprocess_func: A function to preprocess each text before vectorization.
    **kwargs: Any other arguments to pass to the retriever.

Returns:
    A BM25Retriever instance.
c              3  d   #    U H'  oR                   UR                  UR                  4v   M)     g 7fr   r+   )r'   ds     r   r)   /BM25Retriever.from_documents.<locals>.<genexpr>d   s!     D)Q~~qzz1440)s   .0)r3   r6   r4   r5   r!   r&   )r1   r<   )r2   	documentsr6   r!   r7   r3   r4   r5   s           r   from_documentsBM25Retriever.from_documentsO   sL    ( !$D)D!
# ~~ 
#+
 
 	
r   c                   U R                  U5      nU R                  R                  X0R                  U R                  S9nU$ )N)n)r!   r   	get_top_nr   r   )selfqueryrun_managerprocessed_queryreturn_docss        r   _get_relevant_documents%BM25Retriever._get_relevant_documentso   s;     ..u5oo//dff/Ur   r&   )r3   zIterable[str]r4   zOptional[Iterable[dict]]r5   zOptional[Iterable[str]]r6   Optional[Dict[str, Any]]r!   r    r7   r   returnr   )
rB   zIterable[Document]r6   rO   r!   r    r7   r   rP   r   )rI   strrJ   r
   rP   r   )__name__
__module____qualname____firstlineno____doc__r   __annotations__r   r   r   r   r!   r   model_configclassmethodr<   rC   rM   __static_attributes__r&   r   r   r   r      s   1J e,D.,AsJ)2LO/LO $L  /3'+046P-
-
 ,-
 %	-

 .-
 4-
 -
 
-
 -
^ 
 156P
%
 .	

 4
 
 

 
>*H	r   r   N)r   rQ   rP   z	List[str])
__future__r   typingr   r   r   r   r   r	   langchain_core.callbacksr
   langchain_core.documentsr   langchain_core.retrieversr   pydanticr   r   r   r   r&   r   r   <module>ra      s-    " @ @ C - 3 &eM er   