
    dhf                        S SK Jr  S SKrS SKJr  S SKJrJrJrJ	r	J
r
  S SKJr  S SKJr  S SKJr  S SKJr   " S	 S
\5      rg)    )annotationsN)Path)AnyDictIterableListOptional)CallbackManagerForRetrieverRun)Document)BaseRetriever)
ConfigDictc                     \ rS rSr% SrSrS\S'    S\S'    SrS\S'    S	rS
\S'    \	" SS9r
\  S         SS jj5       r\SS.       SS jj5       r      SS jr S     SS jjr\SSS.       SS jj5       rSrg)TFIDFRetriever   z`TF-IDF` retriever.

Largely based on
https://github.com/asvskartheek/Text-Retrieval/blob/master/TF-IDF%20Search%20Engine%20(SKLEARN).ipynb
Nr   
vectorizerList[Document]docstfidf_array   intkT)arbitrary_types_allowedc           	         SSK Jn  U=(       d    0 nU" S0 UD6nUR                  U5      nU=(       d	    S U 5       n[	        X5       VV	s/ sH  u  p[        XS9PM     n
nn	U " SXjUS.UD6$ ! [         a    [        S5      ef = fs  sn	nf )Nr   )TfidfVectorizerzNCould not import scikit-learn, please install with `pip install scikit-learn`.c              3  $   #    U H  n0 v   M	     g 7fN ).0_s     \/var/www/html/shao/venv/lib/python3.13/site-packages/langchain_community/retrievers/tfidf.py	<genexpr>,TFIDFRetriever.from_texts.<locals>.<genexpr>4   s     !4e"es   page_contentmetadatar   r   r   r   )sklearn.feature_extraction.textr   ImportErrorfit_transformzipr   )clstexts	metadatastfidf_paramskwargsr   r   r   tmr   s              r    
from_textsTFIDFRetriever.from_texts!   s    	G $)r$4|4
 ..u54!4e!4	ADUAVWAVa4AVWWjWPVWW  	! 	 Xs   A. B.B)r.   c               N    [        S U 5       6 u  pEU R                  " SXBUS.UD6$ )Nc              3  N   #    U H  oR                   UR                  4v   M     g 7fr   r#   )r   ds     r    r!   0TFIDFRetriever.from_documents.<locals>.<genexpr>@   s      Qy!..!**!=ys   #%)r,   r.   r-   r   )r*   r2   )r+   	documentsr.   r/   r,   r-   s         r    from_documentsTFIDFRetriever.from_documents8   s:      Qy QR~~ 
i
KQ
 	
    c                  SSK Jn  U R                  R                  U/5      nU" U R                  U5      R                  S5      nUR                  5       U R                  * S  S S S2    Vs/ sH  o`R                  U   PM     nnU$ s  snf )Nr   )cosine_similarity)r>   )	sklearn.metrics.pairwiser=   r   	transformr   reshapeargsortr   r   )selfqueryrun_managerr=   	query_vecresultsireturn_docss           r    _get_relevant_documents&TFIDFRetriever._get_relevant_documentsE   s     	?OO--G
	 $D$4$4i@HH
 .5__->wy-I$B$-OP-Oyy|-OP Qs   +Btfidf_vectorizerc                h    SS K n[        U5      nUR                  SSS9  UR	                  U R
                  XB S3-  5        [        XB S3-  S5       n[        R                  " U R                  U R                  4U5        S S S 5        g ! [         a    [        S5      ef = f! , (       d  f       g = f)Nr   BCould not import joblib, please install with `pip install joblib`.T)exist_okparents.joblib.pklwb)
joblibr(   r   mkdirdumpr   openpickler   r   )rC   folder_path	file_namerT   pathfs         r    
save_localTFIDFRetriever.save_localS   s    
	 K 

D$
/ 	DOOTk,A%AB $;d++T2aKKD$4$45q9 32  	T 	 32s   B
 .B#
B #
B1F)allow_dangerous_deserializationrZ   c               :    SSK nU(       d  [        S5      e[        U5      nUR	                  XS S3-  5      n[        XS S3-  S5       n[        R                  " U5      u  pSSS5        U " UWW	S9$ ! [         a    [        S5      ef = f! , (       d  f       N/= f)	a  Load the retriever from local storage.

Args:
    folder_path: Folder path to load from.
    allow_dangerous_deserialization: Whether to allow dangerous deserialization.
        Defaults to False.
        The deserialization relies on .joblib and .pkl files, which can be
        modified to deliver a malicious payload that results in execution of
        arbitrary code on your machine. You will need to set this to `True` to
        use deserialization. If you do this, make sure you trust the source of
        the file.
    file_name: File name to load from. Defaults to "tfidf_vectorizer".

Returns:
    TFIDFRetriever: Loaded retriever.
r   NrN   a  The de-serialization of this retriever is based on .joblib and .pkl files.Such files can be modified to deliver a malicious payload that results in execution of arbitrary code on your machine.You will need to set `allow_dangerous_deserialization` to `True` to load this retriever. If you do this, make sure you trust the source of the file, and you are responsible for validating the file came from a trusted source.rQ   rR   rbr&   )rT   r(   
ValueErrorr   loadrW   rX   )
r+   rY   r_   rZ   rT   r[   r   r\   r   r   s
             r    
load_localTFIDFRetriever.load_locali   s    0	 /.	 	 K  [[+W(=!=>
 $;d++T2a !'AD 3
 jtMM9  	T 	. 32s   A3 
B3B	
Br   )NN)
r,   zIterable[str]r-   zOptional[Iterable[dict]]r.   Optional[Dict[str, Any]]r/   r   returnr   )r8   zIterable[Document]r.   rf   r/   r   rg   r   )rD   strrE   r
   rg   r   )rL   )rY   rh   rZ   rh   rg   None)rY   rh   r_   boolrZ   rh   rg   r   )__name__
__module____qualname____firstlineno____doc__r   __annotations__r   r   r   model_configclassmethodr2   r9   rJ   r]   rd   __static_attributes__r   r;   r    r   r      sb    J
KAsJ( $L  /315	XX ,X /	X
 X 
X X, 
 26	

%

 /	


 

 


 

*H	" ,:: : 
	:, 
 16+5N5N *.	5N
 5N 
5N 5Nr;   r   )
__future__r   rX   pathlibr   typingr   r   r   r   r	   langchain_core.callbacksr
   langchain_core.documentsr   langchain_core.retrieversr   pydanticr   r   r   r;   r    <module>r{      s0    "   6 6 C - 3 RN] RNr;   