o
    ôrÒhÓ/  ã                   @   s”   d dl Z d dlZd dlZd dlZe e¡Ze jfdd„Z	e j
Z
ddd„Ze jddfd	d
„Zddd„Zddd„Zddd„Z		ddd„Zddd„ZdS )é    Nc                 C   s  t  d| j|f ¡ t ¡ }| j\}}t |¡}tj|||d}t ||¡}	t ¡ r8t  dt ¡  ¡ t 	|	¡}	d}
|D ]3}|jd }|	 
|¡ |	 | |¡\}}||
7 }| ||¡ |	 ¡  |
|7 }
t  d|
t ¡ | f ¡ q<| ¡  t  dt ¡ | |
f ¡ |j|jfS )z¤Computes the exact KNN search results for a dataset that possibly
    does not fit in RAM but for which we have an iterator that
    returns it block by block.
    z%knn_ground_truth queries size %s k=%d©Úkeep_maxúrunning on %d GPUsr   ú%d db elements, %.3f szGT time: %.3f s (%d vectors))ÚLOGÚinfoÚshapeÚtimeÚfaissÚis_similarity_metricÚ
ResultHeapÚ	IndexFlatÚget_num_gpusÚindex_cpu_to_all_gpusÚaddÚsearchÚ
add_resultÚresetÚfinalizeÚDÚI)ÚxqÚdb_iteratorÚkÚmetric_typeÚt0ÚnqÚdr   ÚrhÚindexÚi0ÚxbiÚnir   r   © r#   ú]/var/www/html/alpaca_bot/venv/lib/python3.10/site-packages/faiss/contrib/exhaustive_search.pyÚknn_ground_truth   s,   




r%   é   c           !      C   sœ  | j \}}t|tjƒ}t |j¡}|rt|ƒnt|ƒ}t|j	|ƒ}	t
 d|› d|	 ›d| ›d| ›¡ t ¡ }
| | |	¡\}}t ¡ |
 }|rV|d dk sQJ ‚| d¡}d}d	}|d	urÂ|sm|d	d	…|	d
 f |k }n|d	d	…|	d
 f |k}| ¡ dkrÂt
 d| ¡  ¡ t ¡ }
t|tjƒrª|}|ržt |d ¡}nt ||j¡}| |¡ | | | |¡\}}}|r¼| d¡}t ¡ |
 }t
 d¡ t ¡ }
|rÐtjntj}|||	||ƒ}	 tj}||ƒ|_||ƒ|_|d	ur||ƒ|_||ƒ|_|| d¡ƒ|_||ƒ|_tj |d
 dd}| !||ƒ¡ |d }tj ||j"d}tj |dd}| #||ƒ||ƒ¡ t ¡ |
 } t
 d|d›d|d›d| d›d¡ |||fS )aA  GPU does not support range search, so we emulate it with
    knn search + fallback to CPU index.

    The index_cpu can either be:
    - a CPU index that supports range search
    - a numpy table, that will be used to construct a Flat index if needed.
    - None. In that case, at most gpu_k results will be returned
    zGPU search z queries with k=z is_binary_index=z
 keep_max=é   i €  Úint16r   Né   zCPU search remain %dÚcombineTÚint64©Údtypeéÿÿÿÿc                 S   ó   g | ]}t |ƒ‘qS r#   ©Úlen)Ú.0Údir#   r#   r$   Ú
<listcomp>‹   ó    z$range_search_gpu.<locals>.<listcomp>ztimes z.3fzs Ús))r   Ú
isinstancer
   ÚIndexBinaryr   r   ÚintÚfloatÚminÚntotalr   Údebugr	   r   ÚastypeÚsumÚnpÚndarrayÚIndexBinaryFlatr   r   Úrange_searchÚCombinerRangeKNNint16ÚCombinerRangeKNNfloatÚswig_ptrr   r   ÚmaskÚD_remainÚviewÚ
lim_remainÚI_remainÚemptyÚcompute_sizesr-   Úwrite_resultÚrangeÚ	METRIC_L2ÚappendÚcumsumÚhstack)!r   Úr2Ú	index_gpuÚ	index_cpuÚgpu_kr   r   Úis_binary_indexr   r   r   r   r   Út1Út2rJ   rG   ÚxbrH   rK   ÚCombinerRangeKNNÚcombinerÚspÚL_resÚnresÚD_resÚI_resÚnrÚiÚnvÚl0Úl1Út3r#   r#   r$   Úrange_search_gpu5   sr   
	 ÿ




þ





$
ri   Fr.   c                    s  | j \}}t ¡ }tj| dd} t ||¡}	|dkrt ¡ }|r6t d| ¡ t 	¡ }
||
_
tj|	|
|d}d}dd„ t|ƒD ƒ}d	d„ t|ƒD ƒ}|D ]r}|j d }|dkrk| |¡ t| |||ƒ\}}}| ¡  n|	 |¡ |	 | |¡\}}}|	 ¡  ||7 }t|ƒD ]'}|| ||d
  }}||kr¬||  |||… ¡ ||  |||… ¡ q…||7 }t d|t ¡ | f ¡ qLtjddd‰tjddd‰ ‡ fdd„|D ƒ}‡fdd„|D ƒ}dd„ |D ƒ}t|ƒ|ksîJ ‚tj|d
 dd}t |¡|d
d…< |t |¡t |¡fS )z§Computes the range-search search results for a dataset that possibly
    does not fit in RAM but for which we have an iterator that
    returns it block by block.
    Úfloat32r,   r.   r   ©ÚcoÚngpur   c                 S   ó   g | ]}g ‘qS r#   r#   ©r2   Ú_ir#   r#   r$   r4   ¨   ó    z&range_ground_truth.<locals>.<listcomp>c                 S   rn   r#   r#   ro   r#   r#   r$   r4   ©   rq   r)   r   r+   c                    ó"   g | ]}|g krt  |¡nˆ ‘qS r#   ©r@   rS   ©r2   rd   )Úempty_Dr#   r$   r4   À   ó   " c                    rr   r#   rs   rt   )Úempty_Ir#   r$   r4   Á   rv   c                 S   r/   r#   r0   rt   r#   r#   r$   r4   Â   r5   Úuint64N)r   r	   r@   Úascontiguousarrayr
   r   r   r   r   ÚGpuMultipleClonerOptionsÚshardr   rO   r   ri   r   rC   rQ   Úzerosr1   rR   rS   )r   r   Ú	thresholdr   r{   rm   r   r   r   r   rl   rU   r    r   r   r!   r"   Úlims_iÚDiÚIiÚjrf   rg   ÚsizesÚlimsr#   )ru   rw   r$   Úrange_ground_truth“   sR   




€r„   c           
      C   sp   |r||k}n||k }t  | ¡}d}t| ƒD ]\}}	t|	ƒ}	||||	 …  ¡ ||< ||	7 }q||| || fS )z select a set of results r   )r@   Ú
zeros_likeÚ	enumerater9   r?   )
r`   ÚdisÚidsÚthreshr   rG   Únew_nresÚord   rc   r#   r#   r$   Úthreshold_radius_nresÉ   s   


rŒ   c                 C   s‚   |r||k}n||k }t  | ¡}t| ƒd }t|ƒD ]}| | | |d  }	}
|| ||	|
…  ¡  ||d < q||| || fS )z= restrict range-search results to those below a given radius r)   )r@   r…   r1   rO   r?   )rƒ   r‡   rˆ   r‰   r   rG   Únew_limsÚnrd   rf   rg   r#   r#   r$   Úthreshold_radiusØ   s   

"r   c           
      C   sì   t  dd„ | D ƒ¡}t|ƒ|ksJ ‚|r&| t|ƒ| d ¡ |d|  }n	| |¡ || }|jdkr9t|ƒ}nt|ƒ}t d| ¡ d}t	| ƒD ] \}\}}}	t
|||	||d\}}}	|t|ƒ7 }|||	f| |< qJt d	| ¡ ||fS )
z‘find radius that reduces number of results to target_nres, and
    applies it in-place to the result batches used in
    range_search_max_resultsc                 S   s   g | ]\}}}|‘qS r#   r#   )r2   Ú_r‡   r#   r#   r$   r4   ê   ó    z apply_maxres.<locals>.<listcomp>r)   r.   rj   z   setting radius to %sr   r   z.   updated previous results, new nb results %d)r@   rS   r1   Ú	partitionr-   r:   r9   r   r=   r†   rŒ   )
Úres_batchesÚtarget_nresr   ÚalldisÚradiusÚtotresrd   r`   r‡   rˆ   r#   r#   r$   Úapply_maxresæ   s(   



ÿr˜   c                 C   s  t | tjƒ}|du r|dusJ ‚td| ƒ}|du r&|dus J ‚t|d ƒ}|dkr.t ¡ }|rGt d| ¡ t ¡ }	||	_tj	| |	|d}
nd}
t
 
¡ }d }}d } }}g }|D ]”}t
 
¡ }t dt|ƒ› d	¡ |
ryt|||
| ƒ\}}}n	|  ||¡\}}}|d
d… |dd…  }|t|ƒ7 }|t|ƒ7 }t
 
¡ }|r¥| d¡}|t|ƒ7 }| |||f¡ |durÑ||krÑt d||f ¡ t||| jtjkd\}}t
 
¡ }||| 7 }||| 7 }t dt
 
¡ | ||f ¡ q[t d||||f ¡ |r||krt||| jtjkd\}}t dd„ |D ƒ¡}t dd„ |D ƒ¡}t dd„ |D ƒ¡}tjt|ƒd
 dd}t |¡|d
d…< ||||fS )a  Performs a range search with many queries (given by an iterator)
    and adjusts the threshold on-the-fly so that the total results
    table does not grow larger than max_results.

    If ngpu != 0, the function moves the index to this many GPUs to
    speed up search.
    Ngš™™™™™é?g      ø?r.   r   rk   r   z
searching z vectorsr)   r(   z-too many results %d > %d, scaling back radiusr   z'   [%.3f s] %d queries done, %d resultszBsearch done in %.3f s + %.3f s, total %d results, end threshold %gc                 S   s   g | ]\}}}|‘qS r#   r#   ©r2   Únres_iÚdis_iÚids_ir#   r#   r$   r4   T  r‘   z,range_search_max_results.<locals>.<listcomp>c                 S   s   g | ]\}}}|‘qS r#   r#   r™   r#   r#   r$   r4   U  r‘   c                 S   s   g | ]\}}}|‘qS r#   r#   r™   r#   r#   r$   r4   V  r‘   rx   r,   )r7   r
   r8   r9   r   r   r   rz   r{   r   r	   r=   r1   ri   rC   r>   rQ   r˜   r   ÚMETRIC_INNER_PRODUCTr@   rS   r|   rR   )r   Úquery_iteratorr–   Úmax_resultsÚmin_resultsr{   rm   Úclip_to_minrX   rl   rU   Út_startÚt_searchÚt_post_processÚqtotr—   Ú
raw_totresr“   Úxqir   r~   r   r€   rš   rY   rZ   r`   r‡   rˆ   rƒ   r#   r#   r$   Úrange_search_max_results  s~   
ÿ

þ
ÿÿÿ

þr¨   é    é N  c                 c   s\    t | ƒ}|}d}||k r,| ||| … }|V  ||k r |d9 }|t |ƒ7 }||k sdS dS )z¤ produces batches of progressively increasing sizes. This is useful to
    adjust the search radius progressively without overflowing with
    intermediate results r   é   Nr0   )r   Ústart_bsÚmax_bsr   Úbsrd   r§   r#   r#   r$   Úexponential_query_iterator^  s   €ûr¯   )r&   )F)NNFr   F)r©   rª   )r
   r	   Únumpyr@   ÚloggingÚ	getLoggerÚ__name__r   rP   r%   Úknnri   r„   rŒ   r   r˜   r¨   r¯   r#   r#   r#   r$   Ú<module>   s$   
"
^
ÿ
6


þ\