o
    tBh-                     @   s  d Z ddlZddlZddlZddlmZ ddlmZ ddlmZ ddlm	Z	 ddl
mZ ddlmZ dd	lmZ dd
l
mZ ddlmZmZ ddlmZ ddlmZ ddlmZmZ ddlmZmZ edZe Ze ej!j"Z#ej$e# e_$ej!e# e_!e Z%e e%j!j"Z#e%j$e# e%_$e%j!e# e%_!dd Z&dd Z'dd Z(dd Z)dd Z*dd Z+dd Z,ej-.ddd gd!d" Z/d#d$ Z0d%d& Z1d'd( Z2d)d* Z3d+d, Z4ed-ed=i d.d/id0ej-.d1d2d3gd4d5 Z5ed-ed=i d.d6id0ej-.d1d2d3gd7d8 Z6d9d: Z7d;d< Z8dS )>zD
Testing for Isolation Forest algorithm (sklearn.ensemble.iforest).
    N)assert_array_equal)assert_array_almost_equal)ignore_warnings)assert_allclose)ParameterGrid)IsolationForest)_average_path_length)train_test_split)load_diabetes	load_iris)check_random_state)roc_auc_score)
csc_matrix
csr_matrix)Mockpatchc                  C   s   t ddgddgg} t ddgddgg}tdgg dddgd}t  |D ]}tdd	ti|| | q)W d
   d
S 1 sFw   Y  d
S )z6Check Isolation Forest for various parameter settings.r            )      ?      ?r   TF)n_estimatorsmax_samples	bootstraprandom_stateN )nparrayr   r   r   rngfitpredict)X_trainX_testgridparamsr   r   z/var/www/html/riverr-enterprise-integrations-main/venv/lib/python3.10/site-packages/sklearn/ensemble/tests/test_iforest.pytest_iforest/   s    "r&   c                  C   s   t d} ttjdd tjdd | d\}}}}tddgddgd	}ttfD ]8}||}||}|D ]+}	tdd
dd|		|}
|

|}tdd
dd|		|}|
|}t|| q4q(dS )z=Check IForest for various parameter settings on sparse input.r   N2   r   r   r   TF)r   r   
   r   )r   r   r   )r   r	   diabetesdatatargetr   r   r   r   r   r    r   )r   r!   r"   y_trainy_testr#   sparse_formatX_train_sparseX_test_sparser$   sparse_classifiersparse_resultsdense_classifierdense_resultsr   r   r%   test_iforest_sparse=   s8   

r6   c                  C   sL  t j} tt tdd|  W d   n1 sw   Y  tt tdd|  W d   n1 s8w   Y  tt tdd|  W d   n1 sUw   Y  d}tjt|d tdd|  W d   n1 svw   Y  t	
  t	d	t td
d|  W d   n1 sw   Y  t	
  t	d	t ttdd|  W d   n1 sw   Y  tt tdd|  W d   n1 sw   Y  tt tdd|  W d   n1 sw   Y  tt t | | ddddf  W d   dS 1 sw   Y  dS )z7Test that it gives proper exception on deficient input.r   N               @3max_samples will be set to n_samples for estimationmatchi  errorautor   foobarg      ?r   )irisr+   pytestraises
ValueErrorr   r   warnsUserWarningwarningscatch_warningssimplefilterr   int64r    )Xwarn_msgr   r   r%   test_iforest_errorY   s>   

$$rM   c               	   C   sF   t j} t | }|jD ]}|jttt	| j
d ks J qdS )zDCheck max_depth recalculation when max_samples is reset to n_samplesr   N)rA   r+   r   r   estimators_	max_depthintr   ceillog2shape)rK   clfestr   r   r%   test_recalculate_max_depth{   s
   
&rV   c                  C   s   t j} t | }|j| jd ksJ tdd}d}tjt|d ||  W d    n1 s1w   Y  |j| jd ks@J tdd| }|jd| jd  ksTJ d S )Nr   i  r8   r;   r<   g?)	rA   r+   r   r   max_samples_rS   rB   rE   rF   )rK   rT   rL   r   r   r%   test_max_samples_attribute   s   
rX   c            	      C   s   t d} ttjtj| d\}}}}tddd|}|jdd ||}|jdd ||}t	|| tddd|}||}t	|| dS )	zCheck parallel regression.r   r(   r   )n_jobsr   r   )rY   r   N)
r   r	   r*   r+   r,   r   r   
set_paramsr    r   )	r   r!   r"   r-   r.   ensembley1y2y3r   r   r%    test_iforest_parallel_regression   s   




r_   c                  C   s   t d} d| dd }tj|d |d f }|dd }| jdddd	}tj|dd |f }td
gd dgd  }td| d|}|| }t	||dksUJ dS )z#Test Isolation Forest performs wellr   g333333?x   Nd      )   r   )lowhighsizer   rd   r   )r   r   g\(\?)
r   randnr   r_uniformr   r   r   decision_functionr   )r   rK   r!   
X_outliersr"   r.   rT   y_predr   r   r%   test_iforest_performance   s   rn   contamination      ?r?   c              	   C   s   ddgddgddgddgddgddgddgddgg}t t| d	}|| || }||}t|dd  t|d d ksDJ t|ddg ddg   d S )
Nr7   r   r      r   rb      )r   ro   )	r   r   r   rk   r    r   minmaxr   )ro   rK   rT   decision_funcpredr   r   r%   test_iforest_works   s   4

(rx   c                  C   s&   t j} t | }|j|jksJ d S N)rA   r+   r   r   rW   _max_samples)rK   rT   r   r   r%   test_max_samples_consistency   s   r{   c                  C   sV   t d} ttjd d tjd d | d\}}}}tdd}||| || d S )Nr   r'   r(   g?)max_features)r   r	   r*   r+   r,   r   r   r    )r   r!   r"   r-   r.   rT   r   r   r%    test_iforest_subsampled_features   s   
r}   c                  C   s   dt dt j  d } dt dt j  d }ttdgdg ttdgdg ttd	gd
g ttdg| g ttdg|g ttt g ddd
| |g tt d}t|t | d S )Nr:   g      @g?g     0@g}?r   r9   r   r   r        )r   r   r~   r   )	r   logeuler_gammar   r   r   aranger   sort)
result_one
result_twoavg_path_lengthr   r   r%    test_iforest_average_path_length   s   
r   c                  C   s   ddgddgddgg} t dd| }t  | }t|ddgg|ddgg|j  t|ddgg|ddgg|j  t|ddgg|ddgg d S )Nr   r   g?)ro   r:   )r   r   r   score_samplesrk   offset_)r!   clf1clf2r   r   r%   test_score_samples   s   r   c                  C   sv   t d} | dd}tdd| dd}|| |jd }|jdd || t|jdks0J |jd |u s9J dS )	z/Test iterative addition of iTrees to an iForestr   rd   r   r)   T)r   r   r   
warm_start)r   N)r   rh   r   r   rN   rZ   len)r   rK   rT   tree_1r   r   r%   test_iforest_warm_start  s   


r   z*sklearn.ensemble._iforest.get_chunk_n_rowsreturn_valuer   )side_effectzcontamination, n_predict_calls)rp   r   )r?   r   c                 C      t | | j|ksJ d S ry   rx   
call_countmocked_get_chunkro   n_predict_callsr   r   r%   test_iforest_chunks_works1     r   r)   c                 C   r   ry   r   r   r   r   r%   test_iforest_chunks_works2(  r   r   c                  C   st  t d} t }||  t jd}t|| dksJ t||dddks-J t|| d dks:J t|| d dksGJ t 	|dddd} t }||  t|| dkseJ t||dddkstJ t|t ddksJ |dd} t }||  t|| dksJ t||dddksJ t|t ddksJ dS )z=Test whether iforest predicts inliers when using uniform data)ra   r)   r   r   ra   r)   N)
r   onesr   r   randomRandomStateallr    rh   repeat)rK   iforestr   r   r   r%   test_iforest_with_uniform_data2  s(   



 r   c                  C   sl   t ddgddgg} t ddg}t | |}tjtdd |j W d    d S 1 s/w   Y  d S )Nr   r   r   rc   r   z`n_features_` was deprecatedr<   )r   r   r   r   rB   rE   FutureWarningn_features_)rK   yrU   r   r   r%   test_n_features_deprecationU  s   "r   r   )9__doc__rB   rG   numpyr   sklearn.utils._testingr   r   r   r   sklearn.model_selectionr   sklearn.ensembler   sklearn.ensemble._iforestr   r	   sklearn.datasetsr
   r   sklearn.utilsr   sklearn.metricsr   scipy.sparser   r   unittest.mockr   r   r   rA   permutationr,   rg   permr+   r*   r&   r6   rM   rV   rX   r_   rn   markparametrizerx   r{   r}   r   r   r   r   r   r   r   r   r   r   r%   <module>   sl    "
#