o
    tBhoz                     @   s*  d Z ddlmZ ddlZddlZddlZddlmZ ddl	m
Z
 ddl	mZ ddlmZmZ ddlmZmZ dd	lmZmZ dd
lmZmZ ddlmZmZ ddlmZmZ ddlmZm Z  ddl!m"Z" ddl#m$Z$ ddl%m&Z& ddlm'Z' ddl(m)Z)m*Z*m+Z+ ddl,m-Z- ddl.m/Z/m0Z0 ddlm1Z1 ddl2m3Z3m4Z4 e-dZ5e* Z6e57e6j8j9Z:e6j;e: e6_;e6j8e: e6_8e) Z<e57e<j8j9Z:e<j;e: e<_;e<j8e: e<_8dd Z=ej>?dee3e4gdddddddddddd dd!ddd d"gg d#d$d% Z@d&d' ZAd(d) ZBG d*d+ d+eZCd,d- ZDd.d/ ZEd0d1 ZFd2d3 ZGd4d5 ZHd6d7 ZId8d9 ZJd:d; ZKd<d= ZLd>d? ZMd@dA ZNdBdC ZOG dDdE dEeZPdFdG ZQdldIdJZRdKdL ZSdMdN ZTdOdP ZUdQdR ZVdSdT ZWdUdV ZXdWdX ZYdYdZ ZZd[d\ Z[d]d^ Z\d_d` Z]dadb Z^dcdd Z_dedf Z`dgdh Zaej>?dieegdjdk ZbdS )mzE
Testing for the bagging ensemble module (sklearn.ensemble.bagging).
    )productN)BaseEstimator)assert_array_equal)assert_array_almost_equal)DummyClassifierDummyRegressor)GridSearchCVParameterGrid)BaggingClassifierBaggingRegressor)
PerceptronLogisticRegression)KNeighborsClassifierKNeighborsRegressor)DecisionTreeClassifierDecisionTreeRegressor)SVCSVR)SparseRandomProjection)make_pipeline)SelectKBest)train_test_split)load_diabetes	load_irismake_hastie_10_2)check_random_state)FunctionTransformerscale)cycle)
csc_matrix
csr_matrixc            	      C   s   t d} ttjtj| d\}}}}tddgddgddgddgd	}d t td
dtddt	 t
 g}t|t|D ]\}}td|| dd|||| q:d S )Nr   random_state      ?      ?      TFmax_samplesmax_features	bootstrapbootstrap_features   )max_iter   )	max_depth)base_estimatorr"   n_estimators )r   r   irisdatatargetr	   r   r   r   r   r   zipr   r
   fitpredict)	rngX_trainX_testy_trainy_testgrid
estimatorsparamsr0   r2   r2   z/var/www/html/riverr-enterprise-integrations-main/venv/lib/python3.10/site-packages/sklearn/ensemble/tests/test_bagging.pytest_classification4   s8   
	
rB   zsparse_format, params, methodr#   r.   Tr'   r$   r&   Fr)   r*   r+   r(   r*   r+   )r8   predict_probapredict_log_probadecision_functionc                    s   G dd dt }td}tttjtj|d\}}}}| |}	| |}
td|ddddd	||	|}t	|||
}td|ddddd	|||}t	|||}t
|| t|	 d
d |jD }t fdd|D ssJ d S )Nc                           e Zd ZdZ fddZ  ZS )z-test_sparse_classification.<locals>.CustomSVC7SVC variant that records the nature of the training setc                       t  || t|| _| S Nsuperr7   type
data_type_selfXy	__class__r2   rA   r7   r      
z1test_sparse_classification.<locals>.CustomSVC.fit__name__
__module____qualname____doc__r7   __classcell__r2   r2   rT   rA   	CustomSVCo       r]   r   r!   linearovr)kerneldecision_function_shaper%   r0   r"   c                 S      g | ]}|j qS r2   rO   .0ir2   r2   rA   
<listcomp>       z.test_sparse_classification.<locals>.<listcomp>c                       g | ]}| kqS r2   r2   rg   tsparse_typer2   rA   ri          r2   )r   r   r   r   r3   r4   r5   r
   r7   getattrr   rN   estimators_all)sparse_formatr@   methodr]   r9   r:   r;   r<   r=   X_train_sparseX_test_sparsesparse_classifiersparse_resultsdense_classifierdense_resultstypesr2   rn   rA   test_sparse_classificationU   s:   


r}   c                  C   s   t d} ttjd d tjd d | d\}}}}tddgddgddgddgd}d t t t t	 fD ]}|D ]}t
d
|| d	|||| q9q5d S )Nr   2   r!   r#   r$   TFr'   rc   r2   )r   r   diabetesr4   r5   r	   r   r   r   r   r   r7   r8   )r9   r:   r;   r<   r=   r>   r0   r@   r2   r2   rA   test_regression   s4   
r   c                     s0  t d} ttjd d tjd d | d\}}}}G dd dt}ddddd	d
dddd	ddddddddg}ttfD ]X}||}||}	|D ]K}
td| dd|
	||}|
|	}td| dd|
	||
|}t| dd |jD }t|| t fdd|D sJ t|| qIq=d S )Nr   r~   r!   c                       rH   )z)test_sparse_regression.<locals>.CustomSVRrI   c                    rJ   rK   rL   rP   rT   r2   rA   r7      rV   z-test_sparse_regression.<locals>.CustomSVR.fitrW   r2   r2   rT   rA   	CustomSVR   r^   r   r#   r.   Tr'   r$   r&   FrC   rD   r%   rc   c                 S   rd   r2   re   rf   r2   r2   rA   ri      rj   z*test_sparse_regression.<locals>.<listcomp>c                    rk   r2   r2   rl   rn   r2   rA   ri      rp   r2   )r   r   r   r4   r5   r   r   r    r   r7   r8   rN   rr   r   rs   )r9   r:   r;   r<   r=   r   parameter_setsrt   rv   rw   r@   rx   ry   r{   r|   r2   rn   rA   test_sparse_regression   sR   




r   c                   @   s   e Zd Zdd ZdS )DummySizeEstimatorc                 C   s   |j d | _t|| _d S )Nr   )shapetraining_size_joblibhashtraining_hash_rP   r2   r2   rA   r7      s   zDummySizeEstimator.fitN)rX   rY   rZ   r7   r2   r2   r2   rA   r      s    r   c            	      C   s   t d} ttjtj| d\}}}}t ||}tt dd| d||}||||||ks3J tt dd| d||}||||||ksNJ tt	 dd||}g }|j
D ]}|j|jd ksjJ ||j q^tt|t|ks}J d S )Nr   r!   r$   F)r0   r(   r*   r"   T)r0   r*   )r   r   r   r4   r5   r   r7   r   scorer   rr   r   r   appendr   lenset)	r9   r:   r;   r<   r=   r0   ensembletraining_hash	estimatorr2   r2   rA   test_bootstrap_samples   sB   

r   c                  C   s   t d} ttjtj| d\}}}}tt dd| d||}|jD ]}tjj	d t
|j	d ks3J q!tt dd| d||}|jD ]}tjj	d t
|j	d ksVJ qDd S )Nr   r!   r$   F)r0   r)   r+   r"   r%   T)r   r   r   r4   r5   r   r   r7   estimators_features_r   npunique)r9   r:   r;   r<   r=   r   featuresr2   r2   rA   test_bootstrap_features  s2   

"
"r   c                  C   s  t d} ttjtj| d\}}}}tjddd` tt | d	||}t
tj||ddtt| t
||t|| tt | dd		||}t
tj||ddtt| t
||t|| W d    d S 1 s{w   Y  d S )
Nr   r!   ignore)divideinvalidrc   r%   )axis   )r0   r"   r(   )r   r   r3   r4   r5   r   errstater
   r   r7   r   sumrE   onesr   exprF   r   r9   r:   r;   r<   r=   r   r2   r2   rA   test_probability7  s8   
"r   c            	   	   C   s   t d} ttjtj| d\}}}}t t fD ]H}t|ddd| d||}|	||}t
||j dk s7J d}tjt|d t|d	dd| d}||| W d    n1 sZw   Y  qd S )
Nr   r!   d   Tr0   r1   r*   	oob_scorer"   皙?{Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.matchr%   )r   r   r3   r4   r5   r   r   r
   r7   r   abs
oob_score_pytestwarnsUserWarning)	r9   r:   r;   r<   r=   r0   clf
test_scorewarn_msgr2   r2   rA   test_oob_score_classificationZ  s<   
r   c            	      C   s   t d} ttjtj| d\}}}}tt ddd| d||}|||}t	||j
 dk s0J d}tjt|d tt d	dd| d}||| W d    d S 1 sUw   Y  d S )
Nr   r!   r~   Tr   r   r   r   r%   )r   r   r   r4   r5   r   r   r7   r   r   r   r   r   r   )	r9   r:   r;   r<   r=   r   r   r   regrr2   r2   rA   test_oob_score_regression  s6   
"r   c                  C   sf   t d} ttjtj| d\}}}}tt ddd| d||}t ||}t|	||	| d S )Nr   r!   r%   F)r0   r1   r*   r+   r"   )
r   r   r   r4   r5   r   r   r7   r   r8   )r9   r:   r;   r<   r=   clf1clf2r2   r2   rA   test_single_estimator  s   
r   c                  C   s  t jt j} }t }tt t|dd| | W d    n1 s$w   Y  tt t|dd| | W d    n1 sCw   Y  tt t|dd| | W d    n1 sbw   Y  tt t|dd| | W d    n1 sw   Y  tt t|dd| | W d    n1 sw   Y  tt t|dd| | W d    n1 sw   Y  tt t|dd| | W d    n1 sw   Y  tt t|dd| | W d    n1 sw   Y  tt t|dd| | W d    n	1 sw   Y  tt t|dd| | W d    n	1 s=w   Y  t	t|| |d	rPJ d S )
N)r(   g        g       @i  foobarr)   r   rG   )
r3   r4   r5   r   r   raises
ValueErrorr
   r7   hasattr)rR   rS   baser2   r2   rA   
test_error  sB    r   c                  C   s  t tjtjdd\} }}}tt ddd| |}||}|jdd ||}t	|| tt ddd| |}||}t	|| tt
ddddd| |}||}|jdd ||}	t	||	 tt
ddddd| |}||}
t	||
 d S )	Nr   r!      n_jobsr"   r%   r   r`   )rb   )r   r3   r4   r5   r
   r   r7   rE   
set_paramsr   r   rG   )r:   r;   r<   r=   r   y1y2y3
decisions1
decisions2
decisions3r2   r2   rA   test_parallel_classification  sF   









r   c            	      C   s   t d} ttjtj| d\}}}}tt ddd||}|jdd |	|}|jdd |	|}t
|| tt ddd||}|	|}t
|| d S )Nr   r!   r   r   r%   r   r.   )r   r   r   r4   r5   r   r   r7   r   r8   r   )	r9   r:   r;   r<   r=   r   r   r   r   r2   r2   rA   test_parallel_regression  s"   




r   c                  C   sD   t jt j} }d||dk< ddd}ttt |dd| | d S )Nr%   r.   )r%   r.   )r1   base_estimator__Croc_auc)scoring)r3   r4   r5   r   r
   r   r7   )rR   rS   
parametersr2   r2   rA   test_gridsearch  s   
 r   c                  C   s,  t d} ttjtj| d\}}}}td ddd||}t|jt	s$J tt	 ddd||}t|jt	s8J tt
 ddd||}t|jt
sLJ ttjtj| d\}}}}td ddd||}t|jtslJ tt ddd||}t|jtsJ tt ddd||}t|jtsJ d S )Nr   r!   r   r   )r   r   r3   r4   r5   r
   r7   
isinstancebase_estimator_r   r   r   r   r   r   r   r2   r2   rA   test_base_estimator'  s6   

r   c                  C   sL   t ttddt dd} | tjtj t| d j	d d j
ts$J d S )Nr%   )kr.   r   r   r   )r
   r   r   r   r7   r3   r4   r5   r   stepsr"   int)r   r2   r2   rA   test_bagging_with_pipelineS  s
   "r   c                   @   s   e Zd Zdd Zdd ZdS )DummyZeroEstimatorc                 C   s   t || _| S rK   )r   r   classes_rP   r2   r2   rA   r7   \  s   zDummyZeroEstimator.fitc                 C   s   | j tj|jd td S )Nr   )dtype)r   r   zerosr   r   )rQ   rR   r2   r2   rA   r8   `  s   zDummyZeroEstimator.predictN)rX   rY   rZ   r7   r8   r2   r2   r2   rA   r   [  s    r   c                  C   s   t t } td}| tjtjtj t	t
 | jtjtj|jdtjjd dd W d    d S 1 s9w   Y  d S )Nr   
   )size)sample_weight)r
   r   r   r7   r3   r4   r5   r8   r   r   r   randintr   )r   r9   r2   r2   rA   1test_bagging_sample_weight_unsupported_but_passedd  s   
"r   *   c                 C   s   t ddd\}}d }dD ]"}|d u rt|| dd}n|j|d ||| t||ks.J qtd| d	d}||| td
d |D tdd |D ksPJ d S )Nr,   r%   	n_samplesr"   )r   r   T)r1   r"   
warm_startr1   r   Fc                 S   rd   r2   r!   rg   treer2   r2   rA   ri     rj   z#test_warm_start.<locals>.<listcomp>c                 S   rd   r2   r!   r   r2   r2   rA   ri     rj   )r   r
   r   r7   r   r   )r"   rR   rS   clf_wsr1   	clf_no_wsr2   r2   rA   test_warm_startq  s"   r   c                  C   sp   t ddd\} }tddd}|| | |jdd tt || | W d    d S 1 s1w   Y  d S )	Nr,   r%   r   r   T)r1   r   r&   r   )r   r
   r7   r   r   r   r   rR   rS   r   r2   r2   rA   $test_warm_start_smaller_n_estimators  s   "r   c            	      C   s   t ddd\} }t| |dd\}}}}tdddd	}||| ||}|d
7 }d}tjt|d ||| W d    n1 sCw   Y  t||| d S )Nr,   r%   r   +   r!   r   TS   r1   r   r"   r$   z;Warm-start fitting without increasing n_estimators does notr   )	r   r   r
   r7   r8   r   r   r   r   )	rR   rS   r:   r;   r<   r=   r   y_predr   r2   r2   rA   "test_warm_start_equal_n_estimators  s   
r   c            
      C   s   t ddd\} }t| |dd\}}}}tdddd	}||| |jd
d ||| ||}td
ddd	}||| ||}	t||	 d S )Nr,   r%   r   r   r!   r   TiE  r   r   r   F)r   r   r
   r7   r   r8   r   )
rR   rS   r:   r;   r<   r=   r   r   r   r   r2   r2   rA   test_warm_start_equivalence  s   

r   c                  C   sZ   t ddd\} }tdddd}tt || | W d    d S 1 s&w   Y  d S )Nr,   r%   r   r   T)r1   r   r   )r   r
   r   r   r   r7   r   r2   r2   rA   $test_warm_start_with_oob_score_fails  s
   "r   c                  C   s~   t ddd\} }tddd}|| | |jdddd	 || | tt t|d
 W d    d S 1 s8w   Y  d S )Nr   r%   r   r   T)r1   r   Fr   )r   r   r1   r   )r   r
   r7   r   r   r   AttributeErrorrq   r   r2   r2   rA   $test_oob_score_removed_on_warm_start  s   "r   c                  C   sH   t ddd\} }tt ddddd}|| |j|| |jks"J d S )N   r%   r   r#   T)r(   r)   r   r"   )r   r
   r   r7   r   rR   rS   baggingr2   r2   rA   test_oob_score_consistency  s   $r   c                  C   s   t ddd\} }tt ddddd}|| | |j}|j}|j}t|t|ks+J t|d t| d ks9J |d jj	d	ksCJ d}|| }|| }|| }	| | d d |f }
|| }|	j
}|	|
| |	j
}t|| d S )
Nr   r%   r   r#   F)r(   r)   r"   r*   r   r.   rh   )r   r
   r   r7   estimators_samples_r   rr   r   r   kindcoef_r   )rR   rS   r   estimators_samplesestimators_featuresr?   estimator_indexestimator_samplesestimator_featuresr   r:   r<   
orig_coefs	new_coefsr2   r2   rA   test_estimators_samples  s2   r  c                  C   s   t  } | j| j}}ttddt }t|ddd}||| |jd j	d d j
 }|jd }|jd }|jd }|| d d |f }	|| }
||	|
 t|j	d d j
| d S )Nr.   )n_componentsr#   r   )r0   r(   r"   r   r%   )r   r4   r5   r   r   r   r
   r7   rr   r   r   copyr   r   r   )r3   rR   rS   base_pipeliner   pipeline_estimator_coefr   estimator_sampleestimator_featurer:   r<   r2   r2   rA   %test_estimators_samples_deterministic  s"   


r  c                  C   sH   d} t d|  dd\}}tt | ddd}||| |j| ks"J d S )Nr   r.   r%   r   r#   )r(   r)   r"   )r   r
   r   r7   _max_samples)r(   rR   rS   r   r2   r2   rA   test_max_samples_consistency"  s   r  c                  C   s   d} dgdgdggd }g dd }g dd }g dd }t d| d	||j}t d| d	||j}t d| d	||j}||g||gksIJ d S )
Nr   r   r   r%   )ABC)r   r   r%   )r   r%   r.   T)r   r"   )r
   r7   r   )r"   rR   Y1Y2Y3x1x2x3r2   r2   rA   !test_set_oob_score_label_encoding1  s$   


r  c                 C   s"   | j ddd} d| t|  < | S )NfloatT)r  r   )astyper   isfinite)rR   r2   r2   rA   replaceK  s   r  c               	   C   sJ  t g dg ddt jdgdt jdgdt jdgg} t g dt g dg dg dg dg dgg}|D ]k}t }ttt|}|	| |
|  t|}|	| |
| }|j|jksaJ t }t|}tt |	| | W d    n1 s~w   Y  t|}tt |	| | W d    n1 sw   Y  q7d S )Nr%   r   r   r.   N   r.   r  )r.   r   r   r   r   )r.   r%   	   )r   r     )r   arraynaninfNINFr   r   r   r  r7   r8   r   r   r   r   r   )rR   y_valuesrS   	regressorpipelinebagging_regressory_hatr2   r2   rA   *test_bagging_regressor_with_missing_inputsQ  sH   



r*  c               	   C   s2  t g dg ddt jdgdt jdgdt jdgg} t g d}t }ttt|}|	| |
|  t|}|	| | |
| }|j|jksKJ ||  ||  t }t|}tt |	| | W d    n1 srw   Y  t|}tt |	| | W d    d S 1 sw   Y  d S )Nr  r  r.   r  )r   r  r  r  r  )r   r!  r"  r#  r$  r   r   r   r  r7   r8   r
   r   rF   rE   r   r   r   )rR   rS   
classifierr'  bagging_classifierr)  r2   r2   rA   +test_bagging_classifier_with_missing_inputsz  s6   


	


"r-  c                  C   sD   t ddgddgg} t ddg}tt ddd}|| | d S )Nr%   r.   r   r&   r   g333333?)r)   r"   )r   r!  r
   r   r7   r   r2   r2   rA   test_bagging_small_max_features  s   r.  c                  C   sj   t jd} | dd}t d}G dd dt}t| ddd}||| t|j	d j
|jd  d S )Nr      r&   c                   @   s   e Zd ZdZdd ZdS )z8test_bagging_get_estimators_indices.<locals>.MyEstimatorz7An estimator which stores y indices information at fit.c                 S   s
   || _ d S rK   )_sample_indicesrP   r2   r2   rA   r7     s   
z<test_bagging_get_estimators_indices.<locals>.MyEstimator.fitN)rX   rY   rZ   r[   r7   r2   r2   r2   rA   MyEstimator  s    r1  r%   )r0   r1   r"   )r   randomRandomStaterandnaranger   r   r7   r   rr   r0  r   )r9   rR   rS   r1  r   r2   r2   rA   #test_bagging_get_estimators_indices  s   
r6  	Estimatorc                 C   sl   t ddgddgg}t ddg}|  ||}tjtdd |j W d    d S 1 s/w   Y  d S )Nr%   r.   r   r&   r   z`n_features_` was deprecatedr   )r   r!  r7   r   r   FutureWarningn_features_)r7  rR   rS   estr2   r2   rA   test_n_features_deprecation  s   "r;  )r   )cr[   	itertoolsr   numpyr   r   r   sklearn.baser   sklearn.utils._testingr   r   sklearn.dummyr   r   sklearn.model_selectionr   r	   sklearn.ensembler
   r   sklearn.linear_modelr   r   sklearn.neighborsr   r   sklearn.treer   r   sklearn.svmr   r   sklearn.random_projectionr   sklearn.pipeliner   sklearn.feature_selectionr   r   sklearn.datasetsr   r   r   sklearn.utilsr   sklearn.preprocessingr   r   r   scipy.sparser   r    r9   r3   permutationr5   r   permr4   r   rB   markparametrizer}   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r  r  r*  r-  r.  r6  r;  r2   r2   r2   rA   <module>   s    !


):*#%$!),	

() 
