o
    tBheS                     @   s  d Z ddlmZ ddlZddlZddlmZmZm	Z	 ddl
mZ ddlmZmZ ddlmZmZmZ ddlmZmZ dd	lmZmZ dd
lmZ ddlmZmZmZ ddlm Z  ddl!m"Z" ddl#m$Z$ ddl#m%Z% ddl&m'Z' ddl(m)Z) ddl*m+Z+ ddl,m-Z- ddl.m/Z/ ddlm0Z0 ddlm1Z1 G dd dZ2dd Z3dd Z4dd Z5ej67dd d!gd"d# Z8d$d% Z9d&d' Z:d(d) Z;d*d+ Z<d,d- Z=d.d/ Z>d0d1 Z?d2d3 Z@d4d5 ZAd6d7 ZBd8d9 ZCej67d:ed;d;gej67d<ed=fed>fgd?d@ ZDej67dAdBeEfdCeFfdDdE eFfdgeEfgej67dFeegdGdH ZGej67dIdd=gdJdK ZHdLdM ZIdNdO ZJej67dPeegdQdR ZKej67dPeegej67dSeeegdTdU ZLdS )Vz'
Testing Recursive feature elimination
    )
attrgetterN)assert_array_almost_equalassert_array_equalassert_allclose)sparse)BaseEstimatorClassifierMixin)PLSCanonicalPLSRegressionCCA)RFERFECV)	load_irismake_friedman1)zero_one_loss)SVCSVR	LinearSVR)LogisticRegression)RandomForestClassifier)cross_val_score)
GroupKFold)TransformedTargetRegressor)make_pipeline)StandardScaler)check_random_state)ignore_warnings)make_scorer)
get_scorerc                   @   sZ   e Zd ZdZdddZdd Zdd ZeZeZeZ	dd
dZ
dddZdd Zdd Zd	S )MockClassifierz@
    Dummy classifier to test recursive feature elimination
    r   c                 C   s
   || _ d S N	foo_param)selfr"    r$   /var/www/html/riverr-enterprise-integrations-main/venv/lib/python3.10/site-packages/sklearn/feature_selection/tests/test_rfe.py__init__&      
zMockClassifier.__init__c                 C   s0   t |t |ks
J tj|jd tjd| _| S )N   )dtype)lennponesshapefloat64coef_r#   Xyr$   r$   r%   fit)   s   zMockClassifier.fitc                 C   s
   |j d S )Nr   )r-   )r#   Tr$   r$   r%   predict.   r'   zMockClassifier.predictNc                 C      dS )Ng        r$   r0   r$   r$   r%   score5      zMockClassifier.scoreTc                 C   s
   d| j iS )Nr"   r!   )r#   deepr$   r$   r%   
get_params8   r'   zMockClassifier.get_paramsc                 K   s   | S r    r$   )r#   paramsr$   r$   r%   
set_params;   r8   zMockClassifier.set_paramsc                 C   s   ddiS )N	allow_nanTr$   )r#   r$   r$   r%   
_more_tags>   s   zMockClassifier._more_tags)r   )NN)T)__name__
__module____qualname____doc__r&   r3   r5   predict_probadecision_function	transformr7   r:   r<   r>   r$   r$   r$   r%   r   !   s    


r   c                  C   s   t d} t }tj|j| jt|jdfdf }|j}td| dd}t	|ddd	}|
|| t|j|jd
 ks;J tdd}t	|ddd	}|
|| t| |  d S )Nr      size      )n_estimatorsrandom_state	max_depth   皙?	estimatorn_features_to_selectstepr(   linearkernel)r   r   r+   c_datanormalr*   targetr   r   r3   ranking_r-   r   r   get_support)	generatoririsr1   r2   clfrfeclf_svcrfe_svcr$   r$   r%   test_rfe_features_importanceB   s   "
rc   c                  C   s8  t d} t }tj|j| jt|jdfdf }t|}|j	}t
dd}t|ddd}||| ||}||| t|j|jd	 ksIJ t
dd}t|ddd}	|	|| |	|}
|j|jjksiJ t|d d
 |jd d
  t||||j |||||j|j	ksJ t||
  d S )Nr   rF   rG   rT   rU   rN   rO   rP   r(   
   )r   r   r+   rW   rX   rY   r*   r   
csr_matrixrZ   r   r   r3   rE   r[   r-   r   r5   r7   toarray)r]   r^   r1   X_sparser2   r_   r`   X_r
clf_sparse
rfe_sparse
X_r_sparser$   r$   r%   test_rfeU   s(   "




 rl   c                  C   s   G dd dt t} tdd\}}tjtdd t|  d|| W d    n1 s,w   Y  tjtdd t|  dj||d	d
|| W d    n1 sSw   Y  t|  dj||d	d
j||d	d
 d S )Nc                   @   s    e Zd ZdddZdddZdS )z0test_RFE_fit_score_params.<locals>.TestEstimatorNc                 S   s2   |d u rt dtdd||| _| jj| _| S )Nfit: prop cannot be NonerT   rU   )
ValueErrorr   r3   svc_r/   r#   r1   r2   propr$   r$   r%   r3   v   s
   
z4test_RFE_fit_score_params.<locals>.TestEstimator.fitc                 S   s   |d u rt d| j||S )Nscore: prop cannot be None)rn   ro   r7   rp   r$   r$   r%   r7   }   s   z6test_RFE_fit_score_params.<locals>.TestEstimator.scorer    )r?   r@   rA   r3   r7   r$   r$   r$   r%   TestEstimatoru   s    
rs   T
return_X_yrm   matchrQ   rr   foo)rq   )	r   r   r   pytestraisesrn   r   r3   r7   )rs   r1   r2   r$   r$   r%   test_RFE_fit_score_paramsr   s   "(r|   rR   g @c                 C   sl   t dd}t }t|| dd}d|  }tjt|d ||j|j W d    d S 1 s/w   Y  d S )NrT   rU   rO   rP   z$n_features_to_select must be .+ Got rv   )	r   r   r   rz   r{   rn   r3   rX   rZ   )rR   r_   r^   r`   msgr$   r$   r%   "test_rfe_invalid_n_features_errors   s   

"r   c                  C   s   t d} t }tj|j| jt|jdfdf }|j}tdd}t	|ddd}|
|| t	|d	dd}|
|| t|j|j t|j|j d S )
Nr   rF   rG   rT   rU   rN   rO   rP   g?)r   r   r+   rW   rX   rY   r*   rZ   r   r   r3   r   r[   support_)r]   r^   r1   r2   r_   rfe_numrfe_percr$   r$   r%   test_rfe_percent_n_features   s   "
r   c                  C   s   t d} t }tj|j| jt|jdfdf }|j}t }t	|ddd}|
|| ||}|
|| t|j|jd ksBJ |j|jjksKJ d S )Nr   rF   rG   rN   rO   rP   r(   )r   r   r+   rW   rX   rY   r*   rZ   r   r   r3   rE   r[   r-   )r]   r^   r1   r2   r_   r`   rh   r$   r$   r%   test_rfe_mockclassifier   s   "
r   c                  C   s  t d} t }tj|j| jt|jdfdf }t|j}t	t
dddd}||| d}tjt|d	 t|j|jd ksBJ W d    n1 sLw   Y  |j D ]}t|j| |jd ksfJ qVt|j|jd kssJ ||}t||j t	t
dddd}t|}	||	| ||	}
t|
 |j ttd
d}t	t
ddd|d}t|j|| ||}t||j td}t	t
ddd|d}||| ||}t||j dd }t	t
ddd|d}||| tjt|d	 t|jt|jj W d    n	1 sw   Y  |jdks J t	t
dddd}||| tjt|d	 t|jdksAJ W d    n	1 sLw   Y  |j D ]}t|j| dksdJ qVt|j|jd kssJ ||}t||j t	t
dddd}t|}	||	| ||	}
t|
 |j t	t
dddd}t|}	||	| ||	}
t|
 |j d S )Nr   rF   rG   rT   rU   r(   rQ   rS   yThe `grid_scores_` attribute is deprecated in version 1\.0 in favor of `cv_results_` and will be removed in version 1\.2.rv   F)greater_is_better)rQ   rS   scoringaccuracyc                 S   r6   )Ng      ?r$   )rQ   r1   r2   r$   r$   r%   test_scorer   r8   ztest_rfecv.<locals>.test_scorerrJ   皙?)r   r   r+   rW   rX   rY   r*   listrZ   r   r   r3   rz   warnsFutureWarninggrid_scores_r-   cv_results_keysr[   rE   r   r   re   rf   r   r   r   r   r,   n_features_)r]   r^   r1   r2   rfecvr~   keyrh   rfecv_sparserg   rk   r   scorerr   r$   r$   r%   
test_rfecv   st   "










r   c                  C   s   t d} t }tj|j| jt|jdfdf }t|j}t	t
 dd}||| d}tjt|d t|j|jd ks@J W d    n1 sJw   Y  |j D ]}t|j| |jd ksdJ qTt|j|jd ksqJ d S )Nr   rF   rG   r(   r   r   rv   )r   r   r+   rW   rX   rY   r*   r   rZ   r   r   r3   rz   r   r   r   r-   r   r   r[   )r]   r^   r1   r2   r   r~   r   r$   r$   r%   test_rfecv_mockclassifier  s   "
r   c                  C   s   ddl m}  dd l}|  |_td}t }tj|j|j	t
|jdfdf }t|j}ttddddd}||| |j}|d t
| dksMJ d S )	Nr   )StringIOrF   rG   rT   rU   r(   )rQ   rS   verbose)ior   sysstdoutr   r   r+   rW   rX   rY   r*   r   rZ   r   r   r3   seekreadline)r   r   r]   r^   r1   r2   r   verbose_outputr$   r$   r%   test_rfecv_verbose_output0  s   "

r   c            
   	   C   s&  t d} t }tj|j| jt|jdfdf }t|j}ddgddgddgfD ]h\}}t	t
 ||d}||| t|jd | | d }d}tjt|d	 t|j|ks[J W d    n1 sew   Y  |j D ]}	t|j|	 |ks|J qot|j|jd ksJ |j|ksJ q(d S )
Nr   rF   rG   rJ   r(      )rQ   rS   min_features_to_selectr   rv   )r   r   r+   rW   rX   rY   r*   r   rZ   r   r   r3   ceilr-   rz   r   r   r   r   r   r[   r   )
r]   r^   r1   r2   rS   r   r   	score_lenr~   r   r$   r$   r%   test_rfecv_cv_results_sizeD  s,   "
r   c                  C   sF   t tdd} | jdksJ t }t| |j|j}| dks!J d S )NrT   rU   
classifiergffffff?)r   r   _estimator_typer   r   rX   rZ   min)r`   r^   r7   r$   r$   r%   test_rfe_estimator_tagse  s
   r   c                  C   s   d} t d| dd\}}|j\}} tdd}t|dd}|||}|j | d	 ks,J t|d
d}|||}|j | d	 ksCJ t|dd}|||}|j | d	 ksZJ d S )Nrd   2   r   	n_samples
n_featuresrL   rT   rU   g{Gz?rS   rJ   r      )r   r-   r   r   r3   r   sum)r   r1   r2   r   rQ   selectorselr$   r$   r%   test_rfe_min_stepn  s   

r   c               	   C   s  dd } dd }ddg}ddg}ddg}t |||D ]D\}}}td}|jd	|fd
}	|d	 }
ttdd||d}||	|
 t	|j
| |||ksPJ t	|j
||||ks^J qd}ddg}ddg}t ||D ]\}}td}|jd	|fd
}	|d	 }
ttdd|d}||	|
 d}tjt|d" t|j| |||ksJ t|j||||ksJ W d    n1 sw   Y  |j D ] }t|j| | |||ksJ t|j| ||||ksJ qqnd S )Nc                 S   s   d| | | d |  S Nr(   r$   r   rR   rS   r$   r$   r%   formula1  s   z4test_number_of_subsets_of_features.<locals>.formula1c                 S   s   dt | | t|  S r   )r+   r   floatr   r$   r$   r%   formula2  s   z4test_number_of_subsets_of_features.<locals>.formula2   r   rJ   +   d   rG   rT   rU   rP   r(   rd   r   r   rv   )zipr   rY   randroundr   r   r3   r+   maxr[   r   rz   r   r   r*   r   r   r   )r   r   n_features_listn_features_to_select_list	step_listr   rR   rS   r]   r1   r2   r`   r   r~   r   r$   r$   r%   "test_number_of_subsets_of_features  s^   	



r   c            
      C   s8  t d} t }tj|j| jt|jdfdf }|j}tt	ddd}|
|| |j}d}tjt|d |j}W d    n1 sCw   Y  |j}|jd	d
 |
|| t|j| tjt|d t|j| W d    n1 suw   Y  | |j ksJ | D ]}	||	 t|j|	 ksJ qd S )Nr   rF   rG   rT   rU   rx   r   rv   rJ   )n_jobs)r   r   r+   rW   rX   rY   r*   rZ   r   r   r3   r[   rz   r   r   r   r   r<   r   r   approx)
r]   r^   r1   r2   r   rfecv_rankingr~   rfecv_grid_scoresrfecv_cv_results_r   r$   r$   r%   test_rfe_cv_n_jobs  s.   "r   c                  C   s   t d} t }d}ttd|t|j}|j}|jdkt	}t
t| dddtddd}|j|||d	 |jdks>J d S )
Nr   rN   rL   r(   r   rJ   )n_splits)rQ   rS   r   cv)groups)r   r   r+   floorlinspacer*   rZ   rX   astypeintr   r   r   r3   r   )r]   r^   number_groupsr   r1   r2   
est_groupsr$   r$   r%   test_rfe_cv_groups  s   r   importance_getterzregressor_.coef_zselector, expected_n_featuresr   rN   c                 C   s\   t dddd\}}tdd}t|tjtjd}||| d}|||}|j |ks,J d S )Nr   rd   r   r   r   	regressorfuncinverse_funcr   )	r   r   r   r+   logexpr3   r   r   )r   r   expected_n_featuresr1   r2   rQ   log_estimatorr   r$   r$   r%   test_rfe_wrapped_estimator  s   

r   zimportance_getter, err_typeautorandomc                 C   s   | j S r    )
importance)xr$   r$   r%   <lambda>  s    r   Selectorc                 C   sr   t dddd\}}t }t|tjtjd}t| ||| d}||| W d    d S 1 s2w   Y  d S )Nr   rd   *   r   r   r   )	r   r   r   r+   r   r   rz   r{   r3   )r   err_typer   r1   r2   rQ   r   modelr$   r$   r%   %test_rfe_importance_getter_validation  s   
"r   r   c                 C   sn   t  }|j}|j}tj|d d< tj|d d< t }| d ur%t|| d}nt|d}|	|| |
| d S )Nr   r(   )rQ   r   rx   )r   rX   rZ   r+   NaNInfr   r   r   r3   rE   )r   r^   r1   r2   r_   r`   r$   r$   r%   test_rfe_allow_nan_inf_in_x.  s   
r   c                  C   sR   t t t } tdd\}}t| ddd}||| ||jd dks'J d S )NTrt   rJ   z$named_steps.logisticregression.coef_)rR   r   r(   )r   r   r   r   r   r3   rE   r-   )pipelinerX   r2   sfmr$   r$   r%   test_w_pipeline_2d_coef_A  s   r   c            	         s   t d} t }tj|j| jt|jdfdf }|j}tt	ddd  
|| t jd }dd	 t|D }t fd
d	|D }tj|dd}tj|dd}t jd | t jd | d S )Nr   rF   rG   rT   rU   rx   rJ   c                 S   s   g | ]}d | dqS )split_test_scorer$   ).0ir$   r$   r%   
<listcomp>X  s    z+test_rfecv_std_and_mean.<locals>.<listcomp>c                    s   g | ]} j | qS r$   )r   )r   r   r   r$   r%   r   Z  s    )axismean_test_scorestd_test_score)r   r   r+   rW   rX   rY   r*   rZ   r   r   r3   r   rangeasarraymeanstdr   )	r]   r^   r1   r2   n_split_keys
split_keys	cv_scoresexpected_meanexpected_stdr$   r   r%   test_rfecv_std_and_meanO  s   "r  ClsRFEc                 C   s@   t jjdd}t jjddd}tdd}| |}||| d S )N)rd   r   rG   rJ   )rd   rJ   r   )rK   )r+   r   rY   randintr   r3   )r  r1   r2   r_   rfe_testr$   r$   r%   test_multioutputb  s
   
r  PLSEstimatorc                 C   sH   t dddd\}}|dd}| |dd||}|||dks"J d	S )
zCheck the behaviour of RFE with PLS estimators.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/12410
    r   rd   r   r   r(   )n_componentsr   g      ?N)r   r3   r7   )r  r  r1   r2   rQ   r   r$   r$   r%   test_rfe_plsk  s   
r
  )MrB   operatorr   rz   numpyr+   numpy.testingr   r   r   scipyr   sklearn.baser   r   sklearn.cross_decompositionr	   r
   r   sklearn.feature_selectionr   r   sklearn.datasetsr   r   sklearn.metricsr   sklearn.svmr   r   r   sklearn.linear_modelr   sklearn.ensembler   sklearn.model_selectionr   r   sklearn.composer   sklearn.pipeliner   sklearn.preprocessingr   sklearn.utilsr   sklearn.utils._testingr   r   r   r   rc   rl   r|   markparametrizer   r   r   r   r   r   r   r   r   r   r   r   r   rn   AttributeErrorr   r   r   r  r  r
  r$   r$   r$   r%   <module>   s    !

_!	N!
	

