o
    tBhgU                  
   @   s  d Z ddlZddlZddlZddlmZ ddlmZ ddlmZ ddlm	Z	 ddlm
Z
 ddlmZmZ dd	lmZ dd
lmZ ddlmZ ddlmZmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlm Z m!Z! ddl"m#Z#m$Z$ ddl%m&Z& ddl'm(Z( ddl)m*Z* ej+,dZ-ddgddgddgddgddgddggZ.g dZ/g dZ0ddgddgddggZ1g dZ2g d Z3e*4 Z5e-6e5j7j8Z9e&e5j:e5j7e-d!\e5_:e5_7e*; Z<e&e<j:e<j7e-d!\e<_:e<_7d"d# Z=d$d% Z>ej?@d&d'd(gd)d* ZAd+d, ZBd-d. ZCej?@d/g d0d1d2 ZDej?@d&d'd(gd3d4 ZEd5d6 ZFd7d8 ZGd9d: ZHd;d< ZId=d> ZJd?d@ ZKdAdB ZLdCdD ZMdEdF ZNdGdH ZOej?@d&d'd(gdIdJ ZPdKdL ZQej?@dMdNdieRdOfdNdieRdPfdNdQieSdRfdSdieRdTfdSdieRdUfgej?@dVee.e/fee.e0fgdWdX ZTej?@d&d'd(gdYdZ ZUej?@dVe e5j:e5j7fe e<j:e<j7fgd[d\ ZVdS )]z6Testing for the boost module (sklearn.ensemble.boost).    N)
csc_matrix)
csr_matrix)
coo_matrix)
dok_matrix)
lil_matrix)assert_array_equalassert_array_less)assert_array_almost_equal)BaseEstimator)clone)DummyClassifierDummyRegressor)LinearRegression)train_test_split)GridSearchCV)AdaBoostClassifier)AdaBoostRegressor)_samme_proba)SVCSVR)DecisionTreeClassifierDecisionTreeRegressor)shuffle)NoSampleWeightWrapper)datasets      )foor   r   r   r   r   )r   r   r   r   r   r      )r   r   r   )r   r   r   random_statec                     s   t g dg dg dg dg  t  jddd d t jf   G  fddd} |  }t|d	t  }t|j j t 	|
 sHJ tt j|ddg d
 tt j|ddg d d S )N)r   ư>r   )gRQ?g333333?皙?)igRQ?g      ?)r#   r   g&.>r   axisc                       s   e Zd Z fddZdS )z'test_samme_proba.<locals>.MockEstimatorc                    s   t |j j  S N)r   shapeselfXprobs /var/www/html/riverr-enterprise-integrations-main/venv/lib/python3.10/site-packages/sklearn/ensemble/tests/test_weight_boosting.pypredict_probaC   s   z5test_samme_proba.<locals>.MockEstimator.predict_probaN)__name__
__module____qualname__r0   r.   r,   r.   r/   MockEstimatorB   s    r4   r    )r   r   r   r   )r   r   r   r   )nparrayabssumnewaxisr   	ones_liker   r(   isfiniteallargminargmax)r4   mocksamme_probar.   r,   r/   test_samme_proba7   s   $rA   c                  C   s>   t tt} t t| }t|tt ttdf d S )Nr   )r5   oneslenr+   r   fitr	   r0   )y_tclfr.   r.   r/   test_oneclass_adaboost_probaT   s   "rG   	algorithmSAMMESAMME.Rc                 C   sz   t | dd}|tt t|tt tt	t
t|j |tjttdfks.J |tjttfks;J d S )Nr   rH   r"   r   )r   rD   r+   y_classr   predictT	y_t_classr5   uniqueasarrayclasses_r0   r(   rC   decision_function)rH   rF   r.   r.   r/   test_classification_toy]   s   rT   c                  C   s*   t dd} | tt t| tt d S )Nr   r!   )r   rD   r+   y_regrr   rM   rN   y_t_regr)rF   r.   r.   r/   test_regression_toyh   s   
rW   c                  C   s  t tj} d  }}dD ]m}t|d}|tjtj t| |j |	tj}|dkr/|}|}|j
d t| ks:J |tjj
d t| ksIJ |tjtj}|dks]J d||f t|jdksfJ ttdd |jD t|jksyJ qd	|_td
t |	tj|  d S )NrI   rJ   rH   rI   r   g?z'Failed with algorithm %s and score = %fc                 s       | ]}|j V  qd S r'   r!   .0estr.   r.   r/   	<genexpr>       ztest_iris.<locals>.<genexpr>rJ   r   )r5   rP   iristargetr   rD   datar   rR   r0   r(   rC   rS   scoreestimators_setrH   r   r7   )classes	clf_samme
prob_sammealgrF   probarc   r.   r.   r/   	test_iriso   s(   
 rk   loss)linearsquareexponentialc                 C   st   t | dd}|tjtj |tjtj}|dksJ t|jdks%J ttdd |jD t|jks8J d S )Nr   )rl   r"   g?r   c                 s   rZ   r'   r!   r[   r.   r.   r/   r^      r_   z test_diabetes.<locals>.<genexpr>)	r   rD   diabetesrb   ra   rc   rC   rd   re   )rl   regrc   r.   r.   r/   test_diabetes   s   *rr   c                 C   s  t jd}|jdtjjd}|jdtjjd}t| dd}|j	tj
tj|d |tj
}dd |tj
D }|tj
}dd |tj
D }|jtj
tj|d}	d	d |jtj
tj|dD }
t|dkskJ t||d
  t|dkszJ t||d
  t|
dksJ t|	|
d
  tddd}|j	tj
tj|d |tj
}dd |tj
D }|jtj
tj|d}	dd |jtj
tj|dD }
t|dksJ t||d
  t|
dksJ t|	|
d
  d S )Nr   
   )size)rH   n_estimatorssample_weightc                 S      g | ]}|qS r.   r.   r\   pr.   r.   r/   
<listcomp>       z'test_staged_predict.<locals>.<listcomp>c                 S   rx   r.   r.   ry   r.   r.   r/   r{      r|   c                 S   rx   r.   r.   r\   sr.   r.   r/   r{      s    r   )ru   r"   c                 S   rx   r.   r.   ry   r.   r.   r/   r{      r|   c                 S   rx   r.   r.   r}   r.   r.   r/   r{      s    )r5   randomRandomStaterandintr`   ra   r(   rp   r   rD   rb   rM   staged_predictr0   staged_predict_probarc   staged_scorerC   r	   r   )rH   rngiris_weightsdiabetes_weightsrF   predictionsstaged_predictionsrj   staged_probasrc   staged_scoresr.   r.   r/   test_staged_predict   sB   
r   c                  C   sh   t t d} dddd}t| |}|tjtj tt dd} ddd}t| |}|t	jt	j d S )N)base_estimator)r   r   rX   )ru   base_estimator__max_depthrH   r   r   r"   )ru   r   )
r   r   r   rD   r`   rb   ra   r   r   rp   )boost
parametersrF   r.   r.   r/   test_gridsearch   s   


r   c                  C   s   dd l } dD ]8}t|d}|tjtj |tjtj}| |}| |}t	||j
ks0J |tjtj}||ks>J qtdd}|tjtj |tjtj}| |}| |}t	||j
ksgJ |tjtj}||ksuJ d S )Nr   rX   rY   r!   )pickler   rD   r`   rb   ra   rc   dumpsloadstype	__class__r   rp   )r   ri   objrc   r~   obj2score2r.   r.   r/   test_pickle   s$   





r   c               	   C   s~   t jdddddddd\} }dD ],}t|d	}|| | |j}|jd dks)J |d dtjf |dd  k s<J qd S )
Ni  rs   r    r   Fr   )	n_samples
n_featuresn_informativen_redundant
n_repeatedr   r"   rX   rY   )	r   make_classificationr   rD   feature_importances_r(   r5   r9   r<   )r+   yri   rF   importancesr.   r.   r/   test_importances   s    


(r   c                  C   s   t dd} d}tjt|d | tt W d    n1 sw   Y  tdd}d}tjt|d |tt W d    n1 sCw   Y  t }t	d}tjt|d |jttt
dgd	 W d    d S 1 sow   Y  d S )
Nr   )rl   zEloss must be 'linear', 'square', or 'exponential'. Got 'foo' instead.matchrY   z:Algorithm must be 'SAMME' or 'SAMME.R'. Got 'foo' instead.z*sample_weight.shape == (1,), expected (6,)r   rv   )r   pytestraises
ValueErrorrD   r+   rL   r   reescaper5   rQ   )rq   msgrF   r.   r.   r/   
test_error  s   


"r   c                  C   s   ddl m}  t|  }|tt tt dd}|tt ddl m} t	| dd}|tt t	t
 dd}|tt ddgddgddgddgg}g d}tt dd}tjtd	d
 ||| W d    d S 1 spw   Y  d S )Nr   )RandomForestClassifierrI   rY   )RandomForestRegressorr!   r   )r   barr   r   zworse than randomr   )sklearn.ensembler   r   rD   r+   rU   r   rL   r   r   r   r   r   r   )r   rF   r   X_faily_failr.   r.   r/   test_base_estimator%  s    
"r   c                  C   sV   d} t dddd}tjt| d |tjtj W d    d S 1 s$w   Y  d S )Nz+Sample weights have reached infinite values   g      @rI   )ru   learning_raterH   r   )r   r   warnsUserWarningrD   r`   rb   ra   )r   rF   r.   r.   r/   test_sample_weights_infiniteA  s
   "r   c                  C   s4  G dd dt } tjddddd\}}t|}t||dd	\}}}}tttt	t
fD ]}||}||}	t| d
dddd||}
t| d
dddd||}|
|	}||}t|| |
|	}||}t|| |
|	}||}t|| |
|	}||}t|| |
|	|}|||}t|| |
|	}||}t||D ]	\}}t|| q|
|	}||}t||D ]	\}}t|| q|
|	}||}t||D ]	\}}t|| q|
|	|}|||}t||D ]	\}}t|| qdd |
jD }tdd |D sJ q*d S )Nc                       "   e Zd ZdZd fdd	Z  ZS )z-test_sparse_classification.<locals>.CustomSVCz8SVC variant that records the nature of the training set.Nc                        t  j|||d t|| _| S z<Modification on fit caries data type for later verification.rv   superrD   r   
data_type_r*   r+   r   rw   r   r.   r/   rD   N     
z1test_sparse_classification.<locals>.CustomSVC.fitr'   r1   r2   r3   __doc__rD   __classcell__r.   r.   r   r/   	CustomSVCK      r   r         *   )	n_classesr   r   r"   r   r!   T)probabilityrI   )r   r"   rH   c                 S      g | ]}|j qS r.   r   r\   ir.   r.   r/   r{         z.test_sparse_classification.<locals>.<listcomp>c                 S      g | ]
}|t kp|tkqS r.   r   r   r\   tr.   r.   r/   r{         )r   r   make_multilabel_classificationr5   ravelr   r   r   r   r   r   r   rD   rM   r   rS   r	   predict_log_probar0   rc   staged_decision_functionzipr   r   r   rd   r<   )r   r+   r   X_trainX_testy_trainy_testsparse_formatX_train_sparseX_test_sparsesparse_classifierdense_classifiersparse_resultsdense_results
sprase_res	dense_restypesr.   r.   r/   test_sparse_classificationH  sr   	




















r   c                  C   s
  G dd dt } tjddddd\}}t||dd	\}}}}tttttfD ]]}||}||}	t	|  dd

||}
t	|  dd

|| }}|
|	}||}t|| |
|	}||}t||D ]	\}}t|| qedd |
jD }tdd |D sJ q%d S )Nc                       r   )z)test_sparse_regression.<locals>.CustomSVRz8SVR variant that records the nature of the training set.Nc                    r   r   r   r   r   r.   r/   rD     r   z-test_sparse_regression.<locals>.CustomSVR.fitr'   r   r.   r.   r   r/   	CustomSVR  r   r   r   2   r   r   )r   r   	n_targetsr"   r   r!   r   c                 S   r   r.   r   r   r.   r.   r/   r{     r   z*test_sparse_regression.<locals>.<listcomp>c                 S   r   r.   r   r   r.   r.   r/   r{     r   )r   r   make_regressionr   r   r   r   r   r   r   rD   rM   r	   r   r   rd   r<   )r   r+   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r.   r.   r/   test_sparse_regression  s8   	





r   c                  C   sF   G dd dt } t|  dd}|tt t|jt|jks!J dS )z
    AdaBoostRegressor should work without sample_weights in the base estimator
    The random weighted sampling is done internally in the _boost method in
    AdaBoostRegressor.
    c                   @   s   e Zd Zdd Zdd ZdS )z=test_sample_weight_adaboost_regressor.<locals>.DummyEstimatorc                 S   s   d S r'   r.   )r*   r+   r   r.   r.   r/   rD     s   zAtest_sample_weight_adaboost_regressor.<locals>.DummyEstimator.fitc                 S   s   t |jd S )Nr   )r5   zerosr(   r)   r.   r.   r/   rM     s   zEtest_sample_weight_adaboost_regressor.<locals>.DummyEstimator.predictN)r1   r2   r3   rD   rM   r.   r.   r.   r/   DummyEstimator  s    r   r    )ru   N)r
   r   rD   r+   rU   rC   estimator_weights_estimator_errors_)r   r   r.   r.   r/   %test_sample_weight_adaboost_regressor  s   r   c                  C   s   t jd} | ddd}| ddgd}| d}ttdd}||| || |	| t
t }||| || dS )zX
    Check that the AdaBoost estimators can work with n-dimensional
    data matrix
    r   r   r    r   most_frequent)strategyN)r5   r   r   randnchoicer   r   rD   rM   r0   r   r   )r   r+   ycyrr   r.   r.   r/   test_multidimensional_X  s   



r   c                 C   sr   t jt j}}tt }t|| d}d|jj}t	j
t|d ||| W d    d S 1 s2w   Y  d S )N)r   rH   z {} doesn't support sample_weightr   )r`   rb   ra   r   r   r   formatr   r1   r   r   r   rD   )rH   r+   r   r   rF   err_msgr.   r.   r/   -test_adaboostclassifier_without_sample_weight  s   
"r   c            
      C   sL  t jd} t jdddd}d| d | |jd d  }|d	d
}|d	  d9  < d|d	< tt d
dd}t	|}t	|}|
|| |
|d d	 |d d	  t |}d|d	< |j
|||d ||d d	 |d d	 }||d d	 |d d	 }||d d	 |d d	 }	||k sJ ||	k sJ |t|	ksJ d S )Nr   r   d   i  )numg?r$   g-C6?r   r   rs   i'  )r   ru   r"   rv   )r5   r   r   linspacerandr(   reshaper   r   r   rD   r:   rc   r   approx)
r   r+   r   regr_no_outlierregr_with_weightregr_with_outlierrw   score_with_outlierscore_no_outlierscore_with_weightr.   r.   r/   $test_adaboostregressor_sample_weight	  s,    
r  zparams, err_type, err_msgru   z n_estimators == -1, must be >= 1zn_estimators == 0, must be >= 1g      ?z2n_estimators must be an instance of int, not floatr   z!learning_rate == -1, must be > 0.z learning_rate == 0, must be > 0.zmodel, X, yc                 C   sN   | di |}t j||d ||| W d   dS 1 s w   Y  dS )z4Check input parameter validation in weight boosting.r   Nr.   )r   r   rD   )modelr+   r   paramserr_typer   r]   r.   r.   r/   test_adaboost_params_validation/  s   "r  c                 C   sZ   t tjddddi\}}}}t| dd}||| ttj||dd|	| d S )NT)
return_X_yr"   r   rK   r   r%   )
r   r   load_digitsr   rD   r   r5   r>   r0   rM   )rH   r   r   r   r   r  r.   r.   r/    test_adaboost_consistent_predictK  s   
r  c                 C   sZ   t |}d|d< d}tjt|d | j|||d W d    d S 1 s&w   Y  d S )Nir   z1Negative values in data passed to `sample_weight`r   rv   )r5   r:   r   r   r   rD   )r  r+   r   rw   r   r.   r.   r/   #test_adaboost_negative_weight_error[  s   
"r  )Wr   numpyr5   r   r   scipy.sparser   r   r   r   r   sklearn.utils._testingr   r   r	   sklearn.baser
   r   sklearn.dummyr   r   sklearn.linear_modelr   sklearn.model_selectionr   r   r   r   r   !sklearn.ensemble._weight_boostingr   sklearn.svmr   r   sklearn.treer   r   sklearn.utilsr   sklearn.utils._mockingr   sklearnr   r   r   r   r+   rL   rU   rN   rO   rV   	load_irisr`   permutationra   rt   permrb   load_diabetesrp   rA   rG   markparametrizerT   rW   rk   rr   r   r   r   r   r   r   r   r   r   r   r   r   r  r   	TypeErrorr  r  r  r.   r.   r.   r/   <module>   s    (
	

"

-]0
&
