o
    tBhA                    @   s  d Z ddlZddlZddlZddlZddlmZmZm	Z	 ddl
mZ ddlmZ ddlmZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlm Z  ddlm!Z! ddlm"Z" ddlm#Z# ddlm$Z$ ddlm%Z% ddlm&Z& ddlm'Z' ddlm(Z( ddlm)Z) ddlm*Z* ddlm+Z+ ddlm,Z, dd lm-Z- dd!lm.Z. dd"l/m0Z0 dd#l1m2Z2 dd$l1m3Z3 dd%l1m4Z4 dd&l5m6Z6 dd'l5m7Z7 dd(l8m9Z9 e:d)Z;e<d)d* Z=ee>d+Z?e@g d,e@g d-e@g d.e@g d/g d,g d0fZAe6 ZBed1d2 ZCd3d4 ZDdd5d6ZEd7d8 ZFd9d: ZGd;d< ZHd=d> ZId?d@ ZJejKLdAdBdCgejKLdDg dEejKLdFee.gdGdH ZMejKLdAdBdCgejKLdDg dIejKLdFee.gdJdK ZNdLdM ZOejKLdFee.gdNdO ZPdPdQ ZQejKLdFeee.gdRdS ZRdTdU ZSdVdW ZTdXdY ZUdZd[ ZVejKLd\e@dgd] d^gd]  e@g d_eWd`d`gd`d`gd`d`ggfe@dgda d^gdb  e@g dceWdddegdddegdddeggfgdfdg ZXejKLdhg diejKLdjg dkdldm ZYdndo ZZejKLdpe%e'gejKLdqg drdsdt Z[ejKLdqg dudvdw Z\edxdy Z]dzd{ Z^d|d} Z_d~d Z`dd Zadd Zbdd Zcdd Zddd Zedd Zfdd Zgdd Zhdd Ziedd ZjejKLde,e-gdd Zkdd Zldd Zmdd Zndd Zodd ZpejKLdg ddd ZqejKLdg ddd ZrejKLdqg ddd Zsdd Ztdd Zuedd Zvdd Zwdd Zxdd ZyejKLdg ddd Zzdd Z{dd Z|dd Z}dd Z~ddĄ ZejKLdFee.gddƄ ZddȄ Zddʄ Zdd̄ Zdd΄ ZddЄ Zdd҄ ZddԄ ZejKLde%e&e'fddׄ Zddل Zddۄ Zdd݄ ZejKLdeee.fdd ZejKLdg e dCfedCdddCfe dCfedCdddCfe.dCdddCfe. dCfe,dddCfe-dddCfe%dddCfe&dddCfe'dddCfe dCfe  dCfe! dCfe" dCfe$d*ddCfe#d*ddCfedCdddBfedCdddBfedCejdddBfedCejdddBfe,dddBfe,ejdddBfe-dddBfe-ejdddBfe%dddBfe%ejdddBfe&dddBfe&ejdddBfe'dddBfe'ejdddBfdd ZdS )zTest the split module    N)
coo_matrix
csc_matrix
csr_matrix)stats)comb)combinations)combinations_with_replacement)permutations)assert_allclose)assert_array_almost_equal)assert_array_equal)ignore_warnings)_num_samples)MockDataFrame)cross_val_score)KFold)StratifiedKFold)
GroupKFold)TimeSeriesSplit)LeaveOneOut)LeaveOneGroupOut)	LeavePOut)LeavePGroupsOut)ShuffleSplit)GroupShuffleSplit)StratifiedShuffleSplit)PredefinedSplit)check_cv)train_test_split)GridSearchCV)RepeatedKFold)RepeatedStratifiedKFold)StratifiedGroupKFold)DummyClassifier)_validate_shuffle_split_build_repr_yields_constant_splits)load_digits)make_classification)SVC
            r0   r0   r0   r-   r-   r-      r1   r1   r1   r1   r   r   r   r0   r0   r0   r-   r-   r-   r1   r1   r1   r   r0   r-   r1   r   r0   r-   r1   r   r0   r-   r1   r   r0   r-   r0   r0   r-   r-   r-   r1   r1   r1      r5   r5   r5   r5   r5   r5   r5   1r7   r7   r7   2r8   r8   3r9   r9   r9   r9   c            "      C   sL  d} d}d}d}d}t ddgddgddgdd	gg}t g d
}t g d}t g d
}t }	t|}
t|}t|}t }t|}tdd}t	g d}t
|}d}d}d}d}d}d}d}d}d}| t| ||||t|||d|g	}tt|	|
|||||||g	|||||||||g	D ]R\}\}}|| ||||ksJ t jt||||t|||| ||||D ]\}} t |jjdksJ t | jjdksJ q|t|ksJ qd}!tjt|!d |	d || W d    n1 sw   Y  tjt|!d |
d || W d    d S 1 sw   Y  d S )Nr5   r-   r,   r0   r1   r.            )r0   r-   r1   r5   )r0   r0   r-   r-   r   random_statezLeaveOneOut()zLeavePOut(p=2)z3KFold(n_splits=2, random_state=None, shuffle=False)z=StratifiedKFold(n_splits=2, random_state=None, shuffle=False)LeaveOneGroupOut()LeavePGroupsOut(n_groups=2)zJShuffleSplit(n_splits=10, random_state=0, test_size=None, train_size=None)z.PredefinedSplit(test_fold=array([1, 1, 2, 2]))zBStratifiedGroupKFold(n_splits=2, random_state=None, shuffle=False)iz%The 'X' parameter should not be None.match)nparrayr   r   r   r   r   r   r   r   r"   r   	enumeratezipget_n_splitstestingassert_equallistsplitasarraydtypekindreprpytestraises
ValueError)"	n_samplesn_unique_groupsn_splitspn_shuffle_splitsXX_1dygroupsloolpokfskflololoposspssgkfloo_reprlpo_reprkf_reprskf_repr	lolo_repr	lopo_reprss_reprps_repr	sgkf_reprn_splits_expectedrA   cvcv_reprtraintestmsg ru   /var/www/html/riverr-enterprise-integrations-main/venv/lib/python3.10/site-packages/sklearn/model_selection/tests/test_split.py(test_cross_validator_with_default_paramsA   s   "
 $rw   c                  C   sF  d} t jd}|jdd| dfd}|jdd| fd}|dd}|jdd| dfd}|jdd| fd}t tddt t t	 t
 t t td	d
t t tddtddt t|dg}|D ]@}t|||| t|||| zt|||| W q` ty }	 zd}
d|
}|t|	v sJ W Y d }	~	q`d }	~	ww d S )N   r0   r   r1   r-   )sizerW         ?	test_sizen_groupsrV   )	test_fold)binary
multiclassz/Supported target types are: {}. Got 'multilabel)rD   randomRandomStaterandintreshaper   r   r   r   r    r!   r"   r   r   r   r   r   r   r   r   rK   rL   rS   formatstr)rT   rngrY   r[   y_2dy_multilabelr\   	splitterssplittereallowed_target_typesrt   ru   ru   rv   	test_2d_y   sJ   r   c                 C   sP   t | t |} }| |t  ksJ |d ur$| |t t|ks&J d S d S N)setintersectionunionrange)rr   rs   rT   ru   ru   rv   check_valid_split   s   r   c           
      C   s   t |}| ||||ksJ t }d}| |||D ]\}}	t||	|d |d7 }||	 q||ks6J |d urD|tt|ksFJ d S d S )Nr   )rT   r0   )r   rH   r   rL   r   updater   )
rp   rY   r[   r\   expected_n_splitsrT   collected_test_samples
iterationsrr   rs   ru   ru   rv   check_cv_coverage   s   r   c                  C   s  t ddgddgddgg} t ddgddgddgddgd	d
gg}tttd| f t g d}td}tjt	dd t||| W d    n1 sQw   Y  t
d}t t|}tjt	dd t|||| W d    n1 s|w   Y  t  td t|||d dd W d    n1 sw   Y  t  td t||||dd W d    n1 sw   Y  t g d}tt t||| W d    n1 sw   Y  tt t||| W d    n	1 sw   Y  tt td W d    n	1 sw   Y  tt td W d    n	1 s6w   Y  d}tjt|d td W d    n	1 sTw   Y  tjt|d td W d    n	1 spw   Y  tjt|d t
d W d    n	1 sw   Y  tjt|d t
d W d    n	1 sw   Y  tt td W d    n	1 sw   Y  tt td W d    n	1 sw   Y  tt td W d    n	1 sw   Y  tt td W d    n	1 sw   Y  tt t
d W d    n	1 s*w   Y  tt t
d W d    n	1 sDw   Y  tt tdd d W d    d S 1 saw   Y  d S )Nr0   r-   r1   r5   r.   r:   r;   r<   	   r,   )r1   r1   rz   rz   r1   zThe least populated classrB   ignorer\   r   )r1   r1   rz   rz   r-   r   z>k-fold cross-validation requires at least one train/test split      ?       @rV   shuffle)rD   rE   rS   nextr   rL   r   rQ   warnsWarningr"   arangelenwarningscatch_warningssimplefilterr   rR   	TypeError)X1X2r[   skf_3sgkf_3naive_groupserror_stringru   ru   rv   test_kfold_valueerrors   s   (















$r   c                  C   sb   t d} td}t|| d d dd t d}td}t||d d dd dtd|ks/J d S )N   r1   )r[   r\   r      r.   )rD   onesr   r   rH   )r   r_   r   ru   ru   rv   test_kfold_indices%  s   

r   c                  C   s   ddgddgddgddgd	d
gg} t d| d d }t|\}}t|ddg t|ddg t|\}}t|ddg t|ddg t d| }t|\}}t|g d t|ddg t|\}}t|ddg t|g d d S )Nr0   r-   r1   r5   r.   r:   r;   r<   r   r,   rz   r   r   r0   r-   )r   rL   r   r   )r   splitsrr   rs   ru   ru   rv   test_kfold_no_shuffle5  s   "r   c                  C   s  t dg d} }td| |}t|\}}t|ddg t|ddg t|\}}t|ddg t|ddg t dg d} }td| |}t|\}}t|g d	 t|g d
 t|\}}t|g d
 t|g d	 dtd| |ksJ t d} g d}g d}t jt	td| |t	td| | g d}t 
|} t jt	td| |t	td| | d S )Nr5   )r0   r0   r   r   r-   r   r0   r1   r;   )r0   r0   r0   r   r   r   r   )r   r0   r1   r5   )r-   r.   r:   r.   )r7   r7   r7   0r   r   r   )r   r0   r   r0   r   r0   r   r0   )rD   r   r   rL   r   r   rH   rI   rJ   rK   	ones_liker   )rY   r[   r   rr   rs   y1y2ru   ru   rv    test_stratified_kfold_no_shuffleL  s8   
$
$r   r   FTk)r5   r.   r:   r;   r<   r   r,   kfoldc                 C   s  d}t |}t dgtd|  dgtd|   dgtd|   }t t|}t |t| }g }|s;d nd}	|| |	|d}
|
j|||d	D ]+\}}tt || t| |d
d tt || t| |d
d |	t| qLt 
|dksJ d S )N  r5   皙?r   {Gz?r0   {Gz?r>   r   r\   {Gz?atol)rD   r   rE   intr   r   bincountrL   r
   appendptp)r   r   r   rT   rY   r[   r\   distr
test_sizesr>   r`   rr   rs   ru   ru   rv   test_stratified_kfold_ratiosw  s&   
  r   )r5   r:   r;   c           
         s   d}t dgtd|  dgtd|   dgtd|   }t t| t t| fdd	}||}tg d
D ]}t ||}||}	|	|ksTJ qBd S )Nd   r-   r   r   r   r0   r   c                    s0   sd nd}dd |dj  | dD S )Nr   c                 S   s    g | ]\}}t |t |fqS ru   )rK   ).0rr   rs   ru   ru   rv   
<listcomp>  s    zNtest_stratified_kfold_label_invariance.<locals>.get_splits.<locals>.<listcomp>r   r   )rL   )r[   r>   rY   r\   r   r   r   ru   rv   
get_splits  s   z:test_stratified_kfold_label_invariance.<locals>.get_splitsr   )rD   rE   r   r   r   r   r	   take)
r   r   r   rT   r[   r   splits_basepermy_permsplits_permru   r   rv   &test_stratified_kfold_label_invariance  s"   	r   c                  C   sf   t ddD ]+} tdjt| d}dd |D }t|t| dks'J t|| ks0J qd S )N   r   r.   rY   c                 S      g | ]\}}t |qS ru   r   r   _rs   ru   ru   rv   r         z&test_kfold_balance.<locals>.<listcomp>r0   )r   r   rL   rD   r   maxminsum)rA   r_   sizesru   ru   rv   test_kfold_balance  s   r   c           	   	   C   s   t d}dgd dgd  }t t|}dD ]A}| d|d}tddD ]3}||d | |d | |d | }d	d
 |D }t |t | dksOJ t ||ksXJ q%qd S )Nr   r   r1   r0      TFr   r   c                 S   r   ru   r   r   ru   ru   rv   r     r   z0test_stratifiedkfold_balance.<locals>.<listcomp>)	rD   r   r   r   r   rL   r   r   r   )	r   rY   r[   r\   r   rp   rA   r`   r   ru   ru   rv   test_stratifiedkfold_balance  s   
&r   c                  C   s   t d} t dddd}t dddd}td}td}t| |||||D ]*\\}}\}}\}	}
t|||	fdD ]\}}tt||t|ksPJ q>d||< q+t	|dks^J d S )Nr1   Tr   r   r>   r0   ,  r-   )
r   rD   r   zerosrG   rL   r   r   intersect1dr   )r_   kf2kf3rY   	all_foldstr1te1tr2te2tr3te3tr_atr_bru   ru   rv   test_shuffle_kfold  s   


r   c              
   C   s(  t d}dgd dgd  }t t|}t d}dgd dgd  }t t|}| dddd	}t jt||||t|||| | ddt j	dd	}t
||f||f||fD ]2}t
|j| |j| D ]$\\}	}
\}	}tt t j|
| W d    n1 sw   Y  qlq_d S )
N   r   r;   r0   r<      r1   Tr   )rD   r   r   r   rI   rJ   rK   rL   r   r   rG   rQ   rR   AssertionErrorr   )r   rY   r[   groups_1r   r   groups_2r_   datar   test_atest_bru   ru   rv   2test_shuffle_kfold_stratifiedkfold_reproducibility  s&   

 &r  c                  C   s
  t d} dgd dgd  }tdddd}tdddd}t|| ||| |D ]\\}}\}}t|t|ks<J q*t|| |d dd t d	}dgd dgd  }tdddd}tdddd}td
d |||D }	tdd |||D }
|	|
ksJ d S )N(   r      r0   r.   Tr   r   r,   c                 S      g | ]}t |d  qS r0   tupler   sru   ru   rv   r     r   z0test_shuffle_stratifiedkfold.<locals>.<listcomp>c                 S   r	  r
  r  r  ru   ru   rv   r     r   )	rD   r   r   rG   rL   r   r   r   sorted)X_40r[   kf0kf1r   test0test1rY   r   	test_set1	test_set2ru   ru   rv   test_shuffle_stratifiedkfold  s   
*
r  c                  C   s   t jd d t jd d } }tddd}d}t|dd}t|| ||d }d	|ks-J |d
ks3J t|ddd}t|| ||d }|d	ksJJ t|ddd}t|| ||d }|d	ksaJ t|}t|| ||d }d|ksuJ |d
ks{J d S )NiX  r,   g{Gzt?)Cgammar1   Fr   rp   gq=
ףp?皙?Tr   r   r0   gGz?)digitsr  targetr+   r   r   meanr   )rY   r[   modelrV   rp   
mean_scoreru   ru   rv   1test_kfold_can_detect_dependent_samples_on_digits  s"   		r!  c                  C   s   t dd} tdgd dgd  }t|dd}td}t|t| }g }| |||D ]:\}}t	|| || j
dksEJ tt|| t| |d	d
 tt|| t| |d	d
 |t| q2t|dksvJ d S )Nr1   r   r0   r:   r      rz   )r0   r-   r1   r5   r.   r:   r0   r0   r-   r-   r1   r1   r5   r5   r.   r.   r:   r:   r   r   )r"   rD   rE   r   r   rM   r   r   rL   r   ry   r
   r   r   )re   r[   rY   r\   r   r   rr   rs   ru   ru   rv   #test_stratified_group_kfold_trivialI  s   

  r#  c            
      C   s   t dd} tdgd dgd  }t|dd}tg d}td	d
gddgddgg}g }t| ||||D ].\\}}}t|| || j	dksQJ t
|| t| }	t|	|dd |t| q<t|dkstJ d S )Nr1   r   r0   r:   r   r"  rz   )r0   r-   r1   r1   r5   r5   r0   r0   r-   r-   r1   r5   r.   r.   r.   r:   r:   r:   g-?gsh|??gZd;O?gZd;O?r|   MbP?r   )r"   rD   rE   r   r   rM   rG   rL   r   ry   r   r   r
   r   r   )
re   r[   rY   r\   expectedr   rr   rs   expect_dist
split_distru   ru   rv   'test_stratified_group_kfold_approximate[  s   
 r(  zy, groups, expectedr:   r0   )r0   r0   r-   r-   r1   r1   r5   r5   r.   r.   r:   r:   r|   r   r1   )r0   r0   r0   r-   r-   r-   r1   r1   r1   r5   r.   r:         ?      ?c           	      C   s   t dd}t| dd}t||| ||D ]'\\}}}t|| || jdks-J t| | t	| }t
||dd qd S )Nr1   r   rz   r0   r   r$  r   )r"   rD   r   r   rG   rL   r   ry   r   r   r
   )	r[   r\   r%  re   rY   rr   rs   r&  r'  ru   ru   rv   .test_stratified_group_kfold_homogeneous_groupsm  s   
 r+  	cls_distr))g?333333?)g333333?ffffff?)皙?r  )r  r/  r   )r.   rx   F   c                 C   s(  d}t |d}t|d}tjd}d}|jd|| d}t|dd}|||}	|j|||	d	}
|j|||	d	}d}d}t	|
|D ]?\\}}\}}t
|	| |	| jdks[J t|| t| }t|| t| }|tj|| d
7 }|tj|| d
7 }qD|| }|| }||ksJ d S )Nr.   r   r   r   r-   )ry   rW   rz   r0   r   )qk)r"   r   rD   r   r   choicer   r   rL   rG   r   ry   r   r   r   entropy)r,  r   rV   re   gkfr   n_pointsr[   rY   g
sgkf_folds	gkf_folds	sgkf_entrgkf_entr
sgkf_train	sgkf_testr   gkf_test
sgkf_distr	gkf_distrru   ru   rv   /test_stratified_group_kfold_against_group_kfold  s*   

r@  c                  C   s   t dddt} t dddt}t tdddt}t tdddt}t| |||D ]<\}}}}t|d |d  t|d |d  t|d |d  t|d |d  t|d |d  t|d |d  q0d S )Nr/  r   r~   r>   r-   r0   )r   rL   rY   rD   int32r   rG   r   )ss1ss2ss3ss4t1t2t3t4ru   ru   rv   test_shuffle_split  s   rK  split_classztrain_size, exp_train, exp_test))Nr   r0   r<   r<   r-   r  r<   r-   c                 C   sR   t d}t d}t| |d||\}}t||ksJ t||ks'J d S Nr,   
train_size)rD   r   r   rL   r   )rL  rQ  	exp_trainexp_testrY   r[   X_trainX_testru   ru   rv   $test_shuffle_split_default_test_size  s
   

rV  ))Nr<   r-   )r;   r;   r1   )r.  r;   r1   c                 C   s\   t d}t d}td}tt| d|||\}}t||ks$J t||ks,J d S rO  )rD   r   r   r   r   rL   r   )rQ  rR  rS  rY   r[   r\   rT  rU  ru   ru   rv   *test_group_shuffle_split_default_test_size  s   

rW  c                  C   s~  t d} t g d}tt ttddd| | W d    n1 s(w   Y  tt ttddd| | W d    n1 sIw   Y  tt ttdddd| | W d    n1 skw   Y  t d} t g d	}tt ttdd
| | W d    n1 sw   Y  tt ttdd| | W d    d S 1 sw   Y  d S )Nr;   )r   r0   r0   r0   r-   r-   r-   r1   r/  r}   r-   r~   rQ  r   )	r   r   r   r0   r0   r0   r-   r-   r-   rP  )	rD   r   rM   rQ   rR   rS   r   r   rL   rY   r[   ru   ru   rv   "test_stratified_shuffle_split_init  s&   

"rZ  c                  C   sj   t g d} d}d}td||ddt t| | }|D ]\}}t||ks*J t||ks2J qd S )Nr3   r.   r,   r:   r   )r~   rQ  r>   )rD   rE   r   rL   r   r   )r[   r~   rQ  sssrr   rs   ru   ru   rv   0test_stratified_shuffle_split_respects_test_size  s   r\  c            	   
   C   s  t g dt g dt g dd t g dt dgd dgd	  t d
d tdD g dg dg} | D ]}tddddt t||}t |}t 	dt| }t|| }|D ]m\}}t
t || t ||  t t j|| ddd tt||  }t t j|| ddd tt||  }t||d t|t| |jksJ t||ksJ t||ksJ t
t jj||g  qcq;d S )Nr/   r2   r3   r-   r4   rz   i   r0   2   c                 S   s   g | ]	}|gd |  qS )r   ru   )r   rA   ru   ru   rv   r     s    z6test_stratified_shuffle_split_iter.<locals>.<listcomp>r   r6   r:   gQ?r   rA  T)return_inverse)rD   rE   concatenater   r   rL   r   r   
asanyarrayceilr   uniquer   floatr   ry   libarraysetopsr   )	ysr[   r[  r~   rQ  rr   rs   p_trainp_testru   ru   rv   "test_stratified_shuffle_split_iter  s>   


ri  c                     s  d} d  fdd}dD ]}t |d ddg }t d	|  dd
}dg| }dg| }d}|jt ||dD ]"\}}	|d7 }||f||	ffD ]\}
}|D ]
}|
|  d7  < qNqHq8| ksaJ t|d	|  d	d	|   d\}}t||ksxJ t|	|ksJ tt||	dksJ t 	|}|j
d	|  ksJ || t|ksJ t|dksJ t|| }t|| }||| ||| qd S )Nr.   r   c                    s<   d  }t  |}| D ]}||}||ksJ dqd S )N皙?z=An index is not drawn with chance corresponding to even draws)r   binompmf)
idx_countsrW   	thresholdbfcountprobr   ru   rv   assert_counts_are_ok"  s   

z@test_stratified_shuffle_split_even.<locals>.assert_counts_are_ok)r:      r-   r   r0         ?rV   r~   r>   rY  rX  )rD   rE   r   rL   r   r$   r   r   r   rb  r~   rc  )n_foldsrr  rT   r\   r   train_countstest_countsn_splits_actualrr   rs   counteridsidn_trainn_testgroup_counts	ex_test_p
ex_train_pru   r   rv   "test_stratified_shuffle_split_even  sD   





r  c                  C   sx   g dd ddgd  } t | }tdddd}t|j|| d	\}}tt ||g  tt ||t t	|  d S )
Nr   r0   r-   r1   r1   r5   r.   r0   r|   r   ru  rY  )
rD   r   r   r   rL   r   r   union1dr   r   )r[   rY   r[  rr   rs   ru   ru   rv   4test_stratified_shuffle_split_overlap_train_test_bugP  s   
 r  c                  C   s  t ddgddgddgddggt ddgddgddgddggfD ]a} t | }tdddd}t|j|| d\}}| | }| | }tt ||g  tt ||t 	t
|  t | d d df }|t |d d df kstJ |t |d d df ksJ q"d S )Nr   r0   r|   ru  rY  )rD   rE   r   r   r   rL   r   r   r  r   r   r  )r[   rY   r[  rr   rs   y_trainy_testexpected_ratioru   ru   rv   (test_stratified_shuffle_split_multilabela  s     
 r  c            
      C   s   g ddgd  g d } g ddgd  g d }t | gd |gd  }t |}tdddd}t|j||d	\}}|| }|| }t |d d d
f }	|	t |d d d
f ks`J |	t |d d d
f ksoJ d S )N)r0   r   r0   r   r   r0   r,   r   r|   ru  rY  r5   )rD   rE   r   r   r   rL   r  )
row_with_many_zerosrow_with_many_onesr[   rY   r[  rr   rs   r  r  r  ru   ru   rv   4test_stratified_shuffle_split_multilabel_many_labelsz  s   
"r  c            	      C   s   t dd} g }g }ttdddtD ]\}\}}|| || || |< qt| }tt 	| |
 ks;J t|  \}}t|| t|| d S )Nr,   g      r.   Tr   )rD   fullrF   r   rL   rY   r   r   r   rb  rH   rG   r   )	foldskf_trainkf_testrA   	train_indtest_indrd   ps_trainps_testru   ru   rv   %test_predefinedsplit_with_kfold_split  s   "



r  c                  C   sP  t D ]} tt|  }}d}d}t||dd}t| |j||| d|ks(J t| }t| }|j	||| dD ]j\}}	t|| }
t||	 }t
t|| |rYJ t
t||	 |
rfJ || j||	 j |jksuJ tt||	g  tt|t|t|  dksJ tt|
td| t|  dksJ q:qd S )Nr:   gUUUUUU?r   rA  r   r0   rt  )test_groupsrD   r   r   r   rP   rH   rb  rM   rL   anyin1dry   r   r   absround)groups_irY   r[   rV   r~   slol_uniquelrr   rs   l_train_uniquel_test_uniqueru   ru   rv   test_group_shuffle_split  s(   

$*r  c               	   C   s  t  } tdd}tdd}t| dksJ t|dksJ t|dks%J ttdddks0J t| df|df|dffD ]v\}\}}ttD ]k\}}tt|}|dkrX|n||d  d }	tt| }
}|j	|
||d	|	ksuJ t
|}|j|
||d	D ]/\}}tt|| ||  g  t|t| t|ksJ t|| jd
 sJ |qqGq=| 	d d g ddksJ | j	g dd	dksJ |	d d tddksJ |j	tdd	dksJ tt | 	d d dtjdg W d    n	1 sw   Y  tt |	d d dtjdg W d    n	1 s%w   Y  d}tjt|d | 	d d d  W d    n	1 sFw   Y  tjt|d |	d d d  W d    d S 1 sfw   Y  d S )Nr0   r   r-   r?   zLeavePGroupsOut(n_groups=1)r@   r1   zLeavePGroupsOut(n_groups=3)r   r   )abcr  r  )rt  皙?rt  333333?r5   r:           z*The 'groups' parameter should not be None.rB   )r   r   rP   rF   r  r   rD   rb  r   rH   rM   rL   r   r   tolistshaper   rQ   rR   rS   naninf)logolpgo_1lpgo_2jrp   p_groups_outrA   r  r   rV   rY   r[   
groups_arrrr   rs   rt   ru   ru   rv   test_leave_one_p_group_out  sN   

&
$r  c                  C   s  t g d} t t| }t j| dd}t j|| d}t j|| d}tddj|| d}tddj|| d}d|d d < ||f||ffD ]\}}t||D ]\\}	}
\}}t|	| t|
| qPqGdtddj	||| d	kssJ dt j	||| d	ksJ d S )
N)r   r0   r-   r0   r0   r-   r   r   T)copyr   r-   r   r   r1   )r[   r\   )
rD   rE   r   r   r   rL   r   rG   r   rH   )r\   rY   groups_changingra   lolo_changinglplolplo_changingllollo_changingrr   rs   
train_chan	test_chanru   ru   rv   $test_leave_group_out_changing_groups  s   
r  c                  C   s   t g d} t t| }tt j|| d}g dddgfg dddgfg d	d
dgfg}|D ]\}}t|\}}t|| t|| q1d S )N)r-   r-   r   r   r0   r0   r   )r   r0   r5   r.   r-   r1   r  r5   r.   r-   r1   r5   r.   r   r0   )	rD   rE   r   r   iterr   rL   r   r   )r\   rY   r   expected_indicesexpected_trainexpected_testrr   rs   ru   ru   rv   %test_leave_group_out_order_dependence  s   
r  c                  C   s  t d }  }}td}tjt|d tt 	| || W d    n1 s*w   Y  t d }  }}td| d}tjt|d tt 	| || W d    n1 s]w   Y  t d }  }}td| d}tjt|d tt
d	d
	| || W d    n1 sw   Y  t d	 }  }}td| d}tjt|d tt
d	d
	| || W d    d S 1 sw   Y  d S )Nr   zFound array with 0 sample(s)rB   r0   z:The groups parameter contains fewer than 2 unique groups (z'). LeaveOneGroupOut expects at least 2.z^The groups parameter contains fewer than (or equal to) n_groups (3) numbers of unique groups (zR). LeavePGroupsOut expects that at least n_groups + 1 (4) unique groups be presentr1   r   )rD   r   reescaperQ   rR   rS   r   r   rL   r   r   )rY   r[   r\   rt   ru   ru   rv   :test_leave_one_p_group_out_error_on_fewer_number_of_groups(  s<   

"r  c               	   C   sz   t tfD ]6} tt | dd W d    n1 sw   Y  tt | dd W d    n1 s5w   Y  qd S )Nr   )	n_repeatsr   )r    r!   rQ   rR   rS   r  ru   ru   rv   test_repeated_cv_value_errorsK  s   r  
RepeatedCVc                 C   s6   d\}}| ||d}d |jj}|t|ksJ d S )N)r-   r:   rV   r  z.{}(n_repeats=6, n_splits=2, random_state=None))r   	__class____name__rP   )r  rV   r  repeated_cvrepeated_cv_reprru   ru   rv   test_repeated_cv_reprU  s   r  c               	   C   s"  ddgddgddgddgd	d
gg} d}t dd|d}tdD ]p}|| }t|\}}t|ddg t|g d t|\}}t|g d t|ddg t|\}}t|ddg t|g d t|\}}t|g d t|ddg tt t| W d    n1 sw   Y  qd S )Nr0   r-   r1   r5   r.   r:   r;   r<   r   r,   i{icrV   r  r>   )r   r0   r1   r   )r-   r1   r5   )r    r   rL   r   r   rQ   rR   StopIteration)rY   r>   rkfr   r   rr   rs   ru   ru   rv   &test_repeated_kfold_determinstic_split_  s,   "

r  c                  C   0   d} d}t | |d}| | }|| ksJ d S Nr1   r5   r  )r    rH   )rV   r  r  r   ru   ru   rv   $test_get_n_splits_for_repeated_kfold|  
   r  c                  C   r  r  )r!   rH   )rV   r  rskfr   ru   ru   rv   /test_get_n_splits_for_repeated_stratified_kfold  r  r  c               	   C   s,  ddgddgddgddgd	d
gg} g d}d}t dd|d}tdD ]q}|| |}t|\}}t|ddg t|g d t|\}}t|g d t|ddg t|\}}t|ddg t|g d t|\}}t|g d t|ddg tt t| W d    n1 sw   Y  q"d S )Nr0   r-   r1   r5   r.   r:   r;   r<   r   r,   )r0   r0   r0   r   r   iqsr  )r   r-   r1   )r   r0   r5   )r!   r   rL   r   r   rQ   rR   r  )rY   r[   r>   r  r   r   rr   rs   ru   ru   rv   1test_repeated_stratified_kfold_determinstic_split  s.   "
r  c                   C   s  t tt t jtttddd t jtttdddd t jtttdtdtdd t jtttddd t jtttddd	d t jtttddd
 t tttdtd t jtttdddd t jtdd ttdddd W d    d S 1 sw   Y  d S )Nr1   r  rP  r-  rX  
wrong_typer}   r-   r5   )some_argument*   r,   FT)r   stratifyzrtrain_size=11 should be either positive and smaller than the number of samples 10 or a float in the \(0, 1\) rangerB   r   r0   rQ  r~   )rQ   rR   rS   r   r   rD   float32r   ru   ru   ru   rv   test_train_test_split_errors  s*   "r  ztrain_size,test_size))r  r  )rt  r  )r  r  )皙ɿr  )r  r  )r  rt  )r  r  )r  r  c                 C   F   t jtdd ttd| |d W d    d S 1 sw   Y  d S )Nz"should be .* in the \(0, 1\) rangerB   r,   r  rQ   rR   rS   r   r   r  ru   ru   rv   $test_train_test_split_invalid_sizes1  s   "r  ))r  )r   r  )r   r  )r  r  )r  r   )r  r   c                 C   r  )Nz%should be either positive and smallerrB   r,   r  r  r  ru   ru   rv   $test_train_test_split_invalid_sizes2  s   "r  ))Nr;   r1   rM  rN  c                 C   s4   t t| d\}}t||ksJ t||ksJ d S )NrP  )r   rY   r   )rQ  rR  rS  rT  rU  ru   ru   rv   'test_train_test_split_default_test_size  s   r  c                  C   s  t dd} t| }t d}t| |d dd}|\}}}}t|t|ks)J t|d d df |d  t|d d df |d  t| || }|\}}}}	}}t|t	sZJ t|t	saJ t dddd	d
}
t dddd}t|
|}|d j
dksJ |d j
dksJ |d
 j
dksJ |d	 j
dksJ t g d}tg dg dD ]4\}}t|||dd\}}t||ksJ t|t| t|ksJ t |dkt |d
kksJ qt d}dD ]}t|d|d\}}t|ddg t|g d qd S )Nr   r,   r,   r,   r|   rX  r   r   r.   r1   r-   i  r;   r   )r;   r.   r1   r-   r0   )r1   r.   r1   r-   )r;   r;   r   )r1   r;   r   )r0   r0   r0   r0   r-   r-   r-   r-   )r-   r5   r*  r|   r)  )r-   r5   r-   r5   r:   )r~   r  r>   )r-   r/  F)r   r~   r<   r   )r   r0   r-   r1   r5   r.   r:   r;   )rD   r   r   r   r   r   r   r  
isinstancerK   r  rE   rG   r   )rY   X_sr[   rL   rT  rU  r  r  	X_s_trainX_s_testX_4dy_3dr~   exp_test_sizerr   rs   ru   ru   rv   test_train_test_split  sB   


"
r  c                  C   sb   d} t | }|d|  k}t|||dd}|\}}}}|j|j | ks%J |j|j | ks/J dS )zCheck for integer overflow on 32-bit platforms.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/20774
    i Gz?r*  )r  rQ  N)rD   r   r   ry   )
big_numberrY   r[   rL   rT  rU  r  r  ru   ru   rv   $test_train_test_split_32bit_overflow  s   	
r  c                  C   sr   t g} zddlm} | | W n	 ty   Y nw | D ]}|t}t|\}}t||s/J t||s6J qd S )Nr   )	DataFrame)r   pandasr  r   ImportErrorrY   r   r  )typesr  InputFeatureTypeX_dfrT  rU  ru   ru   rv   test_train_test_split_pandas3  s   r  c                  C   sX   t dd} tttg}|D ]}|| }t|\}}t|ts"J t|ts)J qd S )Nr   r  )rD   r   r   r   r   r   r   r  )rY   sparse_typesr  r  rT  rU  ru   ru   rv   test_train_test_split_sparseE  s   
r  c                  C   s@   t t} t| \}}t|t sJ t|t sJ t| \}}d S r   )r   rY   r   r  )r  rT  rU  X_train_arr
X_test_arrru   ru   rv   !test_train_test_split_mock_pandasQ  s
   r  c                  C   s   t d} dgd dgd  }t t dt df}| }dD ]N}t| ||r+|nd dd\}}}}t| ||r;|nd dd\}	}
}}t| ||rK|nd dd\}}}}t j||	 t j|| t j|| t j|| q"d S )	Nr;   r7   r5   r   r1   r   r   )r  r>   )rD   r   hstackr   r  r   rI   rJ   )rY   r   r   y3r  X_train1X_test1y_train1y_test1X_train2X_test2y_train2y_test2X_train3X_test3y_train3y_test3ru   ru   rv    test_train_test_split_list_inputZ  s&   
r  ztest_size, train_size))r   N)rt  N)r   gffffff?)Ny              ?)r   N)r,   N)r<   r1   c                 C   sF   t t tt| |dt W d    d S 1 sw   Y  d S )NrX  )rQ   rR   rS   r   r   rL   rY   rX  ru   ru   rv   test_shufflesplit_errorsr  s   "r  c                  C   s@   t dd} ttdd | tD tdd | tD  d S )N   r=   c                 s   s    | ]\}}|V  qd S r   ru   )r   r  r  ru   ru   rv   	<genexpr>  s    z1test_shufflesplit_reproducible.<locals>.<genexpr>)r   r   rK   rL   rY   )rc   ru   ru   rv   test_shufflesplit_reproducible{  s   
6r  c                  C   s   t ddd} td}dgd dgd  }ttdtdf}| }tjt| 	||t| 	|| tjt| 	||t| 	|| d S )	Nr-   r  rA  r;   r7   r5   r   r1   )
r   rD   r   r   r   r  rI   rJ   rK   rL   )r[  rY   r   r   r  ru   ru   rv   &test_stratifiedshufflesplit_list_input  s   
&*r  c                  C   sX   t jdt jddd} t j| dd d f< t ddg| jd d }t| |dd	d
 d S )N   rN   r,   rz   r-   r   r0   r/  r  rA  )rD   r   float64r   r  repeatr  r   rY  ru   ru   rv    test_train_test_split_allow_nans  s   r  c                  C   sD  t d} tddd}t jttd| t||  t g d}td|dd}t jtt	d| |t|| | t g d}td|dd}t jtt	d| |t|| | |
dd	}td|dd}t jtt	d| |t|| | t tt	d| |d
 ttd| |d
 krJ t d} t g dg dg dg dg dg}td|dd}t jttd| t||  t d	dgd
dgd
d
gdd	gdd
gg}td|dd}t jttd| t||  tt tdd W d    d S 1 sw   Y  d S )Nr   r1   F)
classifier)	r   r0   r   r0   r   r   r0   r0   r0   T)	r   r0   r   r0   r-   r0   r-   r   r-   rz   r0   r   r.   )r   r   r   r   )r   r0   r0   r   )r   r   r   r0   )r0   r0   r   r0   )r   r   r0   r   r-   ra   r  )rD   r   r   rI   rJ   rK   r   rL   rE   r   r   allr   rQ   rR   rS   )rY   rp   y_binaryy_multiclassy_multiclass_2dr   y_multioutputru   ru   rv   test_check_cv  sH   
&  
 &(&$r  c                  C   s   t  tt} t| }tjt|ttt|tt t dddtt}t|}tjt|ttt|tt zd}tjt|ttt|tt W n t	ya   d}Y nw |rhJ dd S )NTr   r   FzVIf the splits are randomized, successive calls to split should yield different results)
r   rL   rY   r[   r   rD   rI   rJ   rK   r   )kf_iterkf_iter_wrappedkf_randomized_iterkf_randomized_iter_wrappedsplits_are_equalru   ru   rv   test_cv_iterable_wrapper  s.   r%  c                 C   sl  t jd}d}d}d}t | }}d| }|d||}|| }	tt | t |}
| |d}t|	|||D ]
\}\}}||
|< q<t|
t|ksQJ t |
D ]}|t
t|
|k|	 ksfJ qVt |D ]}tt |
||k dks}J qlt j|td}|	|||D ]\}}tt || || dksJ qt g d	}tt |}t|}d}d| }|| }	t | }}t |}
t|	|||D ]
\}\}}||
|< qt|
t|ksJ t |
D ]}|t
t|
|k|	 ksJ qt ' td
t t |D ]}tt |
||k dksJ qW d    n	1 s,w   Y  t j|td}|	|||D ]\}}tt || || dksTJ q?t|	||| }t|	||||D ]\\}}\}}t|| t|| qkt g d}t t| }}tjtdd ttdd	||| W d    d S 1 sw   Y  d S )Nr   r   r   r.   rj  r   r0   r  )&AlbertJeanBertrandMichelr'  FrancisRobertr)  RachelLoisMichelleBernardMarionLaurar'  r,  FranckJohnGaelAnnaAlixr+  r0  DavidTonyAbelBeckyMadmoodCaryMary	Alexandrer7  r*  BarackAbdoulRashaXiSilviar   )r0   r0   r0   r-   r-   z%Cannot have number of splits.*greaterrB   r1   )rD   r   r   r   r   r   rb  r   rF   rL   r  r   rM   objectr   rE   r   r   r   FutureWarningrK   r  rG   r   rQ   rR   rS   r   r   )r   r   r   rT   rV   rY   r[   	tolerancer\   ideal_n_groups_per_foldr  lkfrA   r   rs   grouprr   cv_itertrain1r  train2test2ru   ru   rv   test_group_kfold  sl   


 "+


$&$
$rN  c                  C   sr  ddgddgddgddgd	d
gddgddgg} t jtdd ttdd|  W d    n1 s3w   Y  td}|| d d }t|\}}t|ddg t|ddg t|\}}t|g d t|ddg td| }t|\}}t|g d t|ddg t|\}}t|g d t|ddg td| }tt|}||	 ksJ |dksJ d S )Nr0   r-   r1   r5   r.   r:   r;   r<   r   r,   r   r"     r   z$Cannot have number of folds.*greaterrB   r   rz   r   r  r   r   r0   r-   r1   r5   )
rQ   rR   rS   r   r   rL   r   r   rK   rH   )rY   tscvr   rr   rs   ry  ru   ru   rv   test_time_series_cva  s.   .rR  c                 C   s^   t | |D ]'\\}}\}}t|| t||ksJ tt|| d}t|||d   qd S )Nr   )rG   r   r   r   )r   check_splitsmax_train_sizerr   rs   check_train
check_testsuffix_startru   ru   rv   !_check_time_series_max_train_size  s   
rX  c                  C   s~   t d} tdd| }tddd| }t||dd tddd| }t||dd tddd| }t||dd d S )N)r:   r0   r1   r   )rV   rT  )rT  r-   r.   )rD   r   r   rL   rX  )rY   r   rS  ru   ru   rv   test_time_series_max_train_size  s   
rY  c                  C   sD  t d} tddd| }t|\}}t|dg t|g d t|\}}t|g d t|g d t|\}}t|g d t|g d	 td
d
dd| }t|\}}t|g d t|ddg t|\}}t|g d t|ddg tjtdd tdd
d| }t| W d    d S 1 sw   Y  d S )Nr,   r0   r1   )rV   r~   r   )r0   r-   r1   r  r5   r.   r:   )r   r0   r-   r1   r5   r.   r:   r;   r<   r   r-   r5   )rV   r~   rT  r  r:   r;   )r5   r.   r:   r;   r<   r   zToo many splits.*with test_sizerB   r.   	rD   r   r   rL   r   r   rQ   rR   rS   rY   r   rr   rs   ru   ru   rv   test_time_series_test_size  s,   

"r_  c                  C   s  t d} tddd| }t|\}}t|ddg t|g d t|\}}t|g d t|g d td	ddd
| }t|\}}t|ddg t|ddg t|\}}t|dd	g t|ddg t|\}}t|ddg t|ddg tddddd| }t|\}}t|g d t|ddg t|\}}t|g d t|ddg tddd	d| }t|\}}t|ddg t|g d t|\}}t|g d t|g d tjtdd tddd| }t| W d    d S 1 sw   Y  d S )NrZ  r-   )rV   gapr   r0   r[  rP  r\  r1   )rV   r`  rT  r5   r.   r:   r;   r<   r   )rV   r`  rT  r~   r  r  )rV   r`  r~   zToo many splits.*and gaprB   r]  r^  ru   ru   rv   test_time_series_gap  sH   

$ra  c               	   C   s   t jd} tdddd\}}| ddd}t tddtddg}t|dD ]\}}t	t
 dd	d
gi|dd}t|||||d|id q'd S )Nr   r   r-   )rT   	n_classesr>   r.   r   r1   strategy
stratifiedmost_frequentraise)
param_gridrp   error_scorer\   )rY   r[   r\   rp   
fit_params)rD   r   r   r*   r   r   r   r   r   r   r#   r   )r   rY   r[   r\   cvsinner_cvouter_cvgsru   ru   rv   test_nested_cv  s$   
rn  c                  C   s(   G dd d} t | dddksJ d S )Nc                   @   s   e Zd ZdddZdd ZdS )z%test_build_repr.<locals>.MockSplitterr   Nc                 S   s   || _ || _|| _d S r   )r  r  r  )selfr  r  r  ru   ru   rv   __init__  s   
z.test_build_repr.<locals>.MockSplitter.__init__c                 S   s   t | S r   r%   )ro  ru   ru   rv   __repr__  s   z.test_build_repr.<locals>.MockSplitter.__repr__)r   N)r  
__module____qualname__rp  rq  ru   ru   ru   rv   MockSplitter  s    
rt  r.   r:   zMockSplitter(a=5, b=6, c=None))rP   )rt  ru   ru   rv   test_build_repr  s   	ru  
CVSplitterc                 C   sd   | dd}dggdg}}t jtdd t|j||dgd W d    d S 1 s+w   Y  d S )Nr  r}   r0   r   [With n_samples=1, test_size=0.99 and train_size=None, the resulting train set will be emptyrB   r   )rQ   rR   rS   r   rL   )rv  rp   rY   r[   ru   ru   rv   !test_shuffle_split_empty_trainset  s   
"rx  c                  C   s   dgg\} t jtdd t| dd W d    n1 sw   Y  dgdgdgg} t jtdd t| dd W d    d S 1 sCw   Y  d S )Nr0   rw  rB   r  r}   z[With n_samples=3, test_size=0.67 and train_size=None, the resulting train set will be emptygq=
ףp?)rQ   rR   rS   r   r   ru   ru   rv   $test_train_test_split_empty_trainset/  s   
	"ry  c                  C   sZ   t  } dggdg}}tjtdd t| || W d    d S 1 s&w   Y  d S )Nr0   r   z+Cannot perform LeaveOneOut with n_samples=1rB   )r   rQ   rR   rS   r   rL   rp   rY   r[   ru   ru   rv   !test_leave_one_out_empty_trainsetE  s
   "r{  c                  C   sl   t dd} dgdggddg}}tjtdd t| j||ddgd W d    d S 1 s/w   Y  d S )	Nr-   r{   r0   r   r1   z6p=2 must be strictly less than the number of samples=2rB   r   )r   rQ   rR   rS   r   rL   rz  ru   ru   rv   test_leave_p_out_empty_trainsetM  s   
"r|  Klassc                 C   sB   t jtdd | dddd W d    d S 1 sw   Y  d S )Nz$has no effect since shuffle is FalserB   r1   Fr   r   )rQ   rR   rS   )r}  ru   ru   rv   test_random_state_shuffle_falseW  s   "r~  zcv, expected{   r   r=   r   r{   c                 C   s   t | |ksJ d S r   r'   )rp   r%  ru   ru   rv   test_yields_constant_splits^  s   %r  r   )__doc__r   rQ   r  numpyrD   scipy.sparser   r   r   scipyr   scipy.specialr   	itertoolsr   r   r	   sklearn.utils._testingr
   r   r   r   sklearn.utils.validationr   sklearn.utils._mockingr   sklearn.model_selectionr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r    r!   r"   sklearn.dummyr#   sklearn.model_selection._splitr$   r&   r(   sklearn.datasetsr)   r*   sklearn.svmr+   r   rY   r   r[   eyeP_sparserE   r  r  rw   r   r   r   r   r   r   r   markparametrizer   r   r   r   r   r  r  r!  r#  r(  rM   r+  r@  rK  rV  rW  rZ  r\  ri  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r%  rN  rR  rX  rY  r_  ra  rn  ru  rx  ry  r{  r|  r~  r   r   r  ru   ru   ru   rv   <module>   s   

U
'I+


,



%4%7#
	
	


	/
	
.!
{%#9



	
$