o
    tBh;                     @   s>  d dl Z d dlZd dlZd dlmZ d dlmZ d dlZd dl	m
Z
 d dl	mZ d dl	mZ d dl	mZ d dlmZ d d	lmZ d d
lmZ d dlmZmZ d dlmZ d dlmZmZmZ d dlmZ d dlmZ d dlm Z  d dl!m"Z" d dl#m$Z$ d dl%m&Z& d dl'm(Z( dd Z)dd Z*dd Z+e j,-dg ddd Z.e j,-dg dd d! Z/e j,-dg d"d#d$ Z0e j,-dg d"d%d& Z1e j,-dg dd'd( Z2d)d* Z3d+d, Z4d-d. Z5d/d0 Z6e j,-dd1d2ge j,-d3de7e8gd4d5 Z9e j,-dd1d2ge j,-d6d7d8gd9d: Z:e j,-dd;d<ge j,-d3e8e;d=e;d>gd?d@ Z<dAdB Z=e j,-dCdej>dDdEd gdFdG Z?e j,-d3e7dHgdIdJ Z@e j,-dKdLdMej>fgdNdO ZAdPdQ ZBe j,-dRejCejDgdSdT ZEe j,-dCdej>dDdEd gdUdV ZFe j,-d3e7dHgdWdX ZGe j,-dYdZgd[ggdZgej>gggd\d] ZHd^d_ ZId`da ZJdbdc ZKddde ZLdfdg ZMe j,-dhg didjdk ZNe j,-dlde e e e gdmdn ZOdodp ZPdqdr ZQdsdt ZRe j,-dg d"dudv ZSdwdx ZTdydz ZUd{d| ZVe j,-d}d~dgdd ZWdd ZXe j,-dddeYdfdZdeYdfgdd ZZdd Z[dd Z\e j,j-dd de]d gd~ dgd~ gfdde]ej^ gd~ ej^gd~ gfej^ ej^e]ej^ gd~ ej^gd~ gfg dg de]g dg dgfdej^ dgddej^ge]dej^ dgddej^ggfgg dddd Z_e j,-ddej^ej^ dfddgg ddfgdd Z`e j,j-dddgej^ ej^gfddgdgd dgd gfgddgddd Zae j,-dddgdd Zbe j,-dddZejcjddZdge j,-dddZejcjddZdgdd Zee j,-de]ddZgdZd[gge]ddZgdZdggddddfe]ddZgdZd[gge]ddZgdZd[ggddddfe]ddZgdZd[gge]ddZgdZd[ggddddfej]ddgddgge8dej]ddgddgge8di dfgdd Zfe j,-dej>ejgej]fd ejhej]fdejhej]fej>ejgejifdejhejifej>ejgejCfdejhejCfej>ejgejjfdejhejjfej>ejgejkfdejhejkfej>ejgejlfdejhejlfge j,-ddd~e]g dŢfdd~e]g dŢfgddǄ Zme j,-dejiejCejjejkejlgddʄ Zne j,-dg d̢e j,-dej>ej]fd ej]fej>ejifej>ejCfej>ejjfej>ejkfgddτ Zoddф Zpe j,-dej]ddgddgge7ddej]g dӢg dԢge7dfe]ej>dMgdMej>ggej>e]g dբg d֢gfej]ej>dgdej>gge7dej>ej]g dӢg dԢge7dfej]ddgddgge7ddej]g dӢg dԢge7dfgdd؄ Zqe j,-deege j,-ddej>dfdgdd߄ Zrdd Zsdd Zte j,-deegdd Zue j,-dejiejCejjejkejlgdd Zve j,-dddgdd Zwe j,-ddg dfdg dfgdd Zxe j,-ddej>gdd Zye j,-ddej>gdd Zze j,-ddg de7dd[fdg de7ddZfdddge7dd[fdg d e7dd[fdg de{dd[fdZg de{ddZfdg de{dd[fdZg de{dd[fgdd Z|dd	 Z}d
d Z~dS (      Nsparse)kstest)assert_allclose)assert_allclose_dense_sparse)assert_array_equal)assert_array_almost_equal)enable_iterative_imputer)load_diabetes)MissingIndicator)SimpleImputerIterativeImputer)DummyRegressor)BayesianRidgeARDRegressionRidgeCV)Pipeline)
make_union)GridSearchCV)tree)_sparse_random_matrix)ConvergenceWarning)_most_frequentc                 C      t | | | j|jksJ d S N)r   dtypexy r   w/var/www/html/riverr-enterprise-integrations-main/venv/lib/python3.10/site-packages/sklearn/impute/tests/test_impute.py"_assert_array_equal_and_same_dtype       
r!   c                 C   r   r   )r   r   r   r   r   r    _assert_allclose_and_same_dtype%   r"   r#   c           	      C   s   d||f }t }| jjdks|jjdkrt}t||d}|| |  }||j||	dd ||||	dd t||d}|t
|  |t
|  }t
|r\| }||j||	dd ||||	dd dS )zUtility function for testing imputation for a given strategy.

    Test with dense and sparse arrays

    Check that:
        - the statistics (mean, median, mode) are correct
        - the missing values are imputed correctlyz<Parameters: strategy = %s, missing_values = %s, sparse = {0}fmissing_valuesstrategyF)err_msgTN)r   r   kindr   r   fit	transformcopystatistics_formatr   
csc_matrixissparsetoarray)	XX_truer'   
statisticsr&   r(   	assert_aeimputerX_transr   r   r    _check_statistics*   s$   	
r8   r'   )meanmedianmost_frequentconstantc                 C   s   t jdd}t j|d d d< t| d}|t|}|jdks#J ||}|jdks/J t	| d}||}|jdks@J d S )N
      r'   )r=   r>   )initial_strategy)
nprandomrandnnanr   fit_transformr   
csr_matrixshaper   )r'   r2   r6   	X_imputediterative_imputerr   r   r    test_imputation_shapeO   s   



rJ   )conste   Nc                 C   s`   t d}t j|d< tjtt| d t| d}|| W d    d S 1 s)w   Y  d S )N      r   r   matchr?   )	rA   onesrD   pytestraises
ValueErrorstrr   rE   r'   r2   r6   r   r   r    &test_imputation_error_invalid_strategy`   s   


"rY   r9   r:   r;   c                 C   s   t d}t j|d d df< t| dd}tjtdd || W d    n1 s+w   Y  tjtdd |	| W d    d S 1 sHw   Y  d S )NrM   r      r'   verboseThe 'verbose' parameterrQ   Skipping)
rA   rS   rD   r   rT   warnsFutureWarningr*   UserWarningr+   rX   r   r   r     test_imputation_deletion_warningj   s   
"rc   c                 C   s   t d}tj}tjg dtd}|j||d|gd|ddgg|d}t| dd	}t jt	d
d |
| W d    n1 s?w   Y  t|j| t jtdd || W d    d S 1 sbw   Y  d S )Npandasabcdr   r[      r>   r=   columnsr\   r^   rQ   z6Skipping features without any observed values: \['b'\])rT   importorskiprA   rD   arrayobject	DataFramer   r`   ra   r*   r   feature_names_in_rb   r+   )r'   pdr&   feature_namesr2   r6   r   r   r    .test_imputation_deletion_warning_feature_namesx   s&   


"ru   c                 C   s   t d}d|d< t|}t| dd}tjtdd || W d    n1 s+w   Y  ||	  tjtdd |
| W d    d S 1 sOw   Y  d S )NrM   r   )r'   r&   zProvide a dense arrayrQ   )rA   rS   r   r/   r   rT   rU   rV   r*   r1   r+   rX   r   r   r    test_imputation_error_sparse_0   s   

"rv   c                 O   >   t | dr| jnt| }|dkrtjS tj| g|R i |S Nsizer   )hasattrry   lenrA   rD   r:   arrargskwargslengthr   r   r    safe_median      &r   c                 O   rw   rx   )rz   ry   r{   rA   rD   r9   r|   r   r   r    	safe_mean   r   r   c               
   C   sp  t jd} d}d}|| || f}t |d }t d|d d }|dd d  |dd d< dt jdd fd	t jd
d fg}|D ]\}}}	t |}
t |}t |d }t|d D ]}|| d dk|| d  || d  }t|d ||  || ||   d}|d | | }|d | }t 	||}|| 
t|d |  }|	|||||< t |||f|
d d |f< d|krt |t 	|| || f|d d |f< nt ||t 	|| |f|d d |f< t j||
d d |f  t j||d d |f  q^|d	krt |jdd }n
t |jdd }|d d |f }t|
|||| qBd S )Nr   r=   r[   rk   r>   r9   c                 S      t t| |fS r   )r   rA   hstackzvpr   r   r    <lambda>       z-test_imputation_mean_median.<locals>.<lambda>r:   c                 S   r   r   )r   rA   r   r   r   r   r    r      r   )axis)rA   rB   RandomStatezerosarangerD   emptyrangemaxrepeatpermutationr{   r   shuffleisnananyallr8   )rngdimdecrG   r   valuestestsr'   test_missing_valuestrue_value_funr2   r3   true_statisticsjnb_zerosnb_missing_values	nb_valuesr   r   r   cols_to_keepr   r   r    test_imputation_mean_median   sL   

(& 
r   c                  C   s   t dt jt jgdt jt jgddt jgddt jgddt jgddt jgddt jgddt jgg } t g dg d	g dg d
g dg dg dg dg }g d}t| |d|t j d S )Nr   rO   rk   r>   r   r   r   )rO   rO   rO   )r   r         )r   rO         @)rk   rO         @)r   r         )r   r>         ?)r   rO   r   r   r   r   r   r   r:   )rA   ro   rD   	transposer8   )r2   X_imputed_medianstatistics_medianr   r   r    $test_imputation_median_special_cases   s8   





r   r9   r:   r   c                 C   sl   t jg dg dg dg|d}d}tjt|d t| d}|| W d    d S 1 s/w   Y  d S )Nrf   rg   rN   rk   e   gh	   rj   6non-numeric data:
could not convert string to float: 'rQ   r?   )rA   ro   rT   rU   rV   r   rE   )r'   r   r2   msgr6   r   r   r    .test_imputation_mean_median_error_invalid_type  s    
"r   typelist	dataframec                 C   s~   g dg dg dg}|dkrt d}||}d}t jt|d t| d}|| W d    d S 1 s8w   Y  d S )	Nr   r   r   r   rd   r   rQ   r?   )rT   rn   rq   rU   rV   r   rE   )r'   r   r2   rs   r   r6   r   r   r    :test_imputation_mean_median_error_invalid_type_list_pandas   s   


"r   r<   r;   USc                 C   s   t jt jt jddgt jdt jdgt jddt jgt jdddgg|d}d}tjt|d	 t| d
}||| W d    d S 1 sDw   Y  d S )Nrf   r$   rh   ri   rg   r   rj   z#SimpleImputer does not support datarQ   r?   )	rA   ro   rD   rT   rU   rV   r   r*   r+   )r'   r   r2   r(   r6   r   r   r    /test_imputation_const_mostf_error_invalid_types-  s   

"r   c                  C   sb   t g dg dg dg dg} t g dg dg dg dg}t| |d	t jd
ddgd d S )N)r   r   r   rO   )r   r>   r   rN   )r   r[   rN   r   )r   r>   rN      )r>   r   rO   )r>   rN   rN   )r[   rN   rN   )r>   rN   r   r;   r>   rN   r   )rA   ro   r8   rD   )r2   r3   r   r   r    test_imputation_most_frequentB  s   	r   markerNAN c                 C   s   t j| | ddg| d| dg| dd| g| dddggtd}t jg dg d	g d
g dgtd}t| dd}|||}t|| d S )Nrf   r$   rh   ri   rg   r   rj   )rh   rf   r$   )rh   ri   ri   )rg   ri   ri   )rh   ri   r   r;   r%   )rA   ro   rp   r   r*   r+   r   r   r2   r3   r6   r7   r   r   r    %test_imputation_most_frequent_objects]  s&   





r   categoryc                 C   j   t d}td}|j|| d}tjg dg dg dg dgtd}tdd	}|	|}t
|| d S )
Nrd   ,Cat1,Cat2,Cat3,Cat4
,i,x,
a,,y,
a,j,,
b,j,x,rj   )rf   ir   )rf   r   r   )rf   r   r   )rg   r   r   r;   r?   rT   rn   ioStringIOread_csvrA   ro   rp   r   rE   r   r   rs   r$   dfr3   r6   r7   r   r   r    $test_imputation_most_frequent_pandasz  s   



r   zX_data, missing_value)r[   r         ?c                 C   sd   t jd| td}||d< tjtdd t|ddd}|| W d    d S 1 s+w   Y  d S )	NrM   rj   rP   zimputing numericalrQ   r<   r   r&   r'   
fill_value)rA   fullfloatrT   rU   rV   r   rE   )X_datamissing_valuer2   r6   r   r   r    +test_imputation_constant_error_invalid_type  s   "r   c                  C   sj   t g dg dg dg dg} t g dg dg dg dg}td	d
dd}|| }t|| d S )N)r   r>   rN   r   )rk   r   rO   r   )r   r   r   r   )   r   r   r   )r   r>   rN   r   )rk   r   rO   r   )r   r   r   r   )r   r   r   r   r   r<   r   r   )rA   ro   r   rE   r   )r2   r3   r6   r7   r   r   r     test_imputation_constant_integer  s
   ""
r   array_constructorc              	   C   s   t t jddt jgdt jdt jgddt jt jgdddt jgg}t g dg dg d	g d
g}| |}| |}tddd}||}t|| d S )N皙?r   333333??ffffff?      ?)r   r   r   r   )r   r   r   r   )r   r   r   r   )r   r   r   r   r<   r   )r'   r   )rA   ro   rD   r   rE   r   )r   r2   r3   r6   r7   r   r   r    test_imputation_constant_float  s   	
r   c                 C   s   t j| dd| gd| d| gdd| | gddd	| ggtd
}t jg dg dg dg dgtd
}t| ddd}||}t|| d S )Nrf   rg   rh   ri   r   r$   r   r   r   rj   )missingrf   rg   r   )rh   r   ri   r   )r   r$   r   r   )r   r   r   r   r<   r   r   )rA   ro   rp   r   rE   r   r   r   r   r    test_imputation_constant_object  s*   






r   c                 C   r   )
Nrd   r   rj   )r   r   r   r   )rf   r   r   r   )rf   r   r   r   )rg   r   r   r   r<   r?   r   r   r   r   r    test_imputation_constant_pandas  s   




r   r2   r[   r>   c                 C   sf   t  | }|jdksJ t  }|dgdgg |jdks J |dgtjgg |jdks1J d S )Nr   r[   r>   )r   r*   n_iter_rA   rD   r2   r6   r   r   r    "test_iterative_imputer_one_feature  s   r   c                  C   sr   t dddd} | jd }tdt|dfdtjddfg}d	g d
i}t dddd }t||}|| | d S )Nd   皙?)densityr   r6   r&   r   random_stateimputer__strategyrZ   r[   )	r   datar   r   r   DecisionTreeRegressorr1   r   r*   )r2   r&   pipeline
parametersYgsr   r   r    $test_imputation_pipeline_grid_search  s   

r   c                  C   st  t ddddd} |   }tdddd}|||}d|d	< t||kr*J |  }t|jd ddd}|||}d|jd< t|j|jkrPJ |   }tddd
d}|||}d|d	< t	|| |  
 }t|jd dd
d}|||}d|jd< t	|j|j |  }t|jd dd
d}|||}d|jd< t|j|jkrJ d S )NrO   g      ?r   r   r   r9   T)r&   r'   r,   r   rP   F)r   r,   r1   r   r*   r+   rA   r   r   r   tocsc)X_origr2   r6   Xtr   r   r    test_imputation_copy  s4   



r  c                  C   s   t jd} d}d}t||d| d }|dk}t j||< tdd}||}t||j	
| tdd|}t |
||j	
|krHJ d|_t|
||j	
| d S )Nr   r   r=   r   r   )max_iterrO   )rA   rB   r   r   r1   rD   r   rE   r   initial_imputer_r+   r*   r   r   )r   nri   r2   missing_flagr6   rH   r   r   r    !test_iterative_imputer_zero_itersC  s   


 r  c                  C   sp   t jd} d}d}t||d| d }tdddd}|| || tdddd}|| || d S )	Nr   r   rN   r   r   r[   )r&   r  r]   r>   )rA   rB   r   r   r1   r   r*   r+   )r   r  ri   r2   r6   r   r   r    test_iterative_imputer_verbose[  s   


r  c                  C   sB   d} d}t | |f}tddd}||}t||j| d S )Nr   rN   r   r[   )r&   r  )rA   r   r   rE   r   r  r+   )r  ri   r2   r6   rH   r   r   r    "test_iterative_imputer_all_missingi  s   
r  imputation_order)rB   roman	ascending
descendingarabicc           
      C   sT  t jd}d}d}d}t||d|d }d|d d df< td|dd	d
ddd| |d
}|| dd |jD }t||j	 |j
ksEJ | dkr^t |d |d  t d|ks\J d S | dkrzt |d |d  t |d ddksxJ d S | dkr|d |d  }||d d  }	||	ksJ d S d| v rt|||d  ksJ d S d S )Nr   r   r=   r>   r   r   r[   rO   FT)
r&   r  n_nearest_featuressample_posteriorskip_complete	min_value	max_valuer]   r	  r   c                 S      g | ]}|j qS r   feat_idx).0r   r   r   r    
<listcomp>  r   z;test_iterative_imputer_imputation_order.<locals>.<listcomp>r
  r  r   rB   ending)rA   rB   r   r   r1   r   rE   imputation_sequence_r{   r   n_features_with_missing_r   r   )
r	  r   r  ri   r  r2   r6   ordered_idxordered_idx_round_1ordered_idx_round_2r   r   r    'test_iterative_imputer_imputation_orderr  sB   
*0r  	estimatorc           	      C   s   t jd}d}d}t||d|d }tdd| |d}|| g }|jD ]}| d ur0t| ntt	 }t
|j|s=J |t|j q&tt|t|ksRJ d S )Nr   r   r=   r   r   r[   )r&   r  r  r   )rA   rB   r   r   r1   r   rE   r  r   r   
isinstancer  appendidr{   set)	r  r   r  ri   r2   r6   hashestripletexpected_typer   r   r    !test_iterative_imputer_estimators  s   

r'  c                  C   s   t jd} d}d}t||d| d }tdddd| d}||}tt ||dk d tt 	||dk d t||dk ||dk  d S )	Nr   r   r=   r   r   r[   皙?)r&   r  r  r  r   
rA   rB   r   r   r1   r   rE   r   minr   r   r  ri   r2   r6   r   r   r   r    test_iterative_imputer_clip  s   

r,  c                  C   s   t jd} d}d}t||d| d }d|d d df< tdddd	dd
dd| d	}||}tt ||dk d tt 	||dk d
 t||dk ||dk  d S )Nr   r   r=   r   r   r[   r>   rO   Tr(  rB   )	r&   r  r  r  r  r  r]   r	  r   r)  r+  r   r   r    %test_iterative_imputer_clip_truncnorm  s(   
r-  c                     s   t jd} | jdd t j d d< tddd| d  t  fdd	td
D }t	|dks7J t	|dks?J |
 | }}t|| | d\}}|dkr[|d7 }t|| | d\}}|dk sr|dkstJ dd S d S )N*   )rO   rO   )ry   r   r   T)r  r  r  r   c                    s   g | ]}  d  d  qS )r   )r+   )r  _r   r   r    r    s    zEtest_iterative_imputer_truncated_normal_posterior.<locals>.<listcomp>r   normg-q=r(  r   z&The posterior does appear to be normal)rA   rB   r   normalrD   r   rE   ro   r   r   r9   stdr   )r   imputationsmusigmaks_statisticp_valuer   r   r    1test_iterative_imputer_truncated_normal_posterior  s    
 r8  c                 C   s   t jd}d}d}|jdd||fd}|jdd||fd}d|d d df< d|d< tdd| |d|}td| d	|}t||d d df ||d d df  d S )
Nr   r   r=   rN   )lowhighry   r[   rP   )r&   r  r@   r   r%   )	rA   rB   r   randintr   r*   r   r   r+   )r'   r   r  ri   X_trainX_testr6   initial_imputerr   r   r    +test_iterative_imputer_missing_at_transform  s    (r?  c                  C   s   t jd} t jd}d}d}t||d| d }tddd| d}|| ||}||}t |t	
t |ks@J tddd	d d
| d}tddd	d d
|d}	|| |	| ||}
||}|	|}t|
| t|
| d S )Nr   r[   r   r=   r   r   T)r&   r  r  r   Fr  )r&   r  r  r  r	  r   )rA   rB   r   r   r1   r   r*   r+   r9   rT   approxr   )rng1rng2r  ri   r2   r6   
X_fitted_1
X_fitted_2imputer1imputer2X_fitted_1aX_fitted_1br   r   r    .test_iterative_imputer_transform_stochasticity  sF   


	





rI  c                  C   s   t jd} | dd}t j|d d df< td| d}td| d}|||}||}t	|d d dd f | t	|| d S )Nr   r   r=   )r  r   r[   )
rA   rB   r   randrD   r   r*   r+   rE   r   )r   r2   m1m2pred1pred2r   r   r    !test_iterative_imputer_no_missingH  s   
rO  c            	      C   s   t jd} d}| |d}| d|}t ||}| ||dk }| }t j||< tdd| d}||}t	||dd d S )	Nr   2   r[   r   rO   r  r]   r   g{Gz?atol)
rA   rB   r   rJ  dotr,   rD   r   rE   r   )	r   ri   ABr2   nan_mask	X_missingr6   X_filledr   r   r    test_iterative_imputer_rank_oneV  s   

rZ  rankrN   rO   c                 C   s   t jd}d}d}||| }|| |}t ||}|||dk }| }t j||< |d }|d | }	||d  }
||d  }tddd|d|	}|	|}t
|
|d	d
 d S )Nr   F   r   r>   rO   r  r[   )r  r	  r]   r   r   rR  )rA   rB   r   rJ  rT  r,   rD   r   r*   r+   r   )r[  r   r  ri   rU  rV  rY  rW  rX  r<  X_test_filledr=  r6   
X_test_estr   r   r    )test_iterative_imputer_transform_recoverye  s(   

r_  c               	   C   s  t jd} d}d}| ||}| ||}t |j}t|D ])}t|D ]"}|d d || | f  |d d |f |d d |f  d 7  < q&q | ||dk }| }	t j	|	|< |d }|	d | }
||d  }|	|d  }t
dd| d|
}||}t||dd	d
 d S )Nr   r   r=   r>   g      ?r[   rQ  MbP?{Gz?)rtolrS  )rA   rB   r   rC   r   rG   r   rJ  r,   rD   r   r*   r+   r   )r   r  ri   rU  rV  rY  r   r   rW  rX  r<  r]  r=  r6   r^  r   r   r    &test_iterative_imputer_additive_matrix~  s(   B

rc  z"max_iter, tol, error_type, warningr   r`  zshould be a positive integergMbPzshould be a non-negative floatc                 C   sT   t d}t| |d}tj||d || W d    d S 1 s#w   Y  d S )N)r   r>   )r  tolrQ   )rA   r   r   rT   rU   rE   )r  rd  
error_typewarningr2   r6   r   r   r    "test_iterative_imputer_error_param  s
   
"rg  c                  C   s   t jd} d}d}| |d}| d|}t ||}| ||dk }| }t j||< tdddd| d	}||}	t	|j
||j ksGJ t|jdd| d
}||}
t|	|
dd tdddd| d	}|| |j|jksrJ d S )Nr   rP  rO   r[   r   r   ra  F)r  rd  r  r]   r   )r  r  r]   r   gHz>rR  )rA   rB   r   rJ  rT  r,   rD   r   rE   r{   r  r   r   r*   r  )r   r  ri   rU  rV  r2   rW  rX  r6   X_filled_100X_filled_earlyr   r   r    %test_iterative_imputer_early_stopping  s0   






rj  c            
      C   s   t dd\} }| j\}}d| d d df< tjd}d}t|D ]}|jt|t|| dd}tj	| ||f< q t
d	dd
}t  tdt || |}	W d    n1 sZw   Y  tt|	riJ d S )NT)
return_X_yr[   rN   r   g333333?F)ry   replacerO   )r  r  error)r
   rG   rA   rB   r   r   choicer   intrD   r   warningscatch_warningssimplefilterRuntimeWarningrE   r   r   )
r2   r   	n_samples
n_featuresr   missing_ratefeat
sample_idxr6   X_fillr   r   r    $test_iterative_imputer_catch_warning  s    

rz  z$min_value, max_value, correct_outputr   )r   rO   r=   )r      i,  r   r=   r{  )scalarszNone-defaultinflistszlists-with-inf)idsc                 C   s   t jddd}t| |d}|| t|jt jr#t|j	t js%J |jj
d |j
d kr;|j	j
d |j
d ks=J t|dd d f |j t|dd d f |j	 d S )Nr   r=   rN   r  r  r[   )rA   rB   r   rC   r   r*   r   
_min_valuendarray
_max_valuerG   r   )r  r  correct_outputr2   r6   r   r   r    )test_iterative_imputer_min_max_array_like  s   
r  zmin_value, max_value, err_msg)r   r   min_value >= max_value.r  )r   r{  r   z_value' should be of shapec                 C   sV   t jd}t| |d}tjt|d || W d    d S 1 s$w   Y  d S )Nr=   rN   r  rQ   )rA   rB   r   rT   rU   rV   r*   )r  r  r(   r2   r6   r   r   r    *test_iterative_imputer_catch_min_max_error  s
   "r  zmin_max_1, min_max_2irk   zNone-vs-infzScalar-vs-vectorc              	   C   s   t t jdddgdt jt jdgddt jdgt jddt jgg}t t jdt jdgddt jt jgt jdddgg}t| d | d dd	}t|d |d dd	}|||}|||}t|d d df |d d df  d S )
Nr>   r[   r=   r   rN   rk   rO   r   )r  r  r   )rA   ro   rD   r   r*   r+   r   )	min_max_1	min_max_2r<  r=  rE  rF  X_test_imputed1X_test_imputed2r   r   r    4test_iterative_imputer_min_max_array_like_imputation  s&   *&r  r  TFc                 C   s   t jd}t g dg dg dg dg}t t jdddgt jdd	dgt jd	d
d	gg}td| |d}|||}| rUt|d d df t 	|d d df  d S t|d d df g ddd d S )Nr   )rO   r>   r>   r[   )r=   r[   r>   r   )rN   r[   r[   r[   )r   rk   r>   r>   r>   rk   rO   r[   r=   r9   )r@   r  r   )   r      g-C6?)rb  )
rA   rB   r   ro   rD   r   r*   r+   r   r9   )r  r   r<  r=  r6   r^  r   r   r    'test_iterative_imputer_skip_non_missing,  s   ".,"r  
rs_imputer)seedrs_estimatorc                 C   sH   G dd d}||d}t | d}td}|| |j|ks"J d S )Nc                   @   s$   e Zd Zdd Zdd Zdd ZdS )zCtest_iterative_imputer_dont_set_random_state.<locals>.ZeroEstimatorc                 S   s
   || _ d S r   r   )selfr   r   r   r    __init__C  s   
zLtest_iterative_imputer_dont_set_random_state.<locals>.ZeroEstimator.__init__c                 _   s   | S r   r   )r  r~   kgardsr   r   r    r*   F  s   zGtest_iterative_imputer_dont_set_random_state.<locals>.ZeroEstimator.fitc                 S   s   t |jd S )Nr   )rA   r   rG   )r  r2   r   r   r    predictI  s   zKtest_iterative_imputer_dont_set_random_state.<locals>.ZeroEstimator.predictN)__name__
__module____qualname__r  r*   r  r   r   r   r    ZeroEstimatorB  s    r  r   r  )r   rA   r   r*   r   )r  r  r  r  r6   r<  r   r   r    ,test_iterative_imputer_dont_set_random_state?  s   




r  zX_fit, X_trans, params, msg_errmissing-onlyauto)featuresr   zBhave missing values in transform but have no missing values in fitrB   z3'features' has to be either 'missing-only' or 'all'r   z&'sparse' has to be a boolean or 'auto'rf   rg   rh   rj   z1MissingIndicator does not support data with dtypec                 C   s^   t dd}|jdi | tjt|d || | W d    d S 1 s(w   Y  d S )Nr   r   rQ   r   )r   
set_paramsrT   rU   rV   r*   r+   )X_fitr7   paramsmsg_err	indicatorr   r   r    test_missing_indicator_errorS  s
   
"r  zmissing_values, dtype, arr_typez,param_features, n_features, features_indicesr   r[   r>   c                 C   s  t | | dgdd| gg}t | | dgg dg}t g dg dg}t g dg dg}	|||}|||}||}|	|}	t| |dd	}
|
|}|
|}|jd |ksaJ |jd |ksjJ t|
j| t	||d d |f  t	||	d d |f  |j
tksJ |j
tksJ t|t jsJ t|t jsJ |
jd
d |
|}|
|}|j
tksJ |j
tksJ |jdksJ |jdksJ t	| | t	| | d S )Nr[   rk   r>   rk   r  r=   )r[   r[   r   )r   r   r[   r   F)r&   r  r   Tr   csc)rA   ro   astyper   rE   r+   rG   r   	features_r   r   boolr   r  r  r.   r1   )r&   arr_typer   param_featuresru  features_indicesr  r7   X_fit_expectedX_trans_expectedr  
X_fit_maskX_trans_maskX_fit_mask_sparseX_trans_mask_sparser   r   r    test_missing_indicator_neww  s>   





r  r  c                 C   s   d}t ||dgd|dgg}t ||dgg dg}| |}| |}t|d}tjtdd || W d    n1 s?w   Y  || tjtdd || W d    d S 1 saw   Y  d S )	Nr   r[   rk   r>   r  r   z"Sparse input with missing_values=0rQ   )rA   ro   r   rT   rU   rV   rE   r+   )r  r&   r  r7   X_fit_sparseX_trans_sparser  r   r   r    5test_missing_indicator_raise_on_sparse_with_missing_0  s   

"r  param_sparse)TFr  zmissing_values, arr_typec                 C   sH  t ||dgd|dgg}t ||dgg dg}| |t j}| |t j}t||d}||}||}|du rM|jdksDJ |jdksKJ d S |dkrg|d	krgt|t j	s]J t|t j	seJ d S |d
u r}t|t j	ssJ t|t j	s{J d S t
|r|jdksJ |jdksJ d S t|t j	sJ t|t j	sJ d S )Nr[   rk   r>   r  )r&   r   Tr  r  r   F)rA   ro   r  float64r   rE   r+   r.   r   r  r   r0   )r  r&   r  r  r7   r  r  r  r   r   r    #test_missing_indicator_sparse_param  s*   


r  c                  C   sP   t jg dg dgtd} tddd}|| }t|t g dg dg d S )	Nrf   rg   rh   )rg   rh   rf   rj   rf   r   )r&   r  )TFF)FFT)rA   ro   rp   r   rE   r   )r2   r  r7   r   r   r    test_missing_indicator_string  s   
 r  zX, missing_values, X_trans_exp)rg   rg   TF)rg   rg   FT)r   r   TF)r   r   FTc                 C   s0   t t|ddt|d}|| }t|| d S )Nr;   r%   r   )r   r   r   rE   r   )r2   r&   X_trans_exptransr7   r   r   r    #test_missing_indicator_with_imputer  s   

r  imputer_constructorz.imputer_missing_values, missing_value, err_msgNaNzInput X contains NaN)z-1r   z(types are expected to be both numerical.c                 C   sh   t jd}|dd}||d< | |d}tjt|d || W d    d S 1 s-w   Y  d S )Nr.  r=   rP   r   rQ   )rA   rB   r   rC   rT   rU   rV   rE   )r  imputer_missing_valuesr   r(   r   r2   r6   r   r   r    (test_inconsistent_dtype_X_missing_values*  s   
"r  c                  C   sB   t ddgddgg} tddd}|| }|jd dksJ d S )Nr[   r  r   r  r&   r   )rA   ro   r   rE   rG   r2   mir   r   r   r    !test_missing_indicator_no_missingA  s   
r  c                  C   sJ   t g dg dg dg} tddd}|| }| | ks#J d S )Nr  )r[   r>   r   )r>   r   r[   r   r[   r  )r   rF   r   rE   getnnzsumr  r   r   r    /test_missing_indicator_sparse_no_explicit_zerosL  s   
r  c                 C   s8   t ddgddgg}|  }|| |jd u sJ d S )Nr[   )rA   ro   r*   
indicator_)r  r2   r6   r   r   r    test_imputer_without_indicatorW  s   
r  c                 C   s   | t jddgdt jdgddt jgg dg}t g dg dg d	g d
g}tt jdd}||}t|s:J |j|jksBJ t|	 | d S )Nr[   rO   r>   r   rN   )r[   r>   r   )      @r         @r           r  )       @r  r   r  r   r  )g      @r  r  r  r  r   )r   r  g      "@r  r  r  T)r&   add_indicator)
rA   rD   ro   r   rE   r   r0   rG   r   r1   )r  X_sparser3   r6   r7   r   r   r    2test_simple_imputation_add_indicator_sparse_matrix`  s   ,	
r  zstrategy, expected)r;   rg   )r<   r   c                 C   sN   ddgdt jgg}t jddgd|ggtd}t| d}||}t|| d S )Nrf   rg   rh   rj   r?   )rA   rD   ro   rp   r   rE   r   )r'   expectedr2   r3   r6   r7   r   r   r    "test_simple_imputation_string_list}  s
   

r  zorder, idx_orderr  )rN   rk   r>   r   r[   r  )r[   r   r>   rk   rN   c                 C   s   t jd}|dd}t j|d ddf< t j|d ddf< t j|d dd	f< t j|d d
df< tt! td| dd	|}dd |j
D }||ksNJ W d    d S 1 sYw   Y  d S )Nr.  r   rO   rP  r[      r      r>   r=   rk   )r  r	  r   c                 S   r  r   r  )r  r   r   r   r    r    r   z)test_imputation_order.<locals>.<listcomp>)rA   rB   r   rJ  rD   rT   r`   r   r   r*   r  )order	idx_orderr   r2   trsidxr   r   r    test_imputation_order  s   "r  r   c              	   C   s2  t d| ddgg ddd| dgddd| gg}t g d	d
d| dgd| ddgddd
| gg}t d| ddg| d| | gd
| d| g| d| dgg}t g d| d
| dgg d| d| d
gg}t| ddd}||}||}||}||}	t|| t|	| ||fD ]}
||
}||}t||
 qd S )Nr   rN   r   rk   r   rO   rk   r   r   r   r   )rO   rk   r>   r[   r>   r[   rk   rO   )r[   r[   r[   rN   )r>   rN   rN   rk   r9   T)r&   r'   r  )rA   ro   r   rE   inverse_transformr+   r   )r   X_1X_2X_3X_4r6   	X_1_transX_1_inv_trans	X_2_transX_2_inv_transr2   r7   X_inv_transr   r   r    (test_simple_imputation_inverse_transform  sT   


	


	



	

	







r  c              	   C   s   t d| ddgg ddd| dgddd| gg}t| d	d
}||}tjtd|j dd || W d    d S 1 s?w   Y  d S )Nr   rN   r   r  r   r   r   r   r9   r%   zGot 'add_indicator='rQ   )	rA   ro   r   rE   rT   rU   rV   r  r  )r   r  r6   r  r   r   r    3test_simple_imputation_inverse_transform_exceptions  s   


	
"r  z)expected,array,dtype,extra_value,n_repeatextra_valuer  most_frequent_value)r  r  valuer  min_valuevalue)r  r  r  r   )r[   r>   rN   )r[   r[   r>   )r  r  r[   )r[   r[   r  c                 C   s"   | t tj||d||ksJ d S )Nrj   )r   rA   ro   )r  ro   r   r  n_repeatr   r   r    test_most_frequent  s   r  c                  C   s  t d} | d| jg dddi}t| jddd}t||tj	d	gdgd
ggt
d | d| jg dddi}tddd}t||tj	d	gd
gdggt
d | d| jg dddi}t| jddd}t||tj	dgdgdggdd ttjddd}t||tj	dgdgdggdd | d| jg dddi}t| jdd}t||tj	dgdgdgdggdd | d| jg dddi}t| jdd}t||tj	dgdgdggdd | d| jg dddi}t| jddd}t||tj	dgdgdggdd | d| jg d ddi}t| jdd}t||tj	dgd!gd!gdggdd d S )"Nrd   feature)abcNdestringrj   r<   nar   r  r  )r  r  fghok)r   r'   r  )r[   NrN   Int64r   r[   rN   r  )r[   Nr>   rN   r:   r%   r>   )r[   Nr>   r9   r   )r   Nr  g       r   r  )r   Nr  r  r  )rT   rn   rq   Seriesr   NAr!   rE   rA   ro   rp   r#   rD   )rs   r   r6   r   r   r    test_simple_impute_pd_na  sR   
    $  r  c                  C   sd   t d} tj}| j||d|gd|ddggg dd}t|d|}| }g d	}t|| d
S )zDCheck that missing indicator return the feature names with a prefix.rd   r[   rk   r>   r=   re   rl   r   )missingindicator_amissingindicator_bmissingindicator_dN)	rT   rn   rA   rD   rq   r   r*   get_feature_names_outr   )rs   r&   r2   r  rt   expected_namesr   r   r    (test_missing_indicator_feature_names_outD  s   


r  )rT   rp  numpyrA   scipyr   scipy.statsr   r   sklearn.utils._testingr   r   r   r   sklearn.experimentalr	   sklearn.datasetsr
   sklearn.imputer   r   r   sklearn.dummyr   sklearn.linear_modelr   r   r   sklearn.pipeliner   r   sklearn.model_selectionr   sklearnr   sklearn.random_projectionr   sklearn.exceptionsr   sklearn.impute._baser   r!   r#   r8   markparametrizerJ   rY   rc   ru   rv   r   r   r   r   rp   rW   r   r   r   r   r   rD   r   r   r   r   rF   asarrayr   r   r   r   r   r  r  r  r  r  r'  r,  r-  r8  r?  rI  rO  rZ  r_  rc  rV   rg  rj  rz  ro   r}  r  r  r  r  rB   r   r  r  r  int32r/   
coo_matrix
lil_matrix
bsr_matrixr  r  r  r  r  r  r  r  r  r  r  r  r  r  ro  r  r  r  r   r   r   r    <module>   s*   %

	


C 





"
+	
%
!
2



$*"


	0


&,







	






9
<