o
    tBh[>                     @   s*  d dl Z d dlZd dlmZ d dlZd dlmZ d dl	m
Z
 d dl	mZ d dlmZmZmZ g dg dg dg d	gZe jd
dg dg dg dg dgfdg dg dg dg dgfdg dg dg dg dgfgdd Zdd Zdd Zdd Ze jd
dg dg dg dg dgfdg dg dg dg dgfdg dg dg dg dgfgdd  Ze jd!g d"d#d$ Zd%d& Ze jd'ed(d)d*d+ Zd,d- Zd.d/ Zd0d1 Ze jd2dg d3g d4g d5fdg d3g d6g d7fdg d8g d6g d9fgd:d; Ze jd<dg d=g d>g d?g d@gfdg dAg dAg dBg dCgfdg dDg dEg dFg dFgfge jdGg dHdIdJ Z e jd!g d"dKdL Z!dMdN Z"e jdOdg dPfdg dQfgdRdS Z#dTdU Z$e jdVej%ej&ej'ge jdWdej%ej&ej'ge jdGg dHdXdY Z(e jdZej%ej&ej'ge jdGg dHd[d\ Z)e j*d]e jd^dd_gd`da Z+dbdc Z,ddde Z-dfdg Z.e jd^d e/dhgdidj Z0e jdkdldmdn edoD fdpdqdn edoD fdrdsdn edoD fgdtdu Z1dS )v    N)clone)KBinsDiscretizer)OneHotEncoder)assert_array_almost_equalassert_array_equalassert_allclose_dense_sparse      ?)r         @      )r   g      @r	         ?)   g      @r      zstrategy, expecteduniform)r   r   r   r   )r   r   r   r   )r   r   r   r   )r   r   r   r   kmeans)r   r   r   r   quantilec                 C   s,   t dd| d}|t t||t d S )N   ordinaln_binsencodestrategy)r   fitXr   	transform)r   expectedest r!   /var/www/html/riverr-enterprise-integrations-main/venv/lib/python3.10/site-packages/sklearn/preprocessing/tests/test_discretization.pytest_fit_transform   s   	
r#   c                   C   sR   t ddt t tdgd dt t ddtjjttks'J d S )Nr   r   r   )	r   fit_transformr   nparrayr   n_bins_dtypeintr!   r!   r!   r"   test_valid_n_bins    s   &r+   c                  C   s   t dd} d}tjt|d | t W d    n1 sw   Y  t dd} d}tjt|d | t W d    d S 1 sBw   Y  d S )Nr   r$   zUKBinsDiscretizer received an invalid number of bins. Received 1, expected at least 2.matchg?zOKBinsDiscretizer received an invalid n_bins type. Received float, expected int.)r   pytestraises
ValueErrorr%   r   r    err_msgr!   r!   r"   test_invalid_n_bins&   s   

"r3   c                  C   sB  t dd} t| d}d}tjt|d |t W d    n1 s$w   Y  g d} t| d}d}tjt|d |t W d    n1 sKw   Y  g d} t| d}d}tjt|d |t W d    n1 srw   Y  g d	} t| d}d
}tjt|d |t W d    d S 1 sw   Y  d S )N)r             @r$   z:n_bins must be a scalar or array of shape \(n_features,\).r,   )r   r   r   r   r   r   r   z{KBinsDiscretizer received an invalid number of bins at indices 0, 3. Number of bins must be at least 2, and must be an int.) @r   r7   r   z{KBinsDiscretizer received an invalid number of bins at indices 0, 2. Number of bins must be at least 2, and must be an int.)r&   fullr   r.   r/   r0   r%   r   )r   r    r2   r!   r!   r"   test_invalid_n_bins_array8   s4   



"r9   )r   r   r   r   r6   )r   r   r   r   )r   r   r   r   c                 C   sz   t g dd| dt}t||t ttjd }|jj|fks%J t	|j|j
D ]\}}|j|d fks:J q,d S )Nr   r   r   r   r   r   r   )r   r   r   r   r   r&   r'   shape
bin_edges_zipr(   )r   r   r    
n_features	bin_edgesr   r!   r!   r"   test_fit_transform_n_bins_array^   s   	
r@   r   )r   r   r   c                 C   s   t d tddgddgddgddgg}t| ddd}d	}tjt|d
 || W d    n1 s6w   Y  |j	d dksDJ |
|}t|d d df t|jd  d S )Nalwaysr   r	   r   r   r   r   )r   r   r   z2Feature 0 is constant and will be replaced with 0.r,   )warningssimplefilterr&   r'   r   r.   warnsUserWarningr   r(   r   r   zerosr;   )r   r   r    warning_messageXtr!   r!   r"   test_same_min_maxs   s   
"
&rI   c                  C   s   t d} tdd}tt ||  W d    n1 sw   Y  tdd}|| dd tt ||  W d    d S 1 sHw   Y  d S )Nr4   r   r$   r   r   )	r&   aranger   r.   r/   r0   r   reshaper   )r   r    r!   r!   r"   test_transform_1d_behavior   s   


"rL   ir   	   c                 C   sX   t g ddd}t g ddd}|d|   }tddd|}t|| d S )	N)r5         @g      @g       @g      $@r   r   )r   r   r   r   r   
   r   r   r   r   )r&   r'   rK   r   r%   r   )rM   X_initXt_expectedr   rH   r!   r!   r"   test_numeric_stability   s
   rT   c                  C   R   t g ddd} d}tjt|d | t W d    d S 1 s"w   Y  d S )Nr:   zinvalid-encoderQ   zlValid options for 'encode' are \('onehot', 'onehot-dense', 'ordinal'\). Got encode='invalid-encode' instead.r,   r   r.   r/   r0   r   r   r1   r!   r!   r"   test_invalid_encode_option      "rW   c                  C   s   t g dddt} | t}t g dddt} | t}t|r'J ttdd dD dd|| t g dd	dt} | t}t|sOJ ttd
d dD dd|	 |	  d S )Nr:   r   rQ   onehot-densec                 S      g | ]}t |qS r!   r&   rJ   .0rM   r!   r!   r"   
<listcomp>       z'test_encode_options.<locals>.<listcomp>F)
categoriessparseonehotc                 S   rZ   r!   r[   r\   r!   r!   r"   r^      r_   T)
r   r   r   r   spissparser   r   r%   toarray)r    Xt_1Xt_2Xt_3r!   r!   r"   test_encode_options   s*   


ri   c                  C   rU   )Nr:   zinvalid-strategyr   r   znValid options for 'strategy' are \('uniform', 'quantile', 'kmeans'\). Got strategy='invalid-strategy' instead.r,   rV   r1   r!   r!   r"   test_invalid_strategy_option   rX   rk   z8strategy, expected_2bins, expected_3bins, expected_5bins)r   r   r   r   r   r   )r   r   r   r   r   r   )r   r   r   r   r4   r4   )r   r   r   r   r   r   )r   r   r   r   r   r4   )r   r   r   r   r   r   )r   r   r   r   r4   r4   c                 C   s   t g ddd}td| dd}||}t||  td| dd}||}t||  td| dd}||}t||  d S )	N)r   r   r   r   rN   rP   r   r   r   r   r   r   r   r      )r&   r'   rK   r   r%   r   ravel)r   expected_2binsexpected_3binsexpected_5binsr   r    rH   r!   r!   r"   test_nonuniform_strategies   s   


rr   zstrategy, expected_inv)      r5         r   )r         @      r   )r   rO   rs   r   )r   rO   rs   r
   )g      g      @g      g      )g      g      @g      g      ?)g      ?g      @g      g      ?)rs   r5   rt   g      )r   ru   rv   g        )r   rO   rs   g      ?r   )r   rb   rY   c                 C   s0   t d| |d}|t}||}t|| d S )Nr   rl   )r   r%   r   inverse_transformr   )r   r   expected_invkbdrH   Xinvr!   r!   r"   test_inverse_transform   s   "

r{   c                 C   s   t g dd d d f }td| dd}|| t ddgd d d f }||}t|jddd	 |j t|jdddg d S )
Nr   r   r   r   r4   r   rl   r	   rm   r   )axisr   )	r&   r'   r   r   r   r   maxr(   min)r   r   ry   X2X2tr!   r!   r"    test_transform_outside_fit_range
  s   

r   c                  C   s   t g dd d d f } |  }tddd}|| }t| | | }||}t|| t|t dgdgdgdgg d S )Nr|   r   r   rQ   r   r
   r   )r&   r'   copyr   r%   r   rw   )r   X_beforer    rH   	Xt_beforerz   r!   r!   r"   test_overwrite  s   



$r   zstrategy, expected_bin_edges)r   r   r   )r   r
   r   c                 C   sx   dgdgdgdgdgdgg}t d| d}d}tjt|d || W d    n1 s-w   Y  t|jd | d S )Nr   r   rj   'Consider decreasing the number of bins.r,   )r   r.   rD   rE   r   r   r<   )r   expected_bin_edgesr   ry   rG   r!   r!   r"   test_redundant_bins$  s   r   c                  C   s   t g ddd} t g d}t g ddd}tdddd	}d
}tjt|d ||  W d    n1 s=w   Y  t|j	d | t|
| | d S )N)皙?r   ffffff?r   r   )r   gq=
ףp?g=
ףp=?gzG?gp=
ף?r   )r   r   r4   rP   r   r   r   r   r,   r   )r&   r'   rK   r   r.   rD   rE   r   r   r<   r   )r   r?   rH   ry   rG   r!   r!   r"   !test_percentile_numeric_stability0  s   r   in_dtype	out_dtypec                 C   s   t jt| d}td||d}|d t jt jfvr6tjtdd |	| W d    d S 1 s/w   Y  d S |	| |d urB|}n|d u rP|j
t jkrPt j}n|j
}||}|j
|ks_J d S )Nr)   r   r   r   r)   zValid options for 'dtype' arer,   )r&   r'   r   r   float32float64r.   r/   r0   r   r)   float16r   )r   r   r   X_inputry   expected_dtyperH   r!   r!   r"   test_consistent_dtype=  s   "

r   input_dtypec                 C   sd   t jt| d}td|t jd}|| ||}td|t jd}|| ||}t|| d S )Nr   r   r   )	r&   r'   r   r   r   r   r   r   r   )r   r   r   kbd_32Xt_32kbd_64Xt_64r!   r!   r"   test_32_equal_64W  s   



r   z,ignore:In version 1.3 onwards, subsample=2e5	subsamplewarnc                 C   s   t g ddd}tdddd}|| t|}|j| d || t|jd	 |jd	 D ]\}}t j	
|| q1|jj|jjksGJ d S )
Nr   r   r   rP   r   r   r   r   r   )r&   r'   rK   r   r   r   
set_paramsr=   r<   testingassert_allcloser;   )r   r   kbd_defaultkbd_with_subsamplingbin_kbd_defaultbin_kbd_with_subsamplingr!   r!   r"   'test_kbinsdiscretizer_subsample_defaultl  s   

r   c                  C   h   t g ddd} tddddd}d	}tjt|d
 ||  W d    d S 1 s-w   Y  d S )Nr   r   r   rP   r   r   r   r   r   r   r   z4`subsample` must be used with `strategy="quantile"`.r,   )r&   r'   rK   r   r.   r/   r0   r   )r   ry   r2   r!   r!   r"   0test_kbinsdiscretizer_subsample_invalid_strategy  s   "r   c                  C   r   )Nr   r   r   rP   r   r   r8   r   z.subsample must be an instance of int, not str.r,   )r&   r'   rK   r   r.   r/   	TypeErrorr   r   ry   msgr!   r!   r"   ,test_kbinsdiscretizer_subsample_invalid_type  s   "r   c                  C   sf   t jdddd} tdddd}d}tjt|d	 ||  W d    d S 1 s,w   Y  d S )
NiA r   r   d   r   r   r   >In version 1.3 onwards, subsample=2e5 will be used by default.r,   )	r&   randomrandrK   r   r.   rD   FutureWarningr   r   r!   r!   r"   $test_kbinsdiscretizer_subsample_warn  s   "r   g     jAc                 C   s   t jdddd}tdddd}t|}|j| d | d	kr?tjt	d
d |
| W d    d S 1 s8w   Y  d S d}tjt|d |
| W d    n1 sXw   Y  |
| t |jd	 |jd	 krqJ |jj|jjks{J d S )Ni`[ r   r   rP   r   r   r   r   r   zsubsample == 0, must be >= 1.r,   r   )r&   r   r   rK   r   r   r   r.   r/   r0   r   rD   r   allr<   r;   )r   r   r   r   r   r!   r!   r"   &test_kbinsdiscretizer_subsample_values  s"   "
r   zencode, expected_namesrb   c                 C   .   g | ]}t d D ]}d| dt| qqS r4   feat_rangefloatr]   col_idbin_idr!   r!   r"   r^         r^   r   rY   c                 C   r   r   r   r   r!   r!   r"   r^     r   r   c                 C      g | ]}d | qS r   r!   )r]   r   r!   r!   r"   r^     r_   c                 C   sz   g dg dg dg dg}t d| d|}||}dd td	D }||}|jd
 |jd ks6J t|| dS )z[Check get_feature_names_out for different settings.
    Non-regression test for #22731
    )r	   r   r   )r   r   r   )r   r   r	   )r   r4   r   r4   rQ   c                 S   r   r   r!   r\   r!   r!   r"   r^     r_   z>test_kbinsdiscrtizer_get_feature_names_out.<locals>.<listcomp>r   r   r   N)r   r   r   r   get_feature_names_outr;   r   )r   expected_namesr   ry   rH   input_featuresoutput_namesr!   r!   r"   *test_kbinsdiscrtizer_get_feature_names_out  s   

r   )2r.   numpyr&   scipy.sparsera   rc   rB   sklearnr   sklearn.preprocessingr   r   sklearn.utils._testingr   r   r   r   markparametrizer#   r+   r3   r9   r@   rI   rL   r   rT   rW   ri   rk   rr   r{   r   r   r   r   r   r   r   r   r   filterwarningsr   r   r   r   r*   r   r   r!   r!   r!   r"   <module>   s    
&






 

	
		
	