o
    tBh                     @   s   d Z ddlmZ ddlmZ ddlZddlZddlm	Z
 ddlmZ ddlmZ ddlmZ ddlmZ dd	lmZ d
d Zdd Zdd Zdd Zdd Zdd Zdd ZdS )zTest the 20news downloader, if the data is available,
or if specifically requested via environment variable
(e.g. for travis cron job).    )partial)patchN)check_as_framecheck_pandas_dependency_message)check_return_X_y)assert_allclose_dense_sparse	normalizec           	      C   s(  | ddd}|j dsJ | d|jddd dd}|j|jdd  ks'J t|j d	d
gks5J t|jt|jksAJ t|jt|j	ksMJ |j	d	 }|j|jd	  }|j
|}|j	t|j|kd	 d	  }||ksuJ | dddd\}}t|t|j	ksJ |j|jjksJ d S )NallF)subsetshuffle.. _20newsgroups_dataset:)r   
categoriesr   r      T)r   r   
return_X_y)DESCR
startswithtarget_namesnpuniquetargettolistlen	filenamesdataindexwhereshape)	fetch_20newsgroups_fxtr   	data2catsentry1categorylabelentry2Xy r*   y/var/www/html/riverr-enterprise-integrations-main/venv/lib/python3.10/site-packages/sklearn/datasets/tests/test_20news.pytest_20news   s"   
r,   c                 C   s\   | dd}t |d t |jksJ t |d t |jksJ t |d t |jks,J dS )zuChecks the length consistencies within the bunch

    This is a non-regression test for a bug present in 0.16.1.
    r   r   r   r   r   N)r   r   r   r   )r"   r   r*   r*   r+   test_20news_length_consistency3   s   
r.   c                 C   s:  | dd}t |jsJ |jjdksJ |jjd dksJ |jjtjks(J |j	ds0J | dd}t |js=J |jjdksEJ |jjd d	ksOJ |jjtjksXJ |j	ds`J t
| dd}t|| | d
d}t |jsxJ |jjdksJ |jjd dksJ |jjtjksJ |j	dsJ d S )Ntrainr-   )2,  ; r   r0   r   test)l  r1   r3   r   )I  r1   r4   )spisspmatrix_csrr   r!   r   dtyper   float64r   r   r   r   )!fetch_20newsgroups_vectorized_fxtbunch
fetch_funcr*   r*   r+   test_20news_vectorized?   s(   



r<   c                 C   sf   | dd}| dd}|d d d }|d d d }t |t| ttjj| ddds1J d S )NFr	   Tr   d   r   )axis)r   r
   r   allcloselinalgnormtodense)r9   r(   X_X_normr*   r*   r+   test_20news_normalization]   s   

$rE   c                    s   t d | dd}t||  |j}|jdksJ t fdd|jjD s(J dD ]
}|| v s4J q*d| v s=J |j	j
dksEJ d S )	NpandasTas_frame)r0   i< c                    s   g | ]}t | jqS r*   )
isinstanceSparseDtype).0colpdr*   r+   
<listcomp>o   s    z(test_20news_as_frame.<locals>.<listcomp>)beginner	beginners	beginning
beginningsbeginsbegleybegonecategory_class)pytestimportorskipr   framer!   r   r   dtypeskeysr   name)r9   r:   rZ   expected_featurer*   rM   r+   test_20news_as_frameg   s   


	r_   c                 C   s   t |  d S )Nr   )r9   hide_available_pandasr*   r*   r+   test_as_frame_no_pandas   s   ra   c              
   C   s   t dI}t d,}d|_d|_d}tjt|d | dd W d    n1 s)w   Y  W d    n1 s8w   Y  W d    d S W d    d S 1 sPw   Y  d S )Nzos.path.existszjoblib.loadT)r(   r)   zThe cached dataset located in)matchrG   )r   return_valuerX   raises
ValueError)r9   mock_is_exist	mock_loaderr_msgr*   r*   r+   test_outdated_pickle   s   

"ri   )__doc__	functoolsr   unittest.mockr   rX   numpyr   scipy.sparsesparser5   "sklearn.datasets.tests.test_commonr   r   r   sklearn.utils._testingr   sklearn.preprocessingr
   r,   r.   r<   rE   r_   ra   ri   r*   r*   r*   r+   <module>   s$     
