
     h                         d Z ddlmZ ddlmZ ddlZddlZddlm	Z
 ddlmZ ddlmZ ddlmZ ddlmZ dd	lmZ d
 Zd Zd Zd Zd Zd Zd ZdS )zTest the 20news downloader, if the data is available,
or if specifically requested via environment variable
(e.g. for CI jobs).    )partial)patchN)check_as_framecheck_pandas_dependency_message)check_return_X_y)assert_allclose_dense_sparse	normalizec                 n    | dd          }|j                             d          sJ  | d|j        ddd         d          }|j        |j        dd          k    sJ t          j        |j                                                  d	d
gk    sJ t          |j                  t          |j                  k    sJ t          |j                  t          |j	                  k    sJ |j	        d	         }|j        |j        d	                  }|j        
                    |          }|j	        t          j        |j        |k              d	         d	                  }||k    sJ  | ddd          \  }}t          |          t          |j	                  k    sJ |j        |j        j        k    sJ d S )NallF)subsetshuffle.. _20newsgroups_dataset:)r   
categoriesr   r      T)r   r   
return_X_y)DESCR
startswithtarget_namesnpuniquetargettolistlen	filenamesdataindexwhereshape)	fetch_20newsgroups_fxtr    	data2catsentry1categorylabelentry2Xys	            ^/var/www/html/Sam_Eipo/venv/lib/python3.11/site-packages/sklearn/datasets/tests/test_20news.pytest_20newsr-      s   !!>>>D:  !<===== '&!22b8!<e  I
 !T%6rss%;;;;;9Y%&&--//Aq69999 y"##s9+;'<'<<<<<y"##s9>':'::::: ^AF%i&6q&9:H##H--EYrxu 455a8;<FV "!$OOODAqq66S^^####7dk'''''''    c                 ,    | d          }t          |d                   t          |j                  k    sJ t          |d                   t          |j                  k    sJ t          |d                   t          |j                  k    sJ dS )zuChecks the length consistencies within the bunch

    This is a non-regression test for a bug present in 0.16.1.
    r   r   r    r   r   N)r   r    r   r   )r$   r    s     r,   test_20news_length_consistencyr1   3   s     "!///DtF|DI....tH~#dk"2"22222tK !!S%8%8888888r.   c                 ~    | d          }t          j        |j                  sJ |j        j        dk    sJ |j        j        d         dk    sJ |j        j        t          j        k    sJ |j        	                    d          sJ  | d          }t          j        |j                  sJ |j        j        dk    sJ |j        j        d         d	k    sJ |j        j        t          j        k    sJ |j        	                    d          sJ t          | d          }t          ||            | d
          }t          j        |j                  sJ |j        j        dk    sJ |j        j        d         dk    sJ |j        j        t          j        k    sJ |j        	                    d          sJ d S )Ntrainr0   )2,  ; r   r4   r   test)l  r5   r7   r   )I  r5   r8   )spisspmatrix_csrr    r#   r   dtyper   float64r   r   r   r   )!fetch_20newsgroups_vectorized_fxtbunch
fetch_funcs      r,   test_20news_vectorizedr@   ?   s   --W===EUZ(((((:....<a E)))):rz))));!!"=>>>>> .-V<<<EUZ(((((:~----<a D((((:rz))));!!"=>>>>> :6JJJJUJ''' .-U;;;EUZ(((((:55555<a L0000:rz))));!!"=>>>>>>>r.   c                 @    | d          } | d          }|d         d d         }|d         d d         }t          |t          |                     t          j        t          j                            |                                d          d          sJ d S )NFr
   Tr    d   r   )axis)r	   r   r   allcloselinalgnormtodense)r=   r*   X_X_norms       r,   test_20news_normalizationrJ   ]   s    ))E:::A	*	*T	:	:	:BZF	&	$3$A 1666;ry~~fnn&6&6Q~??CCCCCCCr.   c                 l   t          j        d           | d          }t          ||            |j        }|j        dk    sJ t          fd|j        j        D                       sJ dD ]}||                                v sJ d|                                v sJ |j	        j
        dk    sJ d S )NpandasTas_frame)r4   i< c                 :    g | ]}t          |j                  S  )
isinstanceSparseDtype).0colpds     r,   
<listcomp>z(test_20news_as_frame.<locals>.<listcomp>o   s%    MMMC
3//MMMr.   )beginner	beginners	beginning
beginningsbeginsbegleybegonecategory_class)pytestimportorskipr   framer#   r   r    dtypeskeysr   name)r=   r>   ra   expected_featurerU   s       @r,   test_20news_as_framerf   g   s    		X	&	&B--t<<<E5;<<<KE;/))))MMMM5:;LMMMNNNNN 	0 	0  5::<</////uzz||++++< 0000000r.   c                 $    t          |            d S )Nr   )r=   hide_available_pandass     r,   test_as_frame_no_pandasri      s    #$EFFFFFr.   c                 B   t          d          5 }t          d          5 }d|_        d|_        d}t          j        t          |          5   | d           d d d            n# 1 swxY w Y   d d d            n# 1 swxY w Y   d d d            d S # 1 swxY w Y   d S )Nzos.path.existszjoblib.loadT)r*   r+   zThe cached dataset located in)matchrM   )r   return_valuer_   raises
ValueError)r=   mock_is_exist	mock_loaderr_msgs       r,   test_outdated_picklerr      s   		 	  AM=!! 	AY)-M&%/I"5Gz999 A A114@@@@A A A A A A A A A A A A A A A	A 	A 	A 	A 	A 	A 	A 	A 	A 	A 	A 	A 	A 	A 	AA A A A A A A A A A A A A A A A A AsX   B,A<A%A<%A))A<,A)-A<0B<B 	 BB 	BBB)__doc__	functoolsr   unittest.mockr   r_   numpyr   scipy.sparsesparser9   "sklearn.datasets.tests.test_commonr   r   r   sklearn.utils._testingr	   sklearn.preprocessingr   r-   r1   r@   rJ   rf   ri   rr   rP   r.   r,   <module>r|      s5                           = = = = = = N N N N N N ? ? ? ? ? ? ? ? ? ? ? ? + + + + + +( ( (@	9 	9 	9? ? ?<D D D1 1 12G G G	A 	A 	A 	A 	Ar.   