o
    tBhe                     @   sp  d Z ddlZddlZddlZddlZddlZddlmZ ddlm	Z	 ddl
ZddlZddlZddlZddlmZ ddlmZ ddlmZmZmZmZmZmZmZ ddlmZmZmZ dd	l m!Z! dd
l"m#Z#m$Z$ ddl%m&Z& ddl'm(Z( ddl)m*Z* ddl+m,Z, ddl"m-Z- dZ.dZ/e,eddZdd Z0dd Z1G dd dZ2dd Z3ej45dddde6fddde6fd ddej7fd ddej7fd!ddej7fd!ddej7fd"ddej8fd"ddej7fd#ddd$fd#ddd$fg
d%d& Z9ej45d'd(ddgd)d* Z:e-d+d, Z;e-d-d. Z<e-d/d0 Z=e-d1d2 Z>e-d3d4 Z?d5d6 Z@e-d7d8 ZAe-d9d: ZBe-d;d< ZCe-d=d> ZDe-d?d@ ZEe-dAdB ZFe-dCdD ZGej45dEddFgdGdH ZHdIdJ ZIej45dEddFgdKdL ZJej45dEddFgdMdN ZKdOdP ZLej45dEddFgdQdR ZMej45dEddFgdSdT ZNdUdV ZOej45dEddFgdWdX ZPej45dEddFgdYdZ ZQej45dEddFgd[d\ ZRej45dEddFgd]d^ ZSd_d` ZTej45dEddFgdadb ZUej45dEddFgej45dcddFgddde ZVdfdg ZWdhdi ZXej45dEddFgdjdk ZYej45dEddFgdldm ZZej45dEddFgdndo Z[ej45dEddFgdpdq Z\ej45dEddFgdrds Z]ej45dEddFgdtdu Z^ej45dEddFgdvdw Z_ej45dEddFgdxdy Z`ej45dEddFgdzd{ Zaej45dEddFgd|d} Zbej45dEddFgd~d Zcdd Zdej45dEddFgdd Zee-ej45dddFgdd Zfdd Zgdd Zhdd ZidS )zTest the openml loader.
    N)	resources)BytesIO)config_context)fetch_openml)_open_openml_url_arff
_DATA_FILE_OPENML_PREFIX_get_data_description_by_id_get_local_path_retry_with_clean_cache)_convert_arff_data_convert_arff_data_dataframe_feature_to_dtype)is_scalar_nan)assert_allcloseassert_array_equal	HTTPError)check_return_X_y)ArffContainerType)partial)fails_if_pypyz"sklearn.datasets.tests.data.openmlT	data_homec           
      C   s   dd }t | dd dd}t| d }|d  dk}|du r!tdt|d	 }t|d d
}tjdd |D |r<tj	ntj
dd}W d    n1 sLw   Y  tjt|d dd}tt|jD ]}	tj|d d |	f |||	 qcd S )Nc                    s^   | j | }|| jv r&| j|   fdd| jd d |f D }tj|ddS | jd d |f S )Nc                    s$   g | ]}t |r
d n t| qS N)r   int).0idxcat y/var/www/html/riverr-enterprise-integrations-main/venv/lib/python3.10/site-packages/sklearn/datasets/tests/test_openml.py
<listcomp>?   s    z>_test_features_list.<locals>.decode_column.<locals>.<listcomp>Odtype)feature_names
categoriesdatanparray)
data_bunchcol_idxcol_nameresultr!   r   r"   decode_column9   s   



z*_test_features_list.<locals>.decode_columnFdata_idcachetarget_columnas_frameformatsparse_arffTzIThis test is not intended for sparse data, to keep code relatively simplefile_idr   c                 s   s    | ]}| d V  qdS )utf-8N)decode)r   liner!   r!   r"   	<genexpr>V   s    z&_test_features_list.<locals>.<genexpr>)return_typeencode_nominalr)   r$   r%   )r   r
   lower
ValueErrorr   r6   r   r   loadCOO	DENSE_GENr*   r+   listrangelenr'   testingr   )
r2   r0   r,   data_descriptionsparseurlf	data_arffdata_downloadedir!   r!   r"   _test_features_list5   s0   
rO   c              	   C   s  t ||ddd}t|jd | ksJ t  tjdtd t |ddd W d    n1 s0w   Y  t | d|dd}|jd |ksFJ |jj||fksPJ t	|t
rg|jj|fks^J |j|gksfJ nt	|tr|jj|t|fksxJ |j|ksJ |jj|ksJ |jj|ksJ t|j|ksJ |jD ]	}t	|t
sJ q|j D ]G\}}|j|}t  tjdtd	d
 t|jd d |f }W d    n1 sw   Y  |t| }t|ttt|ksJ q|
r"t | ddd}tj|j|j |jjtjkrtj|j|j nt|j|js"J |	r1t	|jtj j!s0J nt	|jtj"s;J t#t$|j|ksIJ t%t | d|dd}t&|| |S )NF)nameversionr3   r5   idignore)category)rP   r3   r5   r1   rP   zelementwise comparison failed)rT   messager2   r3   r5   )'r   r   detailswarningscatch_warningssimplefilterUserWarningr)   shape
isinstancestrtargettarget_namesrD   rF   r&   r'   r(   itemsindexfilterwarningsDeprecationWarningr*   uniqueisfinitesetrE   rG   r   float64array_equalscipyrI   
csr_matrixndarraycount_nonzeroisnanr   r   )r2   	data_namedata_versionr4   expected_observationsexpected_featuresexpected_missingexpected_data_dtypeexpected_target_dtypeexpect_sparsecompare_default_targetdata_by_name_id
data_by_idfeaturer(   feature_idxvaluesdata_by_id_default
fetch_funcr!   r!   r"   _fetch_dataset_from_openmlf   sn   
	




r   c                   @   sF   e Zd Zdd ZdddZdd Zdd	 Zd
d Zdd Zdd Z	dS )_MockHTTPResponsec                 C   s   || _ || _d S r   )r)   is_gzip)selfr)   r   r!   r!   r"   __init__   s   
z_MockHTTPResponse.__init__c                 C   s   | j |S r   )r)   read)r   amtr!   r!   r"   r      s   z_MockHTTPResponse.readc                 C   s   | j   d S r   )r)   closer   r!   r!   r"   r      s   z_MockHTTPResponse.closec                 C   s   | j rddiS i S )NzContent-Encodinggzipr   r   r!   r!   r"   info   s   z_MockHTTPResponse.infoc                 C   s
   t | jS r   )iterr)   r   r!   r!   r"   __iter__   s   
z_MockHTTPResponse.__iter__c                 C   s   | S r   r!   r   r!   r!   r"   	__enter__      z_MockHTTPResponse.__enter__c                 C   s   dS )NFr!   )r   exc_typeexc_valexc_tbr!   r!   r"   __exit__   r   z_MockHTTPResponse.__exit__N)r   )
__name__
__module____qualname__r   r   r   r   r   r   r   r!   r!   r!   r"   r      s    
r   c                    s   d
ddddt j	td d|  fdd	  	fd
d
fddfddfdd 	fdd
fdd}tr]| tjjd| d S d S )Nz$https://openml.org/api/v1/json/data/z-https://openml.org/api/v1/json/data/features/zhttps://openml.org/data/v1/z)https://openml.org/api/v1/json/data/list/z.gz.id_c                    s~   t dd| tdd  |   }|dddddd	d
dddddddddddddddS )Nz\W-zhttps://openml.org/z-json-data-listz-jdlz-json-data-featuresz-jdfz-json-data-qualitiesz-jdqz
-json-dataz-jdz
-data_namez-dnz	-downloadz-dlz-limitz-lz-data_versionz-dvz-statusz-sz-deactivatedz-dactz-activez-act)resubrF   replace)rJ   suffixoutput)path_suffixr!   r"   
_file_name   s    
z4_monkey_patch_webbased_functions.<locals>._file_namec                    s   |  |sJ  | |}t|.}|r)r)t| }t|dW  d    S |d}t| }t|dW  d    S 1 sCw   Y  d S )NTrbF)
startswithr   open_binaryr   r   r   )rJ   has_gzip_headerexpected_prefixr   data_file_namerK   fpdecompressed_f)r   data_modulegzip_responseread_fnr!   r"   _mock_urlopen_shared  s   

$z>_monkey_patch_webbased_functions.<locals>._mock_urlopen_sharedc                        | |ddS N.jsonrJ   r   r   r   r!   rJ   r   )r   url_prefix_data_descriptionr!   r"   _mock_urlopen_data_description     zH_monkey_patch_webbased_functions.<locals>._mock_urlopen_data_descriptionc                    r   r   r!   r   )r   url_prefix_data_featuresr!   r"   _mock_urlopen_data_features  r   zE_monkey_patch_webbased_functions.<locals>._mock_urlopen_data_featuresc                    r   )Nz.arffr   r!   r   )r   url_prefix_download_datar!   r"   _mock_urlopen_download_data  r   zE_monkey_patch_webbased_functions.<locals>._mock_urlopen_download_datac                    s   |  sJ  | d}t|}|d}| d}t|}W d    n1 s.w   Y  d|v r@td ddd d dt|,}|r[t| }t	|dW  d    S |d}t| }t	|d	W  d    S 1 suw   Y  d S )
Nr   r   r9   error  Simulated mock errorrJ   codemsghdrsr   TF)
r   r   r   r   r:   jsonloadsr   r   r   )rJ   r   r   rK   r   	decoded_s	json_datar   )r   r   r   url_prefix_data_listr!   r"   _mock_urlopen_data_list'  s(   



$zA_monkey_patch_webbased_functions.<locals>._mock_urlopen_data_listc                    sr   |   }| ddk}|r||S |r||S |r)||S |r3 ||S td| )NzAccept-encodingr   zUnknown mocking URL pattern: %s)get_full_url
get_headerr   r@   )requestargskwargsrJ   r   )r   r   r   r   r   r   r   r   r!   r"   _mock_urlopen?  s   







z7_monkey_patch_webbased_functions.<locals>._mock_urlopenurlopen)r   openOPENML_TEST_DATA_MODULEtest_offlinesetattrsklearndatasets_openml)contextr2   r   r   r!   )r   r   r   r   r   r   r   r   r   r   r   r   r   r   r"    _monkey_patch_webbased_functions   s"   r   zfeature, expected_dtypestring0)	data_typenumber_of_missing_values1numericrealintegernominalrT   c                 C   s   t | |ksJ d S r   )r   )rz   expected_dtyper!   r!   r"   test_feature_to_dtypeR  s   r   rz   datatimec                 C   sF   d | }tjt|d t|  W d    d S 1 sw   Y  d S )NzUnsupported feature: {}match)r6   pytestraisesr@   r   )rz   r   r!   r!   r"   test_feature_to_dtype_errore  s   

"r   c                 C   sl  t d}|jjj}d}d}d}d}|g d}tjgd }g d}	d	}
t| |d
 t|d
dd}|j	}|j
}|j}t||jsBJ t|j|ksLJ |j|ksSJ t|j|	ks]J t|j|	ksgJ |j|
gksoJ t||jswJ |j|ks~J |j|ksJ |j|
ksJ |jjsJ t||jsJ |j|ksJ t|j||g ksJ |jjsJ d S )Npandas=   )      )r   r      zIris-setosazIris-versicolorzIris-virginicar   )sepallength
sepalwidthpetallength
petalwidthclassTFr2   r5   r3   )r   importorskipapitypesCategoricalDtyper*   rh   r   r   r)   r_   framer]   	DataFramealldtypesr\   columnsr'   r`   Seriesr&   rP   rb   	is_unique)monkeypatchpdr   r2   
data_shapetarget_shapeframe_shapetarget_dtypedata_dtypes
data_namestarget_namebunchr)   r_   r   r!   r!   r"   test_fetch_openml_iris_pandasp  s@   

r	  c                 C   sf   t d d}t| |d t|ddd}|j}|j}t|ddd}|j}|j}t|| t|| d S )Nr   r   TFr   )r   r   r   r   r)   r_   r   r   )r   r2   frame_bunch
frame_dataframe_target
norm_bunch	norm_datanorm_targetr!   r!   r"   /test_fetch_openml_iris_pandas_equal_to_no_frame  s   

r  c                 C   s  t d}|jjj}d}d}d}d}ddg}|g d}tjtjg|g }	g d	}
tjtjg}ddg}t| |d
 t|d
d|d}|j	}|j
}|j}t||jsRJ t|j|	ks\J |j|kscJ t|j|
ksmJ t|j|
kswJ |j|ks~J t||jsJ t|j|ksJ |j|ksJ t|j|ksJ t||jsJ |j|ksJ t|jtjgd |g ksJ d S )Nr   r   )r      )r      r   r   r   r   )r   r   r   TFr2   r5   r3   r4   r   )r   r   r   r   r   r*   rh   r   r   r)   r_   r   r]   r   r   r   r\   r   r'   r`   )r   r   r   r2   r  r  r  r4   	cat_dtyper  r  target_dtypesr`   r  r)   r_   r   r!   r!   r"   )test_fetch_openml_iris_multitarget_pandas  s@   

&r  c                    s  t d}|jjj d}d}d}d}d}d}d}t| |d	 t|d	|d
d}	|	j}
|	j}|	j	}t
|
|js7J |
j|ks>J t fdd|
jD }tdd |
jD }||ksZJ ||ks`J t
||jshJ |j|ksoJ t
|j swJ t
||jsJ |j|ksJ d S )Nr   r  r   )   &   )r  )r  '          TF)r2   r5   r4   r3   c                       g | ]	}t | r|qS r!   r]   r   r&   r   r!   r"   r#         z3test_fetch_openml_anneal_pandas.<locals>.<listcomp>c                 S      g | ]	}|j d kr|qS rK   kindr  r!   r!   r"   r#     r   )r   r   r   r   r   r   r   r)   r_   r   r]   r   r\   rF   r   r   r&   )r   r   r2   r4   r  r  r  expected_data_categoriesexpected_data_floatsr  r)   r_   r   n_categoriesn_floatsr!   r  r"   test_fetch_openml_anneal_pandas  s:   

r)  c                 C   sB  t d}|jjj}d}d}d}d}|g d}|gtjgd  }g d}	d	}
t| |d
 t|d
dd}|j	}|j
}|j}t||jsEJ |j|ksLJ t|j|ksVJ t|j|	ks`J t|j|	ksjJ |j|
gksrJ t||jszJ |j|ksJ |jtjksJ |j|
ksJ t||jsJ |j|ksJ d S )Nr   1  )      )r+  )r+     )adviseramdahlapollobasfbti	burroughszc.r.dcdccambexdecdg	formationz
four-phasegouldhpharris	honeywellibmiplmagnuson	microdatanasncrnixdorfzperkin-elmerprimesiemenssperrysratuswangr  )vendorMYCTMMINMMAXCACHCHMINCHMAXr   TFr   )r   r   r   r   r   r*   rh   r   r   r)   r_   r   r]   r   r\   r   r   r   r'   r`   r   r&   rP   )r   r   r   r2   r  r  r  r  r  r'   r  r  r)   r_   r   r!   r!   r"   test_fetch_openml_cpu_pandas
  s:   

"rP  c                 C   sV   d}t | |d d}tjt|d t|ddd W d    d S 1 s$w   Y  d S )N$  Tz(Cannot return dataframe with sparse datar   Fr   r   r   r   r@   r   r   r2   r   r!   r!   r"   0test_fetch_openml_australian_pandas_error_sparseP  s   "rT  c                 C   sp   t d}d}t| |d t|ddd}t|j|jsJ d}t| |d t|ddd}t|jtjj	s6J d S )Nr   r   TautoFr   rQ  )
r   r   r   r   r]   r)   r   rj   rI   rk   )r   r   r2   r)   r!   r!   r"   test_fetch_openml_as_frame_auto\  s   
rV  c              	   C   s   t d d}t| |d d}t jt|d- tdd t|ddd	 W d    n1 s.w   Y  W d    d S W d    d S 1 sFw   Y  d S )
Nr   _  Tz*Could not adhere to working_memory config.r   gư>)working_memoryFr   )r   r   r   warnsr[   r   r   rS  r!   r!   r"   :test_convert_arff_data_dataframe_warning_low_memory_pandasm  s   
"rZ  c                    s   t d}|jjj d}d}d}d}d}d}t| |d t|dd	dd
\}}	t||js.J |j	|ks5J t
 fdd|jD }
t
dd |jD }||
ksQJ ||ksWJ t|	|js_J |	j	|ksfJ |	j|ksmJ d S )Nr   rW  
      r\  r-  r  r   TF)r2   r5   r3   
return_X_yc                    r  r!   r  r  r  r!   r"   r#     r   zCtest_fetch_openml_adultcensus_pandas_return_X_y.<locals>.<listcomp>c                 S   r!  r"  r#  r  r!   r!   r"   r#     r   )r   r   r   r   r   r   r   r]   r   r\   rF   r   r   rP   )r   r   r2   r  r  r%  r&  r4   Xyr'  r(  r!   r  r"   /test_fetch_openml_adultcensus_pandas_return_X_y|  s*   

rb  c                    s  t d}|jjj d}d}d}d}d}d}d}t| |d	 t|d	d
d}	|	j}
|	j}|	j	}t
|
|js6J |
j|ks=J t fdd|
jD }tdd |
jD }||ksYJ ||ks_J t
||jsgJ |j|ksnJ |j|ksuJ t
||js}J |j|ksJ d S )Nr   rW  r[  r^  )r\     r-  r  r   TFr   c                    r  r!   r  r  r  r!   r"   r#     r   z8test_fetch_openml_adultcensus_pandas.<locals>.<listcomp>c                 S   r!  r"  r#  r  r!   r!   r"   r#     r   )r   r   r   r   r   r   r   r)   r_   r   r]   r   r\   rF   r   r   rP   )r   r   r2   r  r  r  r%  r&  r4   r  r)   r_   r   r'  r(  r!   r  r"   $test_fetch_openml_adultcensus_pandas  s6   

rd  c                    s2  t d}|jjj d}d}d}d}d}d}d}t| |d	 t|d	d
d}	|	j}
|	j}|	j	}t
|
|js6J |
j|ks=J t|
jtjksHJ t
||jsPJ t
|j sXJ |j|ks_J |j|ksfJ t
||jsnJ |j|ksuJ t fdd|jD }tdd |jD }||ksJ ||ksJ d S )Nr     )r,  M   )r,  )r,  N   r      rf  TFr   c                    r  r!   r  r  r  r!   r"   r#     r   z8test_fetch_openml_miceprotein_pandas.<locals>.<listcomp>c                 S   r!  r"  r#  r  r!   r!   r"   r#     r   )r   r   r   r   r   r   r   r)   r_   r   r]   r   r\   r*   r   r   rh   r   r&   rP   rF   )r   r   r2   r  r  r  r4   frame_n_categoriesframe_n_floatsr  r)   r_   r   r'  r(  r!   r  r"   $test_fetch_openml_miceprotein_pandas  s:   

rk  c                    s  t d}|jjj d}g d}d}d}d}d}d}t| |d	 t|d	d
|d}	|	j}
|	j}|	j	}t
|
|js9J |
j|ks@J t
||jsHJ |j|ksOJ t|j|ksYJ t
||jsaJ |j|kshJ t fdd|jD }tdd |jD }||ksJ ||ksJ d S )Nr   鍞  zamazed.suprisedzhappy.pleasedzrelaxing.calmzquiet.stillz
sad.lonelyzangry.aggresive)   H   )rn  r  )rn  rg  r  ro  TFr  c                    r  r!   r  r  r  r!   r"   r#     r   z5test_fetch_openml_emotions_pandas.<locals>.<listcomp>c                 S   r!  r"  r#  r  r!   r!   r"   r#     r   )r   r   r   r   r   r   r   r)   r_   r   r]   r   r\   r*   r   r   rF   r   )r   r   r2   r4   r  r  r  expected_frame_categoriesexpected_frame_floatsr  r)   r_   r   r'  r(  r!   r  r"   !test_fetch_openml_emotions_pandas  s:   

rr  c                    sr  t d}|jjj}d}d}d}d}tjt|ddgtjtjtjttjt|g dttjt|d	d
gd g d} fdd|D }g d}	d}
t| |d t	|ddd}|j
}|j}|j}t||jsfJ |j|ksmJ t|j|	kswJ |j|
gksJ t||jsJ |j|ksJ |j|
ksJ |j |
 ksJ t||jsJ |j|ksJ t|j|ksJ d S )Nr     )  rn  )rt  )rt  r]  femalemale)CQSr   r   )pclassrP   sexagesibspparchticketfarecabinembarkedboatbody	home.destsurvived)rz  r  rP   r{  r|  r}  r~  r  r  r  r  r  r  r  c                    s   g | ]} | qS r!   r!   )r   colname_to_dtyper!   r"   r#   K  s    z4test_fetch_openml_titanic_pandas.<locals>.<listcomp>)rz  rP   r{  r|  r}  r~  r  r  r  r  r  r  r  r  TFr   )r   r   r   r   r   r*   rh   objectr   r   r)   r_   r   r]   r   r\   r   r   r`   r   rP   r&   r   )r   r   r   r2   r  r  r  frame_columnsframe_dtypesr'   r  r  r)   r_   r   r!   r  r"    test_fetch_openml_titanic_pandas   sR   




r  r   Fc                 C   sZ   d}d}t | || d}tjt|d t|ddd W d    d S 1 s&w   Y  d S )Nr   iriszMultiple active versions of the dataset matching the name iris exist. Versions may be fundamentally different, returning version 1.r   F)rP   r5   r3   r   r   rY  r[   r   )r   r   r2   ro   r   r!   r!   r"   test_fetch_openml_irisr  s   "r  c                 C      d}t | |d t| d S )Nr   Fr   rO   r   r2   r!   r!   r"   test_decode_iris     r  c           	      C   sR   d}d}d}ddg}d}d}d}t | || t|||||||tjtjd	d	d
 d S )Nr   r  rh  r   r   r   r  r   Frv   rw   r   r   r*   rh   	r   r   r2   ro   rp   r4   rq   rr   rs   r!   r!   r"   "test_fetch_openml_iris_multitarget  s*   
r  c           	      C   L   d}d}d}d}d}d}d}t | || t|||||||tjtdd	d
 d S )Nr  annealrh  r   r  r    FTr  r   r   r*   rh   r  r  r!   r!   r"   test_fetch_openml_anneal  *   
r  c                 C   r  )Nr  Fr  r  r!   r!   r"   test_decode_anneal  r  r  c           	      C   sP   d}d}d}g d}d}d}d}t | || t|||||||tjtddd	 d S )
Nr  r  rh  )r   zproduct-typer\   r  $   r  Fr  r  r  r!   r!   r"   $test_fetch_openml_anneal_multitarget  s*   
r  c           	      C   sN   d}d}d}d}d}d}d}t | || t|||||||tjtjdd	d
 d S )Nr*  cpurh  r   r+  r,  r   FTr  r  r  r!   r!   r"   test_fetch_openml_cpu  s*   
r  c                 C   r  )Nr*  Fr  r  r!   r!   r"   test_decode_cpu  r  r  c           
      C   s   d}d}d}d}d}d}d}t | || d}	tjt|	d	 tdi |||||||d
tjtdd W d    d S 1 s=w   Y  d S )NrQ  
Australianrh  YU   r]  r   z,Version 1 of dataset Australian is inactive,r   TF)r2   ro   rp   r4   rq   rr   rs   rv   rt   ru   rw   r!   )r   r   rY  r[   r   r*   rh   r  )
r   r   r2   ro   rp   r4   rq   rr   rs   r   r!   r!   r"   test_fetch_openml_australian   s2   "r  c           	      C   r  )NrW  zadult-censusrh  r   r\  r]  r   FTr  r  r  r!   r!   r"   test_fetch_openml_adultcensus"  r  r  c           	      C   sL   d}d}d}d}d}d}d}t | || t|||||||tjtddd	 d S )
Nre  MiceProteinr   r   r,  rf  FTr  r  r  r!   r!   r"   test_fetch_openml_miceprotein=  s*   
r  c           	      C   sP   d}d}d}g d}d}d}d}t | || t|||||||tjtdd	d
 d S )Nrl  emotionsr  rm  rn  ro  r   FTr  r  r  r!   r!   r"   test_fetch_openml_emotions[  s*   
r  c                 C   r  )Nrl  Fr  r  r!   r!   r"   test_decode_emotions}  r  r  c           	      C   st   d}t | || tjjj|}t|d}t||}t	||}t
j|s)J t||}| | ks8J d S )Nr   scikit_learn_data)r   r   r   r   r   r6   r^   mkdirr   r   ospathisfiler   )	r   r   tmpdirr2   openml_pathcache_directory	response1location	response2r!   r!   r"   test_open_openml_url_cache  s   


r  write_to_diskc                    s   d}t jjj|}t|d}t||  fdd}| t jjd| t	j
tdd t|| W d    n1 s=w   Y  tj rJJ d S )Nr   r  c                    sF   rt  d}|d W d    td1 sw   Y  td)Nw Invalid request)r   writer@   )r   r   r   rK   r  r  r!   r"   r     s   
z>test_open_openml_url_unlinks_local_path.<locals>._mock_urlopenr   r  r   )r   r   r   r   r6   r^   r  r   r   r   r   r@   r   r  r  exists)r   r   r  r  r2   r  r  r   r!   r  r"   'test_open_openml_url_unlinks_local_path  s   
r  c                    s   d}t jjj|}t| d}t|| t	tj
  t d}|d W d    n1 s4w   Y  t|| fdd}d}tjt|d | }W d    n1 s[w   Y  |d	ksfJ d S )
Nr   r  r  r  c                      s   t j r
tddS )NzFile exist!rh  )r  r  r  	Exceptionr!   r  r!   r"   
_load_data  s   z/test_retry_with_clean_cache.<locals>._load_dataz!Invalid cache, redownloading filer   rh  )r   r   r   r   r6   r^   r  r   r  makedirsr  dirnamer   r  r   r   rY  RuntimeWarning)r  r2   r  r  rK   r  warn_msgr/   r!   r  r"   test_retry_with_clean_cache  s   
r  c                 C   sr   d}t jjj|}t| d}t||dd }d}tj	t
|d |  W d    d S 1 s2w   Y  d S )Nr   r  c                   S   s   t d ddd d d)Nr   r   r   r   r!   r!   r!   r"   r    s   
z:test_retry_with_clean_cache_http_error.<locals>._load_datar   r   )r   r   r   r   r6   r^   r  r   r   r   r   )r  r2   r  r  r  	error_msgr!   r!   r"   &test_retry_with_clean_cache_http_error  s   
"r  c           
      C   s   dd }d}t |d}t| || t|d|ddd\}}| tjjd| t|d|ddd\}}	tj	
|| tj	
||	 d S )	Nc                 _   s   t d|   )NzhThis mechanism intends to test correct cachehandling. As such, urlopen should never be accessed. URL: %s)r@   r   r   r   r   r!   r!   r"   _mock_urlopen_raise  s
   z4test_fetch_openml_cache.<locals>._mock_urlopen_raiser  r  TF)r2   r3   r   r_  r5   r   )r^   r  r   r   r   r   r   r   r*   rG   r   )
r   r   r  r  r2   r  	X_fetched	y_fetchedX_cachedy_cachedr!   r!   r"   test_fetch_openml_cache  s*   

r  c                 C   sR   d}d }d}d}t | || t||ddd}|jj||fks J |jd u s'J d S )Nr   r   r   Fr2   r4   r3   r5   )r   r   r)   r\   r_   )r   r   r2   r4   rq   rr   r)   r!   r!   r"   test_fetch_openml_notarget  s   r  c                 C   s   d}t | || d}tjt|d t|ddd}W d    n1 s#w   Y  |jjdks0J tjt|d td ddddd	}W d    n1 sKw   Y  t|jd
 |ks[J d S )N  z(Version 1 of dataset glass2 is inactive,r   FrV   )   	   glass2rh  )r2   rP   r3   rQ   r5   rR   )	r   r   rY  r[   r   r)   r\   r   rW   )r   r   r2   r   glas2glas2_by_versionr!   r!   r"   test_fetch_openml_inactive  s   
r  c                 C   sT   d}t | || d}tjt|d tddd W d    d S 1 s#w   Y  d S )Nr  zNo active dataset glass2 foundr   r  F)rP   r3   rR  r   r   r2   r   r!   r!   r"   test_fetch_nonexiting  s   "r  c                 C   s^   d}ddg}t | || d}tjt|d t||dd W d    d S 1 s(w   Y  d S )Nr   r   r   z2Can only handle homogeneous multi-target datasets,r   Fr2   r4   r3   rR  )r   r   r2   targetsr   r!   r!   r"   test_raises_illegal_multitarget   s   "r  c                 C   sV  d}d}d}t | || d}||}tjt|d t||ddd W d    n1 s-w   Y  d}||}tjt|d t||ddd W d    n1 sSw   Y  d}||}tjt|d t||d	gddd W d    n1 s{w   Y  d}||}tjt|d t||d	gddd W d    d S 1 sw   Y  d S )
Nre  z,target_column={} has flag is_row_identifier.z$target_column={} has flag is_ignore.MouseIDr   Fr  Genotyper   )r   r6   r   rY  r[   r   )r   r   r2   expected_row_id_msgexpected_ignore_msg
target_colr   r!   r!   r"   test_warn_ignore_attribute+  sL   



"r  c                 C   V   d}t | || d}tjt|d t|ddd W d    d S 1 s$w   Y  d S )Nrs  zOSTRING attributes are not supported for array representation. Try as_frame=Truer   FrV   rR  r  r!   r!   r"   'test_string_attribute_without_dataframeS  s   "r  c                 C   r  )Nrh  zJOpenML registered a problem with the dataset. It might be unusable. Error:r   FrV   r  r  r!   r!   r"   test_dataset_with_openml_error`     "r  c                 C   r  )Nr  zFOpenML raised a warning on the dataset. It might be unusable. Warning:r   FrV   r  r  r!   r!   r"    test_dataset_with_openml_warningi  r  r  c                 C   s   d}t | || d}tjt|d t|ddd W d    n1 s#w   Y  tjt|d t|ddgdd W d    d S 1 sDw   Y  d S )Nr   zCould not find target_column=r   	undefinedFr  r   )r   r   r   KeyErrorr   r  r!   r!   r"   test_illegal_columnr  s   "r  c                 C   sT   d}t | || d}tjt|d t|dd W d    d S 1 s#w   Y  d S )Nr  zTarget column r   family)r2   r4   rR  r  r!   r!   r"   .test_fetch_openml_raises_missing_values_target~  s   "r  c                  C   s   d} t jt| d tdd dd W d    n1 sw   Y  d} t jt| d tddd W d    n1 s:w   Y  t jt| d tdddd W d    n1 sXw   Y  d	} t jt| d t  W d    d S 1 suw   Y  d S )
Nz?Dataset data_id=-1 and version=version passed, but you can onlyr   r   rQ   )r2   rP   rQ   z9Dataset data_id=-1 and name=name passed, but you can onlynAmE)r2   rP   zFNeither name nor data_id are provided. Please provide name or data_id.)r   r   r@   r   )r   r!   r!   r"   )test_fetch_openml_raises_illegal_argument  s   "r  c                 C   sT   d}t | || tjj|ddd}|d usJ |d jdks J d|d vs(J d S )N>   FrV   r)   )e      animalr'   )r   r   r   r   r\   )r   r   r2   datasetr!   r!   r"   &test_fetch_openml_with_ignored_feature  s   r  r5   c                    s>  |rt d d}t| |d td d|  }d}|d  t||}t|d}t|	 }	d	|	t
|	d
 < W d    n1 sCw   Y  t d}
|
|	 W d    n1 s^w   Y  tjjj fdd}| tjjd| t t}tjj|d|d W d    n1 sw   Y  |dsJ d S )Nr   r  Tr   r   zdata-v1-dl-1666876.arff.gzztest_invalid_checksum.arffr   %   rh  wbc                    s\   |   }|dr*t d}| }W d    n1 sw   Y  tt|ddS | S )Nzdata/v1/download/1666876r   Tr   )r   endswithr   r   r   r   )r   r   r   rJ   rK   corrupted_datacorrupt_copy_pathmocked_openml_urlr!   r"   swap_file_mock  s   

z9test_fetch_openml_verify_checksum.<locals>.swap_file_mockr   FrV   1666876)r   r   r   r   r   r   r   r   	bytearrayr   rF   GzipFiler  r   r   r   r   r   r   r@   r   r   )r   r5   r3   r  r2   original_data_moduleoriginal_data_file_name	orig_file	orig_gzipr)   modified_gzipr  excr!   r  r"   !test_fetch_openml_verify_checksum  s2   

	r  c                  C   s   t d dd tdD ddg d} d}t jt|d t| d	gd	gd d
 W d    n1 s1w   Y  ttdddg d} d}t jt|d t| dgi  W d    d S 1 s]w   Y  d S )Nr   c                 s   s    | ]}|V  qd S r   r!   )r   elr!   r!   r"   r<     s    z.test_convert_arff_data_type.<locals>.<genexpr>r  r  )r)   descriptionrelation
attributesz8shape must be provided when arr\['data'\] is a Generatorr   r   )r\   zBarff\['data'\] must be a generator when converting to pd.DataFramea)r   r   rE   r   r@   r   rD   r   )arffr   r!   r!   r"   test_convert_arff_data_type  s   
"r  c                 C   s^   t d d}t| |d t|ddd}|jjd }|jd   s%J t|j	g d dS )	zRcheck that missing values in categories are compatible with pandas
    categoricalr   iY  TFrV   r{  )FEMALEMALE_N)
r   r   r   r   r)   r   isnaanyr   r(   )r   r2   penguinsr  r!   r!   r"   test_missing_values_pandas  s   
r  c              	   C   s   dd }|  tjjd| d}tjttdt	|  dd/}tj
tdd t|d d	d
 W d    n1 s9w   Y  t|dksFJ W d    d S 1 sQw   Y  d S )Nc                 _   s   t dddd d )Nr  i  Simulated network errorr   r  r!   r!   r"   _mock_urlopen_network_error  s   zPtest_open_openml_url_retry_on_network_error.<locals>._mock_urlopen_network_errorr   zinvalid-urlz+A network error occurred while downloading z. Retrying...r   r  r   )delayr  )r   r   r   r   r   rY  r[   r   escaper	   r   r   r   rF   )r   r  invalid_openml_urlrecordr!   r!   r"   +test_open_openml_url_retry_on_network_error  s&   
"r!  )j__doc__r   rX   r   r  r   	importlibr   ior   numpyr*   scipy.sparserj   r   r   r   sklearn.datasetsr   fetch_openml_origsklearn.datasets._openmlr   r   r   r	   r
   r   r   sklearn.datasets._arff_parserr   r   r   sklearn.utilsr   sklearn.utils._testingr   r   urllib.errorr   "sklearn.datasets.tests.test_commonr   sklearn.externals._arffr   	functoolsr   r   r   r   rO   r   r   r   markparametrizer  rh   int64r   r   r	  r  r  r)  rP  rT  rV  rZ  rb  rd  rk  rr  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r!  r!   r!   r!   r"   <module>   s   $	1_s


,

,
)
E



'
+
0
Q





!


!

!






'





,