o
    tBh                  	   @   s  d Z ddlZddlmZ ddlZddlZddlmZ ddlZ	ddl
mZ ddlmZ ddlmZ ddlmZ dd	lmZ dd
lmZmZ ddlmZ ddlmZ ddlmZ ddlmZmZ ddlmZmZm Z m!Z! ddl"m#Z# ddl$m%Z% ddl&m'Z'm(Z(m)Z)m*Z* ddlm+Z+ ddl,m-Z- ddl.m/Z/m0Z0m1Z1 ddl2m3Z3 ddlm4Z4 ddl5m6Z6m7Z7 dd Z8dd Z9dd Z:dd  Z;d!d" Z<d#d$ Z=ej>?d%d&d'gej>?d(d)d*gej>?d+g d,d-d. Z@d/d0 ZAd1d2 ZBd3d4 ZCd5d6 ZDd7d8 ZEd9d: ZFej>?d;eGd<d=d> ZHej>Id?ej>?d@edAdB ZJdCdD ZKdEdF ZLdGdH ZMdIdJ ZNdKdL ZOdMdN ZPdOdP ZQdQdR ZRdSdT ZSdUdV ZTdWdX ZUdYdZ ZVej>?d+g d[d\d] ZWd^d_ ZXd`da ZYej>?d+g d[ej>?dbdcdddgfdedddgfdfddgfgdgdh ZZdidj Z[dkdl Z\dmdn Z]dS )oz=
Several basic tests for hierarchical clustering procedures

    N)mkdtemp)partial)sparse)	hierarchy)connected_components)adjusted_rand_score)METRICS_DEFAULT_PARAMS)assert_almost_equalcreate_memmap_backed_data)assert_array_almost_equal)ignore_warnings)	ward_tree)AgglomerativeClusteringFeatureAgglomeration)_hc_cut_TREE_BUILDERSlinkage_tree_fix_connectivity)grid_to_graph)DistanceMetric)PAIRED_DISTANCEScosine_distancesmanhattan_distancespairwise_distances)normalized_mutual_info_score)kneighbors_graph)average_merge	max_mergemst_linkage_core)IntFloatDict)assert_array_equal)
make_moonsmake_circlesc                  C   s(  t jd} | jdd}tt tdd| W d    n1 s$w   Y  tt t	|dd W d    n1 s?w   Y  tt t	|t 
dd W d    n1 s]w   Y  t | t|}t	|dd	}t|d
 t	|dd	d
  t	|td	}t|d
 t	|dd	d
  d S )N*   )   r$   sizefoo)linkage   r*   connectivityprecomputedaffinityr   cosine	manhattan)nprandomRandomStatenormalpytestraises
ValueErrorr   fitr   onesr   r   r    r   )rngXdisres r?   ~/var/www/html/riverr-enterprise-integrations-main/venv/lib/python3.10/site-packages/sklearn/cluster/tests/test_hierarchical.pytest_linkage_misc5   s"   rA   c            
   	   C   s  t jd} t jddgtd}d|ddddf< | dd}t|j }t	 D ]_}||j
|d\}}}}d	|jd
  d
 }	t|| |	ksHJ tt ||j
t dd W d    n1 sbw   Y  tt ||j
d d |d W d    n1 sw   Y  q(d S )Nr   
   dtyper*      2   d   r+         r)   )r2   r3   r4   r:   boolrandnr   shaper   valuesTlenr6   r7   r8   )
r;   maskr<   r,   tree_builderchildrenn_componentsn_leavesparentn_nodesr?   r?   r@   test_structured_linkage_treeP   s&   
rW   c                  C   sf  t jd} | dd}||d fD ]H}t ( tt t|j	dd\}}}}W d    n1 s3w   Y  W d    n1 sBw   Y  d|j
d  d }t|| |ksZJ qt D ]Q}||d fD ]H}t ( tt ||j	dd\}}}}W d    n1 sw   Y  W d    n1 sw   Y  d|j
d  d }t|| |ksJ qgq_d S )Nr   rF   rG   rB   
n_clustersrH   rI   )r2   r3   r4   rK   r   r6   warnsUserWarningr   rN   rL   rO   r   rM   )r;   r<   this_XrR   rV   rT   rU   rQ   r?   r?   r@   test_unstructured_linkage_treeg   s2   r]   c            	      C   s   t jd} t jddgtd}| dd}t|j }t	 D ] }||j
|d\}}}}d|jd  d }t|| |ks>J qd S )	Nr   rB   rC   rF   rG   r+   rH   rI   )r2   r3   r4   r:   rJ   rK   r   rL   r   rM   rN   rO   )	r;   rP   r<   r,   linkage_funcrR   rV   rT   rU   r?   r?   r@   test_height_linkage_tree   s   
r_   c                  C   sd   t jd} d}| |d}d}t|d}tt || W d    d S 1 s+w   Y  d S )Nr   rG   rF   r$   )memory)	r2   r3   r4   rK   r   r6   r7   r8   r9   )r;   	n_samplesr<   r`   
clusteringr?   r?   r@   .test_agglomerative_clustering_wrong_arg_memory   s   
"rc   c                  C   sZ   t ddgddgg} d}tjt|d t| dd W d    d S 1 s&w   Y  d S )Nr   rI   z;Cosine affinity cannot be used when X contains zero vectorsmatchr0   r.   )r2   arrayr6   r7   r8   r   )r<   msgr?   r?   r@   test_zero_cosine_linkage_tree   s
   "rh   zn_clusters, distance_threshold)N      ?)rB   Ncompute_distancesTFr(   wardcompleteaveragesinglec                 C   s   t jd}t jddgtd}d}||d}t|j }t| ||||d}	|		| |s0|d urNt
|	ds7J |	jjd }
|
d }|	jj|d fksLJ d S t
|	drUJ d S )	Nr   rB   rC   rG   rF   )rY   r,   r(   distance_thresholdrj   
distances_rI   )r2   r3   r4   r:   rJ   rK   r   rL   r   r9   hasattr	children_rq   )rY   rj   rp   r(   r;   rP   ra   r<   r,   rb   
n_childrenrV   r?   r?   r@   'test_agglomerative_clustering_distances   s&   

ru   c               
   C   s^  t jd} t jddgtd}d}| |d}t|j }dD ]}td||d}|	| z&t
 }td|||d}|	| |j}t t |dksLJ W t| nt| w td||d}d	|_|	| tt|j|d
 d |_|	| t t |jdksJ tdt| d dd df |d}tt |	| W d    n1 sw   Y  qtd| ddd}tt |	| W d    n1 sw   Y  t D ]+}	tdt ||f|	dd}|	| tdd |	dd}
|
	| tt|
j|jd
 qtd|dd}|	| t|}td|ddd}
|
	| t|j|
j d S )Nr   rB   rC   rG   rF   rk   rY   r,   r(   )rY   r,   r`   r(   FrI   r1   rl   )rY   r,   r/   r(   rm   r-   )r2   r3   r4   r:   rJ   rK   r   rL   r   r9   r   labels_r&   uniqueshutilrmtreecompute_full_treer	   r   r,   r   
lil_matrixtoarrayr6   r7   r8   r   keysr   r    )r;   rP   ra   r<   r,   r(   rb   tempdirlabelsr/   clustering2X_distr?   r?   r@   test_agglomerative_clustering   s   








r   c                  C   s2   t jd} t| dd}tddd| dS )zhAgglomerativeClustering must work on mem-mapped dataset.

    Non-regression test for issue #19875.
    r   rF   rG   	euclideanro   r/   r(   N)r2   r3   r4   r
   rK   r   r9   )r;   Xmmr?   r?   r@   +test_agglomerative_clustering_memory_mapped  s   r   c                  C   s   t jd} t jddgtd}| dd}t|j }td|d}|	| t 
t |jdks2J ||}|jd dks@J ||}t |d j
dksQJ t||| tt |	|d d  W d    d S 1 ssw   Y  d S )	Nr   rB   rC   rF   rG   r$   rY   r,   rI   )r2   r3   r4   r:   rJ   rK   r   rL   r   r9   r&   rx   rw   	transforminverse_transformr   r6   r7   r8   )r;   rP   r<   r,   aggloX_redX_fullr?   r?   r@   test_ward_agglomeration)  s   



"r   c                  C   sv   t ddd\} }tddd}||  tt|j|d tdd	dd
\}}tddd}|| tt|j|d d S )Ng?r#   )noiserandom_staterH   ro   )rY   r(   rI   ri   g?)factorr   r   )r!   r   r9   r	   r   rw   r"   )moonsmoon_labelsrb   circlescircle_labelsr?   r?   r@   test_single_linkage_clustering>  s   

r   c                 C   sv   g }| |fD ]&}t |}| d }t||f}d|t||f< |t||j q|d |d k s9J dS )zUtil for comparison with scipyrI   r   N)	rO   maxr2   zerosarangeappenddotrN   all)cut1cut2co_clustcutnkecutr?   r?   r@   assess_same_labellingO  s   r   c                  C   sT  d\} }}t jd}t | | f}t D ]s}tdD ]l}d|j| |fd }|dt | d d t j	f  8 }||j
ddd d t j	f 8 }tj||d	}|d d d d
f jtdd}	t| ||d\}
}}}|
jdd t|
|	d|  t||
|}t||	|}t|| qqtt t|d |
| W d    d S 1 sw   Y  d S )NrB   r$      r   r$   皙?r%         @rI   axismethodrH   Fcopyr+   z2linkage tree differs from scipy impl for linkage: )r2   r3   r4   r:   r   r~   ranger5   r   newaxismeanr   r(   astypeintsortr    r   r   r6   r7   r8   )r   pr   r;   r,   r(   ir<   outrs   rR   _rT   r   cut_r?   r?   r@   test_sparse_scikit_vs_scipy[  s4   
 "r   seedr$   c                 C   s   d\}}}t j| }d|j||fd }|dt |d d t jf  8 }||jddd d t jf 8 }tj|dd}|d d d d	f 	t
}td |\}}	}
}	|jdd t||d
 t|||
}t|||
}t|| d S )Nr   r   r%   r   rI   r   ro   r   rH   z8linkage tree differs from scipy impl for single linkage.)r2   r3   r4   r5   r   r   r   r   r(   r   r   r   r   r    r   r   )r   ra   
n_featuresrY   r;   r<   r   children_scipyrR   r   rT   r   	cut_scipyr?   r?   r@   )test_vector_scikit_single_vs_scipy_single  s"   
 r   z/ignore:WMinkowskiDistance:FutureWarning:sklearnmetric_param_gridc                 C   s   t jjdd}|jdd}t|}| \}}| }tj|  D ]#}t	t
||}tj|fi |}	t||	}
t||	}t j|
| q dS )zoThe MST-LINKAGE-CORE algorithm must work on mem-mapped dataset.

    Non-regression test for issue #19875.
    rI   )r   )   r*   r%   N)r2   r3   r4   r5   r
   r~   	itertoolsproductrM   dictzipr   
get_metricr   testingassert_equal)r   r;   r<   r   metric
param_gridr~   valskwargsdistance_metricmstmst_mmr?   r?   r@   #test_mst_linkage_core_memory_mapped  s   

r   c               	   C   s   t g dg dg dg dg dg dg} t g d}t| ddd}d||j  }t| |d	\}}d
D ]}td||d}||  tt|j	|d q6d S )N)r   r   r   )rI   rI   rI   )rH   rH   rH   )r   r   rI   rI   rH   rH   r   Fn_neighborsinclude_selfri   r   )ro   rn   rn   rl   )rY   r(   r,   rI   )
r2   rf   r   rN   r   r   r9   r	   r   rw   )r<   true_labelsr,   rS   r(   rb   r?   r?   r@   test_identical_points  s   .
r   c                  C   s8   t g d} t| ddd}td|dd}||  d S )N))y&1?gQ?)r   gMbX?)r   gEԸ?g rh?/$?r   ;On?r   r   r   r   r   r   r   )r   g~jt?)r   gOn?)r   g;On?rB   Fr   r*   rl   rv   )r2   rf   r   r   r9   )r<   r,   rl   r?   r?   r@   test_connectivity_propagation  s   r   c                  C   s   d\} }t jd}t | | f}tdD ]>}d|j| |fd }|dt | d d t jf  8 }||jddd d t jf 8 }t	|}t	||d	}t
|d |d  qd S )
NrB   r$   r   r$   r   r%   r   rI   r   r+   )r2   r3   r4   r:   r   r5   r   r   r   r   r    )r   r   r;   r,   r   r<   out_unstructuredout_structuredr?   r?   r@   test_ward_tree_children_order  s    r   c               	   C   s8  d\} }t jd}t | | f}tdD ]}d|j| |fd }|dt | d d t jf  8 }||jddd d t jf 8 }t	|d	d
}t	||d	d}|d }|d }	t
||	 |d }
|d }t|
| dD ]/}t|||d	dd }t||d	dd }|d }|d }|d }|d }t|| t|| qiqt ddgddgddgddgddgddgg}t g dg dg dg dg d g}t g dg dg dg d!g d"g}t g dg dg dg d#g d$g}t |\}}t ||f}t	|d	d
}t	||d	d}t
|d d d d%f |d  t
|d d d d%f |d  t|d d d%f |d&  t|d d d%f |d&  g d'}||g}t||D ]L\}}t|d	|d(}t|||d	d}t
|d d d d%f |d  t
|d d d d%f |d  t|d d d%f |d&  t|d d d%f |d&  qMd S ))Nr   r   r$   r   r%   r   rI   r   T)return_distance)r,   r   )rn   rm   ro   )r,   r(   r   )r(   r   gя?geGgw7@g})J@gZ!E@gn]#g!܄@g,8g!Yz @gRա&<agڎF@gT!@)g      @r   g0rq5?       @)      ?g      @gAVJS?r   )g        r   gL/u@r   )      @       @g6SHD4"@r   )      @      "@gwʴG8@r   )r   r   gwfۣ@r   )r   r   g63C2@r   )r   r   go;@r   )r   r   g_ .@r   rH   r*   )rm   rn   ro   )r   r(   )r2   r3   r4   r:   r   r5   r   r   r   r   r    r   r   rf   rL   r   )r   r   r;   r,   r   r<   r   r   children_unstructuredchildren_structureddist_unstructureddist_structuredr(   structured_itemsunstructured_itemsstructured_distunstructured_diststructured_childrenunstructured_childrenlinkage_X_wardlinkage_X_completelinkage_X_averagera   r   connectivity_Xout_X_unstructuredout_X_structuredlinkage_optionsX_linkage_truthX_truthr?   r?   r@   &test_ward_linkage_tree_return_distance  s    





r   c                  C   s   t ddgddgg} t ddgddgg}tdd|d}t|dd}tt ||  W d    d S 1 s9w   Y  d S )	Nr   rI   TFrH   )n_xn_yrP   rl   r,   r(   )r2   rf   r   r   r6   rZ   r[   r9   )xmcwr?   r?   r@    test_connectivity_fixing_non_lilo  s   "r  c            	      C   s   t jd} t | jdddjt jdd}| t|}t	||}t
||D ]\}}|| |ks3J q't jdt jdd d d	 }t dd
d d d	 }t	||}t||t jdt jdddd t||t jdt jdddd d S )Nr   rG   rB   r%   Fr   rF   rC   rH   ri   rI   )rP   n_an_b)r2   r3   r4   rx   randintr   intprandrO   r   r   r   fullr   r:   r   )	r;   r~   rM   dkeyvalue
other_keysother_valuesotherr?   r?   r@   test_int_float_dict|  s    

"r  c                  C   sj   t jd} | dd}t|ddd}t|d}tttdddd}|| || t|j	|j	 d S )	Nr   r   r$   r   Fr   r+   r   )
r2   r3   r4   r	  r   r   r   r9   r    rw   )r;   r<   r,   aglc1aglc2r?   r?   r@   test_connectivity_callable  s   


r  c                  C   sn   t jd} | dd}t|ddd}t|ddd}t|d}t|d}|| || t|j|j d S )	Nr   r   r$   r   Fr   Tr+   )	r2   r3   r4   r	  r   r   r9   r    rw   )r;   r<   r,   connectivity_include_selfr  r  r?   r?   r@   "test_connectivity_ignores_diagonal  s   



r  c                  C   s   t jd} | dd}t|ddd}td|d}|| |jd }|jjd }||d ks1J d	}| d
d}t|ddd}t||d}|| |jd }|jjd }||| ks^J d S )Nr   rB   rH   r$   Fr   r   rI   e      )	r2   r3   r4   rK   r   r   r9   rL   rs   )r;   r<   r,   agcra   rV   rY   r?   r?   r@   test_compute_full_tree  s    



r  c                  C   sP   t jd} | dd}t d}t D ]}t|||dd dks%J qd S )Nr   r$   r+   rI   )r2   r3   r4   r	  eyer   rM   r   )r;   r<   r,   r^   r?   r?   r@   test_n_components  s   
r  c               	   C   sv   t jd} | dd}dD ]*}t|d}dt|j }tjt	|d |
| W d    n1 s3w   Y  qd S )Nr   r   rB   )r   r   rX   z@n_clusters should be an integer greater than 0. %s was provided.rd   )r2   r3   r4   r	  r   strrY   r6   r7   r8   r9   )r;   r<   n_clusr  rg   r?   r?   r@   test_agg_n_clusters  s   
r  c                  C   sr   d} t jd}|| | }t g d}t| | |t jd}G dd d}| }t|||jd |j	dks7J d S )	NrH   r   )TFFT)r   r   rP   	return_asc                   @   s   e Zd Zdd Zdd ZdS )z>test_affinity_passed_to_fix_connectivity.<locals>.FakeAffinityc                 S   s
   d| _ d S )Nr   counter)selfr?   r?   r@   __init__  s   
zGtest_affinity_passed_to_fix_connectivity.<locals>.FakeAffinity.__init__c                 _   s   |  j d7  _ | j S )NrI   r!  )r#  argsr   r?   r?   r@   	increment  s   zHtest_affinity_passed_to_fix_connectivity.<locals>.FakeAffinity.incrementN)__name__
__module____qualname__r$  r&  r?   r?   r?   r@   FakeAffinity  s    r*  )r,   r/   r   )
r2   r3   r4   rK   rf   r   ndarrayr   r&  r"  )r&   r;   r<   rP   r,   r*  far?   r?   r@   (test_affinity_passed_to_fix_connectivity  s   r-  )rl   rm   rn   c                 C   s   t jd}t jddgtd}d}||d}t|j }d}d |fD ]I}td ||| d}|	| |j
}	tt |j
}
t|  }|||d dd\}}}}}t ||kd	 }||
ks\J t|
||d
}t |	|skJ q"d S )Nr   rB   rC   rG   rF   )rY   rp   r,   r(   T)r,   rY   r   rI   )rY   rR   rT   )r2   r3   r4   r:   rJ   rK   r   rL   r   r9   rw   rO   rx   r   count_nonzeror   array_equiv)r(   r;   rP   ra   r<   r,   rp   connrb   clusters_producednum_clusters_producedrQ   rR   rS   rT   rU   	distancesnum_clusters_at_thresholdclusters_at_thresholdr?   r?   r@   5test_agglomerative_clustering_with_distance_threshold  s8   

r6  c                  C   sx   t jd} d}| jdd|dfd}td ddd	|}t|d
dd}t |t j t 	|dks3J |j
|ks:J d S )Nr   rB   ii,  r   r%   r   ro   rY   rp   r(   	minkowskirH   r   r   r   )r2   r3   r4   r  r   r9   r   fill_diagonalinfr   n_clusters_)r;   ra   r<   rb   all_distancesr?   r?   r@   test_small_distance_threshold  s   r>  c                  C   s   t jd} d}| jdd|dfd}d}td |dd	|}|j}t|d
dd}t |t j	 t 
|D ]9}||k}|| d d |f jdd }	|| d d | f jdd }
| dkrg|	|k sgJ |
|ksmJ q4d S )Nr   rG   irB   r   r%   r*   ro   r7  r8  rH   r9  r   rI   )r2   r3   r4   r  r   r9   rw   r   r:  r;  rx   minr   sum)r;   ra   r<   rp   rb   r   Dlabelin_cluster_maskmax_in_cluster_distancemin_out_cluster_distancer?   r?   r@   .test_cluster_distances_with_distance_threshold,  s,    rF  )	thresholdy_trueri   rI   r   g      ?c                 C   s:   dgdgg}t d || d}||}t||dksJ d S )Nr   rI   r7  )r   fit_predictr   )r(   rG  rH  r<   	clusterery_predr?   r?   r@   ?test_agglomerative_clustering_with_distance_threshold_edge_caseG  s   
rL  c                  C   s   dgdgg} t jtdd td d d|  W d    n1 s!w   Y  t jtdd tddd|  W d    n1 sAw   Y  dgdgg} t jtdd td ddd	|  W d    d S 1 siw   Y  d S )
Nr   rI   zExactly one of rd   )rY   rp   rH   z!compute_full_tree must be True ifF)rY   rp   r{   )r6   r7   r8   r   r9   )r<   r?   r?   r@   &test_dist_threshold_invalid_parametersW  s   
"rM  c                  C   s^   t jd} | dd}tjtdd tddd| W d    d S 1 s(w   Y  d S )	Nr   r$   r   z>Distance matrix should be square, got matrix of shape \(5, 3\)rd   r-   rm   r   )	r2   r3   r4   r	  r6   r7   r8   r   r9   )r;   r<   r?   r?   r@   *test_invalid_shape_precomputed_dist_matrixf  s   "rN  c                  C   s
  t g dg dg dg dg dg} t| d dksJ t jd}|dd}t|}td	| d
d}d}tj	t
|d || W d   n1 sNw   Y  t| d
d}tj	t
|d || W d   n1 spw   Y  t|j|j t|j|j dS )zCheck that connecting components works when connectivity and
    affinity are both precomputed and the number of connected components is
    greater than 1. Non-regression test for #16151.
    )r   rI   rI   r   r   )r   r   rI   r   r   )r   r   r   r   r   )r   r   r   r   rI   r   rH   r$   rB   r-   rm   )r/   r,   r(   z.Completing it to avoid stopping the tree earlyrd   Nr   )r2   rf   r   r3   r4   rK   r   r   r6   rZ   r[   r9   r    rw   rs   )connectivity_matrixr;   r<   r   clusterer_precomputedrg   rJ  r?   r?   r@   Btest_precomputed_connectivity_affinity_with_2_connected_componentsr  s6   
rQ  )^__doc__r   tempfiler   ry   r6   	functoolsr   numpyr2   scipyr   scipy.clusterr   scipy.sparse.csgraphr   sklearn.metrics.clusterr   'sklearn.metrics.tests.test_dist_metricsr   sklearn.utils._testingr	   r
   r   r   sklearn.clusterr   r   r   sklearn.cluster._agglomerativer   r   r   r    sklearn.feature_extraction.imager   sklearn.metricsr   sklearn.metrics.pairwiser   r   r   r   r   sklearn.neighborsr   "sklearn.cluster._hierarchical_fastr   r   r   sklearn.utils._fast_dictr   r    sklearn.datasetsr!   r"   rA   rW   r]   r_   rc   rh   markparametrizeru   r   r   r   r   r   r   r   r   filterwarningsr   r   r   r   r   r  r  r  r  r  r  r  r-  r6  r>  rF  rL  rM  rN  rQ  r?   r?   r?   r@   <module>   s    	^
(

v
&"