o
    tBhG                  
   @   s  d dl Z d dlZd dlZd dlmZ d dlmZ d dlmZ d dlmZ d dlm	Z	 d dlm
Z
 d dlmZ d d	lmZ d d
lmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZmZmZ eeeeeeegZdd Zdd Z dd Z!dd Z"dd Z#dd  Z$d!d" Z%d#d$ Z&d\d'd(Z'd)d* Z(d+d, Z)d-d. Z*d/d0 Z+d1d2 Z,d3d4 Z-d5d6 Z.d7d8 Z/d]d:d;Z0d<d= Z1d>d? Z2ej34d@dAgdB g dCfdDgdB g dCfg dCdAgdB fg dCdDgdB fdAgdB dAgdB fgdEdF Z5dGdH Z6dIdJ Z7dKdL Z8dMdN Z9ej34dOe:e;dPe:e;dPfe<dQe<dQfgdRdS Z=dTdU Z>dVdW Z?ej34dXg dYdZd[ Z@dS )^    N)adjusted_mutual_info_score)adjusted_rand_score)
rand_score)completeness_score)contingency_matrix)pair_confusion_matrix)entropy)expected_mutual_information)fowlkes_mallows_score)"homogeneity_completeness_v_measure)homogeneity_scoremutual_info_scorenormalized_mutual_info_score)v_measure_score_generalized_average)check_clusterings)assert_all_finite)assert_almost_equal)assert_array_equalassert_array_almost_equalassert_allclosec               	   C   s   t D ]p} d}tjt|d | ddgg d W d    n1 s!w   Y  d}tjt|d | ddgddggg d W d    n1 sGw   Y  d}tjt|d | g dddgddgg W d    n1 smw   Y  qd S )	NzDFound input variables with inconsistent numbers of samples: \[2, 3\]matchr      )r   r   r   z$labels_true must be 1D: shape is \(2z$labels_pred must be 1D: shape is \(2r   r   r   )score_funcspytestraises
ValueError)
score_funcexpected r$   /var/www/html/riverr-enterprise-integrations-main/venv/lib/python3.10/site-packages/sklearn/metrics/cluster/tests/test_supervised.py"test_error_messages_on_wrong_input'   s    r&   c                     s   d\ g d}  fdd| D }|d |d   kr*|d   kr*|d ks-J  J d	\fd
d| D }|d |d   krS|d   krS|d ksVJ  J d S )N)r      min	geometric
arithmeticmaxc                       g | ]}t  |qS r$   r   .0method)abr$   r%   
<listcomp>;       z,test_generalized_average.<locals>.<listcomp>r   r   r'      )   r6   c                    r-   r$   r   r.   )cdr$   r%   r3   >   r4   r$   )methodsmeansr$   )r1   r2   r7   r8   r%   test_generalized_average8   s   6:r;   c                  C   s  t D ]l} | g g tdksJ | dgdgtdksJ | g dg dtdks.J | g dg dtdks>J | g dg dtdksNJ | g d	g d
tdks^J | g dg dtdksnJ qttg}h d}|D ]} |D ]z}| g g |dtdksJ | dgdg|dtdksJ | g dg d|dtdksJ | g dg d|dtdksJ | g dg d|dtdksJ | g d	g d
|dtdksJ | g dg d|dtdksJ q}qyd S )N      ?r   r   )r   r   r   r   )*      r=   )        r<   r?   )      E@      @r@   )r?   r<          @)r@   rA   rB   )r   r   r'   )r=   r>   r'   >   r,   r)   r*   r+   average_method)r   r   approxr   r   )r"   score_funcs_with_changing_meansr:   meanr$   r$   r%   test_perfect_matchesB   sV       " 
rH   c                  C   @   t g dg d\} }}t| dd t|dd t|dd d S )Nr   r   r   r   r   r   )r   r   r   r   r'   r'   r<   r'   gGz?gQ?r   r   hr7   vr$   r$   r%   *test_homogeneous_but_not_complete_labelinge      rO   c                  C   rI   )Nr   r   r   r   r'   r'   )r   r   r   r   r   r   g(\?r'   r<   g\(\?rK   rL   r$   r$   r%   *test_complete_but_not_homogeneous_labelingm   rP   rR   c                  C   rI   )NrJ   r   r   r   r   r'   r'   q=
ףp?r'   zG?p=
ף?rK   rL   r$   r$   r%   .test_not_complete_and_not_homogeneous_labelingu   rP   rW   c                  C   s   d} d}d}d|  | | | | |  }t g dg d| d\}}}t||d t||d t||d tg dg d| d}t||d d S )	Ng?rT   rU   r   rJ   rS   )betar'   )r   r   r   )	beta_testh_testc_testv_testrM   r7   rN   r$   r$   r%   test_beta_parameter}   s   r]   c                  C   s   t g dg d\} }}t| dd t|dd t|dd t g dg d\} }}t| dd t|dd t|dd tg dg d}tg dg d}t|d	d t|d	d tg dg d}tg dg d}t|d
d t|d
d d S )N)r   r   r   r'   r'   r'   rS   rT   r'   rU   rV   rJ   )r      r   r^   r'   r'   gQ?gQ?)r   r   r   r   )rM   r7   rN   ari_1ari_2ri_1ri_2r$   r$   r%   test_non_consecutive_labels   s    rc   
   r=   c                 C   sr   t j|j}t t||f}t|D ]"\}}t|D ]}	|d||d}
|d||d}| |
||||	f< qq|S )Nr   )lowhighsize)nprandomRandomStaterandintzeroslen	enumeraterange)r"   	n_samplesk_rangen_runsseedrandom_labelsscoresikjlabels_alabels_br$   r$   r%   uniform_labelings_scores   s   r{   c                  C   sD   g d} d}d}t t|| |}t|jdd}t|g dd d S )N)r'   rd   2   Z   d   rd   r   )axis){Gz?Q?r   r   r'   )r{   r   rh   absr,   r   )n_clusters_rangerp   rr   ru   max_abs_scoresr$   r$   r%   test_adjustment_for_chance   s   r   c            	      C   s   t g d} t g d}t| |}t|dd t| |dd}t| ||d}t|dd t| |}t| ||d}t|dd | }t||}t|dd t| |}t|d	d tg d
g d}|t	dkskJ t t
| d g }t t
|d g }t||}t|dd d S )Nr   r   r   r   r   r   r'   r'   r'   r'   r'   r'   r5   r5   r5   r5   r5   r   r   r   r   r'   r   r'   r'   r'   r'   r5   r   r5   r5   r5   r'   r'   gS
cA?   Tsparse)contingencygpUj@?gP1?)r   r   r'   r'   )r'   r'   r5   r5   r<   n   gRQ?r'   )rh   arrayr   r   r   sumr	   r   r   rE   listflatten)	ry   rz   miCrp   emiamia110b110r$   r$   r%   test_adjusted_mutual_info_score   s*   




r   c                   C   s    t tdggddksJ d S )Nip r   )r	   rh   r   r$   r$   r$   r%   "test_expected_mutual_info_overflow   s    r   c                  C   s   t dgd dgd  dgd  dgd  d	gd
  } t dgd dgd  dgd  dgd  dgd  dgd  dgd  dgd  dgd  dgd  }tt| | tt| | d S )Nr   iy  r'   i]<  r5   i  r^   iU  r   iP  r   i  i	  iD9  i  i     i.  '   i<     )rh   r   r   r   r
   )xyr$   r$   r%   3test_int_overflow_mutual_info_fowlkes_mallows_score   sD   	r   c                  C   s>   t g d} t| dd tt g d t g ddksJ d S )N)r   r   r@   g,^R^?r   r   )r   r   r   r   r   )r   r   )entr$   r$   r%   test_entropy  s   r   c                  C   sx   t g d} t g d}t| |}t j| |t ddt ddfdd }t|| t| |dd}t||d  d S )	Nr   r   r   r   )binsr   g?)eps)rh   r   r   histogram2daranger   )ry   rz   r   C2r$   r$   r%   test_contingency_matrix  s   
(
r   c                  C   s   t g d} t g d}t| |}t| |dd }t|| tjtdd t| |ddd W d    d S 1 s<w   Y  d S )	Nr   r   Tr   z!Cannot set 'eps' when sparse=Truer   g|=)r   r   )rh   r   r   toarrayr   r   r    r!   )ry   rz   r   C_sparser$   r$   r%   test_contingency_matrix_sparse  s   

"r   c                  C   s   t dddtD ]b} t j| tdt j| td}}t||tdks'J t	||tdks3J t
||tdks?J t||tdksKJ dD ]}t
|||dtdks]J t|||dtdkskJ qMq
d S )Nr   r^   dtyper?   r(   rC   )rh   logspaceastypeintonesr   r   r   rE   r   r   )rv   ry   rz   r0   r$   r$   r%   test_exactly_zero_info_score  s&   
r   $   c                 C   s   t dddtD ]<}t j| }|dd||dd|}}tt||dt	|| t
|t
|  d d}tt||t|||d q
d S )Nr   r^   r   rd   rB   r+   rC   )rh   r   r   r   ri   rj   rk   r   r   r   r   r   )rs   rv   random_statery   rz   avgr$   r$   r%   %test_v_measure_and_mutual_information-  s(   r   c                  C   sb   t g dg d} t| dtd  t g dg d}t|d t g dg d}t|d	 d S )
NrJ   rQ   g      @g      R@)r   r   r   r   r   r   r<   )r   r   r   r   r   r   )r   r   r'   r5   r^   r   r?   )r
   r   rh   sqrt)scoreperfect_scoreworst_scorer$   r$   r%   test_fowlkes_mallows_scoreC  s   
r   c                  C   s   t g d} t g d}dt d }t| |}t|| t|| }t|| t| d d |}t|| t|| d d }t|| d S )N)r   r   r   r   r   r'   )r   r   r'   r'   r   r   r<   g      (@r   r5   r'   )rh   r   r   r
   r   )ry   rz   r#   score_originalscore_symmetricscore_permuted
score_bothr$   r$   r%   %test_fowlkes_mallows_score_propertiesQ  s   




r   zlabels_true, labels_predr1      )r   r   r   r   r   r   r   c                 C   s   t | |dks	J d S )Nr   r   )labels_truelabels_predr$   r$   r%   .test_mutual_info_score_positive_constant_labelh  s   r   c                  C   sj   t jd} | d}t dddd }d}tjt|d t|| W d    d S 1 s.w   Y  d S )Nr=   i  g{Gz?r   gư>zuClustering metrics expects discrete values but received continuous values for label, and continuous values for targetr   )	rh   ri   rj   randlinspacer   warnsUserWarningr   )rngnoise
wavelengthmsgr$   r$   r%   test_check_clustering_errorx  s   
"r   c                  C   sF   d} t t| }|}t| | d  dgddgg}tt||| d S )Nr~   r   r   )r   ro   rh   r   r   r   Nclustering1clustering2r#   r$   r$   r%   *test_pair_confusion_matrix_fully_dispersed  
   r   c                  C   sF   d} t | f}|}t ddgd| | d  gg}tt||| d S )Nr~   r   r   )rh   rl   r   r   r   r   r$   r$   r%   )test_pair_confusion_matrix_single_cluster  r   r   c                     s   d  d } t  fddt D }t  fddt D d |  }t jdt jd}tt|D ]-}tt|D ]$}||kr`t|| || k}t|| || k}|||f  d7  < q<q4tt||| d S )	Nrd   r'   c                    s   g | ]	}|d  g  qS r   r$   r/   rv   nr$   r%   r3     s    z.test_pair_confusion_matrix.<locals>.<listcomp>c                    s   g | ]}|d  g d   qS r   r$   r   r   r$   r%   r3     s    )r'   r'   )shaper   r   )	rh   hstackro   rl   int64rm   r   r   r   )r   r   r   r#   rv   rx   same_cluster_1same_cluster_2r$   r   r%   test_pair_confusion_matrix  s   $r   zclustering1, clustering2r~   )r~   c                 C   s   t t| |d d S )Nr<   r   r   )r   r   r$   r$   r%   test_rand_score_edge_cases  s   r   c            	      C   s`   g d} g d}d}d}d}d| | | }|| }|| | | }|| }t t| || d S )NrJ   rS   r^      r'      r   )	r   r   D11D10D01D00expected_numeratorexpected_denominatorr#   r$   r$   r%   test_rand_score  s   r   c                  C   sx   t jd} | jdddt jd}| jdddt jd}t  tdt t	|| W d   dS 1 s5w   Y  dS )zCheck that large amount of data will not lead to overflow in
    `adjusted_rand_score`.
    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/20305
    r   r'   i r   errorN)
rh   ri   rj   rk   int8warningscatch_warningssimplefilterRuntimeWarningr   )r   y_truey_predr$   r$   r%   !test_adjusted_rand_score_overflow  s   
"r   rD   )r)   r+   r*   r,   c                 C   sx   dgd }dg|dd  }ddg|dd  }t ||| d}|dks%J t ||| d}d|  kr7dk s:J  J dS )zCheck that nmi returns a score between 0 (included) and 1 (excluded
    for non-perfect match)

    Non-regression test for issue #13836
    r   i  r   Nr'   rC   r   )rD   labels1labels2labels3nmir$   r$   r%   )test_normalized_mutual_info_score_bounded  s   
 r   )rd   r=   )r   )Ar   numpyrh   r   sklearn.metrics.clusterr   r   r   r   r   r   r   r	   r
   r   r   r   r   r   #sklearn.metrics.cluster._supervisedr   r   sklearn.utilsr   sklearn.utils._testingr   numpy.testingr   r   r   r   r&   r;   rH   rO   rR   rW   r]   rc   r{   r   r   r   r   r   r   r   r   r   r   r   markparametrizer   r   r   r   r   r   ro   rl   r   r   r   r   r$   r$   r$   r%   <module>   s    
#





		*
