o
    tBh6                     @   s  d dl Z d dlZd dlmZ d dlmZ d dlmZ d dl	m
Z
 d dlZd dlmZ d dlmZmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dlmZ d dlmZ dd Zdd Zdd Zdd Zdd Zdd Zdd Z ej!"dddd Z#d d! Z$d"d# Z%d$d% Z&eej!"ddd&d' Z'ed(d) Z(d*d+ Z)ej!"ddd,d- Z*ej!"ddd.d/ Z+d0d1 Z,d2d3 Z-d4d5 Z.d6d7 Z/d8d9 Z0d:d; Z1ej!"d<g d=d>d? Z2d@dA Z3dS )B    N)
block_diag)
csr_matrix)psi)assert_array_equal)LatentDirichletAllocation)_dirichlet_expectation_1d_dirichlet_expectation_2d)assert_allclose)assert_array_almost_equal)assert_almost_equal)!if_safe_multiprocessing_with_blas)NotFittedError)StringIOc                  C   s6   d} t jd| td}|g|  }t| }t|}| |fS )N   )r   r   )dtype)npfullintr   r   )n_componentsblockblocksX r   /var/www/html/riverr-enterprise-integrations-main/venv/lib/python3.10/site-packages/sklearn/decomposition/tests/test_online_lda.py_build_sparse_mtx   s   
r   c                  C   sP   t  \} }d|  }t| ||dd}t| dd}||}||}t|| d S )Ng      ?r   )r   doc_topic_priortopic_word_priorrandom_stater   r   )r   r   fit_transformr   )r   r   priorlda_1lda_2topic_distr_1topic_distr_2r   r   r   test_lda_default_prior_params%   s   


r%   c                  C   sz   t jd} t \}}t|dd| d}|| g d}|jD ]}t| dd  d d d }t	t
||v s:J qd S )Nr      batch)r   evaluate_everylearning_methodr   )r   r&      )r         )         r   randomRandomStater   r   fitcomponents_setargsorttuplesortedrngr   r   ldacorrect_idx_grps	componenttop_idxr   r   r   test_lda_fit_batch6   s   


rB   c                  C   s|   t jd} t \}}t|ddd| d}|| g d}|jD ]}t| dd  d d d }t	t
||v s;J q d S )	Nr         $@r&   online)r   learning_offsetr(   r)   r   r*   r1   r2   r3   r<   r   r   r   test_lda_fit_onlineI   s   


rF   c                  C   s   t jd} t \}}t|dd| d}tdD ]}|| qg d}|jD ]}t|	 dd  d d d }t
t||v sAJ q&d S )	Nr   rC   d   r   rE   total_samplesr   r   r*   r1   r2   r   r4   r5   r   r   rangepartial_fitr7   r8   r9   r:   r;   r=   r   r   r>   ir?   crA   r   r   r   test_lda_partial_fit]   s   

rP   c                  C   s|   t jd} t \}}t|d| d}||  g d}|jD ]}t|	 dd  d d d }t
t||v s;J q d S )Nr   r'   r   r)   r   r*   r1   r2   )r   r4   r5   r   r   r6   toarrayr7   r8   r9   r:   r;   r<   r   r   r   test_lda_dense_inputq   s   

rS   c                  C   sh   t jd} | jddd}d}t|| d}||}|dk s"J tt j|dd	t 	|j
d  d S )
Nr   r-      
   sizer   r   g        r&   axis)r   r4   r5   randintr   r   anyr
   sumonesshape)r=   r   r   r>   X_transr   r   r   test_lda_transform   s   
$ra   method)rD   r'   c                 C   sL   t jd}|jddd}td| |d}||}||}t||d d S )Nr   rV   )2   rU   rW   r-   rQ   r,   )r   r4   r5   r[   r   r   	transformr
   )rb   r=   r   r>   X_fitr`   r   r   r   test_lda_fit_transform   s   

rf   c               	   C   s   t d} dtddfdtddfdtdd	fd
tddff}|D ]$\}}d| }tjt|d ||  W d    n1 s@w   Y  q!d S )Nr-   rV   r   r   r   r)   unknown)r)   rI   )rI   rE   r2   )rE   z^Invalid %r parametermatch)r   r^   r   pytestraises
ValueErrorr6   )r   invalid_modelsparammodelregexr   r   r   test_invalid_params   s   
rs   c                  C   sT   t dd} t }d}tjt|d ||  W d    d S 1 s#w   Y  d S )Nrg         z^Negative values in data passedrj   )r   r   r   rl   rm   rn   r6   )r   r>   rr   r   r   r   test_lda_negative_input   s   "ru   c                  C   sb   t jd} | jddd}t }d}tjt|d || W d    d S 1 s*w   Y  d S )Nr   r,   rT   rW   z}This LatentDirichletAllocation instance is not fitted yet. Call 'fit' with appropriate arguments before using this estimator.rj   )	r   r4   r5   r[   r   rl   rm   r   
perplexity)r=   r   r>   rr   r   r   r   test_lda_no_component_error   s   "rw   c                 C   s|   t  \}}tjd}t|d| d|d}|| g d}|jD ]}t| dd  d d d }t	t
||v s;J q d S )Nr   r+   r&   )r   n_jobsr)   r(   r   r*   r1   r2   )r   r   r4   r5   r   r6   r7   r8   r9   r:   r;   )rb   r   r   r=   r>   r?   rO   rA   r   r   r   test_lda_multi_jobs   s   


ry   c                  C   s   t jd} t \}}t|ddd| d}tdD ]}|| qg d}|jD ]}t|	 dd  d d d }t
t||v sBJ q'd S )	Nr   r+         @   )r   rx   rE   rI   r   r*   r1   r2   rJ   rM   r   r   r   test_lda_partial_fit_multi_jobs   s    

r|   c                  C   s   t jd} | dd}| dd}t jjd|dfd}t|dd| d	}|| | jd|d
 |fd}tjtdd |	|| W d    n1 sLw   Y  | jd||d
 fd}tjtdd |	|| W d    d S 1 suw   Y  d S )Nr   r   r.   rV   r,   rW   rz   rU   rH   r&   zNumber of samplesrj   zNumber of topics)
r   r4   r5   r[   r   r6   rl   rm   rn   _perplexity_precomp_distr)r=   r   	n_samplesr   r>   invalid_n_samplesinvalid_n_componentsr   r   r   test_lda_preplexity_mismatch   s&   
"r   c           	      C   s   t  \}}t|d| ddd}t|d| ddd}|| |j|dd}|| |j|dd}||ks5J |j|dd}|j|dd}||ksIJ d S )	Nr&   rG   r   r   max_iterr)   rI   r   rV   Fsub_samplingT)r   r   r6   rv   )	rb   r   r   r!   r"   perp_1perp_2perp_1_subsamplingperp_2_subsamplingr   r   r   test_lda_perplexity  s.   


r   c                 C   sf   t  \}}t|d| ddd}t|d| ddd}|| ||}|| ||}||ks1J d S )Nr&   rG   r   r   rV   )r   r   r   score)rb   r   r   r!   r"   score_1score_2r   r   r   test_lda_score"  s(   




r   c                  C   sL   t  \} }t| ddddd}|| ||}|| }t|| d S )Nr&   r'   rG   r   r   )r   r   r6   rv   rR   r   )r   r   r>   r   r   r   r   r   test_perplexity_input_format=  s   


r   c                  C   sb   t  \} }t| ddd}|| |j|dd}||}td|t|j  }t	|| d S )NrV   r   )r   r   r   Fr   rt   )
r   r   r6   rv   r   r   expr]   datar   )r   r   r>   perplexity_1r   perplexity_2r   r   r   test_lda_score_perplexityN  s   


r   c                  C   sD   t  \} }t| ddddd}|| |j}||}t|| d S )Nr&   r'   r   )r   r   r)   r   r(   )r   r   r6   bound_rv   r   )r   r   r>   perplexity1perplexity2r   r   r   test_lda_fit_perplexity\  s   


r   c                  C   sR   t d} | t| fD ]}tdd|}t|jjddt |jj	d  qdS )z+Test LDA on empty document (all-zero rows).)r-   r,   i  )r   r   rY   r&   N)
r   zerosr   r   r6   r   r7   r]   r^   r_   )Zr   r>   r   r   r   test_lda_empty_docsr  s   
r   c               	   C   s   t ddd} t | }t| d| t|t t| tt |  dd | dd} tt	| t| tt j| dd	d
d
t j
f  ddd d
S )z9Test Cython version of Dirichlet expectation calculation.irV   i'  r   gҶOɃ;)atolrG   r&   rY   Ngdy=gA:)>)rtolr   )r   logspace
empty_liker   r	   r   r   r]   reshaper   newaxis)xexpectationr   r   r   test_dirichlet_expectation|  s   
&&
r   c                 C   s   t  \}}t|dd| |dd}t }tj|}t_z
|| W |t_n|t_w | d}	| d}
||	ks<J ||
ksBJ d S )Nr   r'   r   )r   r   r)   verboser(   r   
rv   )r   r   r   sysstdoutr6   getvaluecount)r   r(   expected_linesexpected_perplexitiesr   r   r>   outold_outn_linesn_perplexityr   r   r   check_verbosity  s$   
r   z;verbose,evaluate_every,expected_lines,expected_perplexities))Fr&   r   r   )Fr   r   r   )Tr   r   r   )Tr&   r   r   )Tr+   r   r&   c                 C   s   t | ||| d S )N)r   )r   r(   r   r   r   r   r   test_verbosity  s   r   c                  C   s>   t  \} }t| d|}| }tdd t| D | dS )z6Check feature names out for LatentDirichletAllocation.rh   c                 S   s   g | ]}d | qS )latentdirichletallocationr   ).0rN   r   r   r   
<listcomp>  s    z.test_lda_feature_names_out.<locals>.<listcomp>N)r   r   r6   get_feature_names_outr   rK   )r   r   r>   namesr   r   r   test_lda_feature_names_out  s   
r   )4r   numpyr   scipy.linalgr   scipy.sparser   scipy.specialr   numpy.testingr   rl   sklearn.decompositionr   sklearn.decomposition._ldar   r   sklearn.utils._testingr	   r
   r   r   sklearn.exceptionsr   ior   r   r%   rB   rF   rP   rS   ra   markparametrizerf   rs   ru   rw   ry   r|   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   <module>   sb    
	





