a
     lda"                     @   s   d dl Z d dlZd dlZd dlmZ d dlmZ d dlZd dlm	Z	 d dl
mZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZmZ d dlmZmZ eeZejZG dd dZdS )    N)	getLogger)settings)SoftTimeLimitExceeded)BeautifulSoup)parser)Subquery)timezone)Review)ReviewSourcePlatform)WidgetReivewsWidgetc                   @   sD   e Zd Zdd ZeeedddZedddZe	d	d
dZ
dS )TrustpilotCrawlerc                 C   s   g }g }|D ]d}d}t jj|d D ]:}|j|jkr&|j|jkr&|j|jkr&|| d} qbq&|s|| qt jj|dd d S )NF)	source_idTignore_conflicts)	r	   objectsfilterreversedateratingauthorappendbulk_create)selfreviewsreview_source_idZinsert_reviewsZupdate_reviewsreviewcheckrow r    J/var/www/html/Ranjet/backend/verify_trusted/crawlers/trustpilot/crawler.pysave_reviews   s    $
zTrustpilotCrawler.save_reviews)urlreturnc                 C   sH   t | }t|jd}z|dd }t|j W S    Y dS 0 dS )z
        Get number of review page of biz from Trustpilot
        e.g: 40
        :param url: e.g https://www.trustpilot.com/review/texasbaycu.org
        :return:
        html.parserz%div.styles_pagination__6VmQv >nav > a   N)requestsgetr   contentselectinttextstrip)r#   rsoupZ	last_pager    r    r!   get_num_review_page&   s    
z%TrustpilotCrawler.get_num_review_page)r#   c                    s  t d | |}t | g  tj }d}d}td|  td|d D ]}| d| }t| t | t|}t	
t|jddd	 jd
 d d }	|dkrtt|jddjdddd}tt|jddjdd}t| d|  |	D ]n}
 |
d t|
d d p2d t|
d d t|
d pVd t|
d pjd |
d d qtt  t d dkrtt  dtj |   t tkrP fddt D   d t  |dkr
|dkr
d }d } qqP|| fS ) Nzc====================================trustpilot crawler=============================================r   g        z
page num: r'   z?page=r%   scriptpropsZ	pagePropsr   zdiv.styles_header__yrrqf > ptotal ,z$div.styles_header__yrrqf > h2 > span : idconsumerdisplayNamedatesZpublishedDatetitler-   r   )r9   r   r   headlinebodyr   d   z: c                    s(   g | ] \}}| |d  d vr|qS )r'   Nr    ).0jkparsed_reviewsr    r!   
<listcomp>^       z1TrustpilotCrawler.get_reviews.<locals>.<listcomp>)loggerdebugr1   datetimenowprintranger(   r)   jsonloadsr   r*   r+   r-   r,   
select_onereplacefloatr   strr.   r   parselenmax_crawler	enumerate)r   r#   page_numstarttotal_reviews
avg_ratingnum	crawl_urlresr   r   r    rD   r!   get_reviews6   sn    




 
	 zTrustpilotCrawler.get_reviewsreview_sourcec              
      s  |   j\}}}|d u r0tjj _   d S  fdd|D }tjj	 j
ddjttjjddddd  tjj|dd	 ztjj	 j
ttjjddddd
jddd}tjj	|d}|D ]X}tjj	 j
|j|jdjt|d}z|d j}	|j|	d W n   Y n0 |  qtjj	ttjj	 jdjdddddddd}
t|
}|dk rtjj	ttjj	 jdjddddddddd d|  }tjj	|djdd n^|dkrtjj	ttjj	 jdjdddddddddd  }tjj	|djdd d |du rtjj	 jd }tjj	|j
tjjdjddd}t|}g }|dk rtjj	ttjj	 jtjjdjdddddddd d|  }|D ]}|t||d qtjj|dd	 n
|dkrtjj _t d  t t!j!"  d!| d"|  | _#t t!j!"  d# | _$t t!j!"  d$    t d% W n@ t%y } z&tjj _   t | W Y d }~n
d }~0 0 d S )&Nc                    s@   g | ]8}t  |d  |d |d |d |d t |d dqS )r   r   r>   r?   r   r9   )sourcer   r   r>   r?   r   
date_parseexternal_id)r	   r   rK   )rA   r/   r`   r    r!   rF   l   s   z2TrustpilotCrawler.sync_reviews.<locals>.<listcomp>T)r   	lock_edit	review_id)flatF)id__indisplay_order__isnullr   )r   rh   ri   r9   )rh   )r   r?   r   r   )rd   )
company_id)source_id__in	is_activedisplay_orderz-date_modifyz-date   )pk__in)rl   )rl   rm   )company)widget review__source__platform__statusr      )rj   platform__status)rk   )rq   r   Znewonez - r8   z - set total reviewsz - set avg ratingdone)&r_   r#   r
   
SyncStatusFALSEsync_statussaver	   r   r   r9   excluder   r   values_listdeleter   r?   r   listrd   updaterj   order_byrU   r   firstr   StatusACTIVEr   SYNCEDrL   rJ   rK   reviews_countaverage_ratingr   )r   ra   is_addrZ   r[   r   dup_reviews_idsuncheck_reviewsr   rd   active_reviewsnum_active_reviewsnested_qrq   widget_reviewsnum_widget_reviewswsnested_wwer    r`   r!   sync_reviewsf   s    








 





zTrustpilotCrawler.sync_reviewsN)__name__
__module____qualname__r"   staticmethodrS   r,   r1   r_   r
   r   r    r    r    r!   r      s
   0r   )rJ   rN   timeloggingr   django.confr   r(   Zbilliard.exceptionsr   bs4r   dateutilr   django.db.modelsr   django.utilsr   verify_trusted.companies.modelsr	   verify_trusted.reviews.modelsr
   r   Zverify_trusted.widgets.modelsr   r   r   rH   MAX_CRAWLER_SIZErV   r   r    r    r    r!   <module>   s    