a
     ldN(                     @   s   d dl Z d dlZd dlZd dlmZ d dlZd dlmZ d dlm	Z	 d dl
mZ d dlmZ d dlmZ d dlmZ d d	lmZmZ d d
lmZmZ d dlmZ eeZdddZejZG dd dZdS )    N)	getLogger)SoftTimeLimitExceeded)BeautifulSoup)parser)Subquery)timezone)Review)ReviewSourcePlatform)WidgetReivewsWidget)settingszapplication/jsonz)Mozilla/5.0 (Windows NT 10.0; Win64; x64))zcontent-typez
user-agentc                   @   s6   e Zd ZedddZedddZedddZd	S )
AngiCrawler)urlc                 C   s   t j|td}td|j d t|jd}z0t|dj	dd	dd	d	d
 }W n0   t|d
j	dd	dd
 }Y n0 t| zt|dj	dd}W n0   t|dj	dd	dd
 }Y n0 t| zt|dd j}W n   d}Y n0 |||fS )Nheadersz(======================= Request status: z=========================html.parserz"#Rating-Display-Wrapper > span > p( )zVerified Reviewsz#content > div.css-zv8g12.e7lprtl0 > div > div:nth-child(2) > div > div:nth-child(3) > div > div:nth-child(1) > div > div.stack > pz
Average ofzCustomer Reviewsz%#Rating-Display-Wrapper > span > spanZstarsz#content > div.css-zv8g12.e7lprtl0 > div > div:nth-child(2) > div > div:nth-child(3) > div > div:nth-child(1) > div > div.stack > div > spanz/5z#reviews > ul > ul > li > a   )requestsgetr   printstatus_coder   textint
select_onereplacestripfloatselect)selfr   rsouptotal_reviewsavg_ratingspages r)   D/var/www/html/Ranjet/backend/verify_trusted/crawlers/angi/crawler.pyget_info   sH    

zAngiCrawler.get_infoc              
   C   s$  t d tj }z| |\}}}t | d| d|  g }t|D ]}|dkr^|}n| d| }t| tj|t	d}	t
|	jd}
|
d}|D ] }dd i}d	|d
< z|dj|d< W n    t d d	|d< Y qY n0 ztj|djd|d< W n    t d d |d< Y qY n0 z|dj|d< W n   t d d	|d< Y n0 zt|dj|d< W n    t d d|d< Y qY n0 || qt|tkr|d t } qt t| dtj |   qJ|||fW S  ty } zt | W Y d }~d S d }~0 0 d S )Nzg==================================================Angi Crawler=========================================z, r   z?page=r   r   z+#reviews > div.reviews__content > div > dividr   headlinezp:nth-child(3)authorzparse author is Nonezp.review-card__report-datez%m/%d/%Ydatezparse date is Nonezp.read-more__truncated-contentbodyzparse body is Nonezspan.rating-numberratingzparse rating is Noneg        z: )r   datetimenowr+   rangeloggerdebugr   r   r   r   r   r"   r   strptimer!   appendlenmax_crawler	Exception)r#   r   start
avg_ratingr&   Z	pages_numparsed_reviewsiZ	crawl_urlr$   r%   Zreview_tagstagreviewer)   r)   r*   get_reviews5   sf    






"zAngiCrawler.get_reviewsreview_sourcec              
      s  t d |  j}|d u r2tjj _   d S |\}}} fdd|D }t d tj	j
 jddjttj	jdddd	d
  tj	j|d	d ztj	j
 jttj	jdddd	djddd}tj	j
|d}|D ]X}	tj	j
 j|	j|	jdjt|d}z|d j}
|	j|
d W n   Y n0 |  qtj	j
ttj	j
 jdjdddddddd}t|}|dk rt d tj	j
ttj	j
 jdjdddd	ddddd d|  }tj	j
|djdd nf|dkr@t d tj	j
ttj	j
 jdjdddddddddd  }tj	j
|djd	d d |du rtj	j
 jd }tj	j
|jtjjdjddd}t|}g }|d k rtj	j
ttj	j
 jtjjd!jdddd"dddd d |  }|D ]}|t||d# qtj	j|d	d n
|d krtjj  _t t!j!"  d$| d%|  | _#t t!j!"  d& | _$t t!j!"  d'    t d( W n@ t%y } z&tjj _   t | W Y d }~n
d }~0 0 d S ))Nzf-------------------start sync reviews-----------------------------------------------------------------c                    s@   g | ]8}t  |d  |d |d |d |d t |d dqS )r.   r/   r-   r0   r1   r,   )sourcer.   r/   r-   r0   r1   
date_parseexternal_id)r   r   r3   ).0r$   rD   r)   r*   
<listcomp>u   s   z,AngiCrawler.sync_reviews.<locals>.<listcomp>ze-------------------------------------end reviews ----------------------------------------------------T)	source_id	lock_edit	review_id)flatF)id__indisplay_order__isnull)ignore_conflicts)rK   rO   rP   r,   )rO   )rK   r0   r/   r   )rH   )
company_id)source_id__in	is_activedisplay_orderz-date_modifyz-date   znum active < 20)pk__in)rT   znum active > 20)rT   rU   )company)widget review__source__platform__statusrA      )rR   platform__status)rS   )rY   rA   z - z : z - set total reviewsz - set avg ratingdone)&r   rC   r   r	   
SyncStatusFALSEsync_statussaver   objectsfilterr,   excluder   r   values_listdeletebulk_creater0   r/   listrH   updaterR   order_byr9   r   firstr
   StatusACTIVEr8   SYNCEDr2   r3   reviews_countaverage_ratingr   )r#   rE   is_addresultr&   r=   reviewsdup_reviews_idsuncheck_reviewsrowrH   active_reviewsnum_active_reviewsnested_qrY   widget_reviewsnum_widget_reviewswsnested_wwrB   r)   rD   r*   sync_reviewsl   s    









 





zAngiCrawler.sync_reviewsN)__name__
__module____qualname__strr+   rC   r	   r   r)   r)   r)   r*   r      s   7r   ) r2   jsontimeloggingr   r   Zbilliard.exceptionsr   Zbs4r   dateutilr   django.db.modelsr   django.utilsr   verify_trusted.companies.modelsr   verify_trusted.reviews.modelsr	   r
   Zverify_trusted.widgets.modelsr   r   django.confr   r   r5   r   MAX_CRAWLER_SIZEr:   r   r)   r)   r)   r*   <module>   s&   