import datetime
import json
import time
from logging import getLogger

import requests
from billiard.exceptions import SoftTimeLimitExceeded
from bs4 import BeautifulSoup
from dateutil import parser
from django.db.models import Subquery
from django.utils import timezone

from verify_trusted.companies.models import Review
from verify_trusted.reviews.models import ReviewSource, Platform
from verify_trusted.widgets.models import WidgetReivews, Widget
from django.conf import settings

logger = getLogger(__name__)
# Browser-like User-Agent so Angi serves the full HTML page instead of a
# bot-blocked/empty response; sent with every crawl request below.
headers = {'content-type': 'application/json',
           'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'}
# Upper bound on the number of reviews collected per crawl (from settings).
max_crawler = settings.MAX_CRAWLER_SIZE


class AngiCrawler:
    """Scrapes a company's reviews from an Angi (angi.com) listing page and
    syncs them into the local ``Review`` table.

    Relies on the module-level ``headers`` (browser-like UA), ``logger`` and
    ``max_crawler`` (crawl size cap from settings).
    """

    # Target number of reviews kept active per company.
    ACTIVE_REVIEWS_TARGET = 20
    # Minimum number of reviews attached to a company's widget.
    MIN_WIDGET_REVIEWS = 5
    # Seconds before an HTTP request to Angi is abandoned (the original code
    # had no timeout, which can hang a worker indefinitely).
    REQUEST_TIMEOUT = 30

    def get_info(self, url: str):
        """Fetch the listing page and return ``(avg_rating, total_reviews, pages)``.

        Angi serves (at least) two page layouts, so each value is tried
        against the newer selector first and falls back to the legacy one.
        ``pages`` falls back to 1 when no pagination list is present.

        Raises on network errors or when neither selector matches; the caller
        (``get_reviews``) treats any exception as "sync failed".
        """
        response = requests.get(url, headers=headers, timeout=self.REQUEST_TIMEOUT)
        logger.debug('Angi info request %s -> status %s', url, response.status_code)
        soup = BeautifulSoup(response.text, 'html.parser')

        # AttributeError: select_one returned None; ValueError: int()/float()
        # could not parse the extracted text.
        try:
            total_reviews = int(
                soup.select_one("#Rating-Display-Wrapper > span > p").text
                .replace("(", "").replace(")", "")
                .replace("Verified Reviews", "").strip())
        except (AttributeError, ValueError):
            # Legacy layout fallback.
            total_reviews = int(
                soup.select_one(
                    "#content > div.css-zv8g12.e7lprtl0 > div > div:nth-child(2) > div > div:nth-child(3) > div > div:nth-child(1) > div > div.stack > p").text
                .replace("Average of", "").replace("Customer Reviews", "").strip())

        try:
            avg_ratings = float(
                soup.select_one("#Rating-Display-Wrapper > span > span").text
                .replace('stars', ''))
        except (AttributeError, ValueError):
            avg_ratings = float(
                soup.select_one(
                    "#content > div.css-zv8g12.e7lprtl0 > div > div:nth-child(2) > div > div:nth-child(3) > div > div:nth-child(1) > div > div.stack > div > span").text
                .replace("/5", "").replace('stars', '').strip())

        try:
            pages = int(soup.select("#reviews > ul > ul > li > a")[-1].text)
        except (IndexError, ValueError):
            pages = 1  # no pagination list -> single page

        logger.debug('Angi info: rating=%s total=%s pages=%s',
                     avg_ratings, total_reviews, pages)
        return avg_ratings, total_reviews, pages

    @staticmethod
    def _parse_review_tag(tag):
        """Extract one review dict from its card markup; ``None`` if unusable.

        Author, date and rating are mandatory; a missing body is tolerated
        and stored as ''. ``id``/``headline`` are always None/'' because Angi
        exposes neither.
        """
        review = {'id': None, 'headline': ''}
        try:
            review['author'] = tag.select_one('p:nth-child(3)').text
            review['date'] = datetime.datetime.strptime(
                tag.select_one('p.review-card__report-date').text, '%m/%d/%Y')
            review['rating'] = float(tag.select_one('span.rating-number').text)
        except (AttributeError, ValueError):
            logger.debug('Skipping review card missing author/date/rating')
            return None
        try:
            review['body'] = tag.select_one('p.read-more__truncated-content').text
        except AttributeError:
            review['body'] = ''  # body is optional
        return review

    def get_reviews(self, url: str):
        """Crawl every review page starting at *url*.

        Returns ``(total_reviews, avg_rating, parsed_reviews)`` where
        ``parsed_reviews`` is a list of dicts capped at ``max_crawler``,
        or ``None`` when anything fails (callers use the ``None`` sentinel
        to mark the source's sync as failed).
        """
        logger.info('Starting Angi crawl: %s', url)
        started_at = datetime.datetime.now()
        try:
            avg_rating, total_reviews, pages_num = self.get_info(url)
            parsed_reviews = []
            for page in range(pages_num):
                # Page 0 is the base listing URL; later pages are ?page=N.
                crawl_url = url if page == 0 else f'{url}?page={page}'
                logger.debug(crawl_url)
                response = requests.get(crawl_url, headers=headers,
                                        timeout=self.REQUEST_TIMEOUT)
                soup = BeautifulSoup(response.text, 'html.parser')
                for tag in soup.select("#reviews > div.reviews__content > div > div"):
                    review = self._parse_review_tag(tag)
                    if review is not None:
                        parsed_reviews.append(review)
                if len(parsed_reviews) >= max_crawler:
                    parsed_reviews = parsed_reviews[:max_crawler]
                    break
                logger.debug('%s reviews parsed after %s',
                             len(parsed_reviews),
                             datetime.datetime.now() - started_at)
            return total_reviews, avg_rating, parsed_reviews
        except Exception:
            # Any failure (network, layout change, parse error) fails the
            # whole sync; logger.exception keeps the traceback.
            logger.exception('Angi crawl failed for %s', url)
            return None

    def sync_reviews(self, review_source: ReviewSource, is_add):
        """Crawl ``review_source.url`` and reconcile the stored reviews.

        Steps:
          1. Crawl; on failure mark the source FALSE and bail out.
          2. Delete previously crawled (``lock_edit``) rows that are not
             pinned to a widget, then bulk-insert the fresh crawl.
          3. De-duplicate against pinned rows, copying the freshly crawled
             ``external_id`` onto the pinned row before deleting the dupe.
          4. Keep exactly ``ACTIVE_REVIEWS_TARGET`` reviews active company-wide.
          5. When *is_add* is True, top the company's widget up to
             ``MIN_WIDGET_REVIEWS`` reviews.
          6. Store the crawled totals on the source and mark it SYNCED.
        """
        logger.info('Start sync for review source %s', review_source.id)
        result = self.get_reviews(review_source.url)
        if result is None:
            review_source.sync_status = ReviewSource.SyncStatus.FALSE
            review_source.save()
            return
        total_reviews, avg_rating, parsed = result
        new_rows = [
            Review(
                source=review_source,
                author=r['author'],
                date=r['date'],
                headline=r['headline'],
                body=r['body'],
                rating=r['rating'],
                date_parse=timezone.now(),
                external_id=r['id'],
            )
            for r in parsed
        ]
        # Drop previously crawled rows, except those both pinned to a widget
        # and carrying an explicit display order.
        Review.objects.filter(source_id=review_source.id, lock_edit=True).exclude(
            id__in=Subquery(WidgetReivews.objects.values_list('review_id', flat=True)),
            display_order__isnull=False).delete()
        Review.objects.bulk_create(new_rows, ignore_conflicts=False)
        try:
            # Pinned rows: attached to a widget AND holding a display order.
            pinned_ids = Review.objects.filter(
                source_id=review_source.id,
                id__in=Subquery(WidgetReivews.objects.values_list('review_id', flat=True)),
                display_order__isnull=False).values_list('id', flat=True)

            for pinned in Review.objects.filter(id__in=pinned_ids):
                duplicates = Review.objects.filter(
                    source_id=review_source.id, body=pinned.body, date=pinned.date,
                ).exclude(id__in=list(pinned_ids))
                # Copy the crawled external_id onto the pinned row. (The old
                # code called the nonexistent instance method Review.update(),
                # whose AttributeError was silently swallowed, so external_id
                # was never actually copied.)
                first_dup = duplicates.first()
                if first_dup is not None:
                    pinned.external_id = first_dup.external_id
                    pinned.save(update_fields=['external_id'])
                duplicates.delete()

            company_source_ids = Subquery(ReviewSource.objects.filter(
                company_id=review_source.company_id).values_list('id', flat=True))
            ordering = ('display_order', '-date_modify', '-date')
            num_active = Review.objects.filter(
                source_id__in=company_source_ids, is_active=True).count()
            if num_active < self.ACTIVE_REVIEWS_TARGET:
                logger.debug('num active < %s', self.ACTIVE_REVIEWS_TARGET)
                to_activate = Review.objects.filter(
                    source_id__in=company_source_ids, is_active=False,
                ).order_by(*ordering)[:self.ACTIVE_REVIEWS_TARGET - num_active]
                Review.objects.filter(pk__in=to_activate).update(is_active=True)
            elif num_active > self.ACTIVE_REVIEWS_TARGET:
                logger.debug('num active > %s', self.ACTIVE_REVIEWS_TARGET)
                overflow = Review.objects.filter(
                    source_id__in=company_source_ids, is_active=True,
                ).order_by(*ordering)[self.ACTIVE_REVIEWS_TARGET:]
                Review.objects.filter(pk__in=overflow).update(
                    is_active=False, display_order=None)

            if is_add is True:
                widget = Widget.objects.filter(company=review_source.company_id).first()
                # Guard: the company may not have a widget yet (the old code
                # would raise AttributeError on widget.id here).
                if widget is not None:
                    num_widget_reviews = WidgetReivews.objects.filter(
                        widget=widget.id,
                        review__source__platform__status=Platform.Status.ACTIVE,
                    ).count()
                    if num_widget_reviews < self.MIN_WIDGET_REVIEWS:
                        candidates = Review.objects.filter(
                            source_id__in=Subquery(ReviewSource.objects.filter(
                                company_id=review_source.company_id,
                                platform__status=Platform.Status.ACTIVE,
                            ).values_list('id', flat=True)),
                        ).order_by(*ordering)[:self.MIN_WIDGET_REVIEWS - num_widget_reviews]
                        WidgetReivews.objects.bulk_create(
                            [WidgetReivews(widget=widget, review=r) for r in candidates],
                            ignore_conflicts=False)

            review_source.sync_status = ReviewSource.SyncStatus.SYNCED
            review_source.reviews_count = total_reviews
            review_source.average_rating = avg_rating
            review_source.save()
            logger.info('Synced source %s: %s reviews, avg %s',
                        review_source.id, total_reviews, avg_rating)
        except SoftTimeLimitExceeded:
            # Celery soft time limit hit mid-sync: record the failure so the
            # source is not left looking in-progress.
            review_source.sync_status = ReviewSource.SyncStatus.FALSE
            review_source.save()
            logger.warning('Angi sync soft-timed-out for source %s', review_source.id)
