import datetime
import json
from logging import getLogger

import requests
from billiard.exceptions import SoftTimeLimitExceeded
from bs4 import BeautifulSoup
from dateutil import parser

from django.conf import settings
from django.db.models import Subquery
from django.utils import timezone

from verify_trusted.companies.models import Review
from verify_trusted.reviews.models import Platform, ReviewSource
from verify_trusted.widgets.models import Widget, WidgetReivews

logger = getLogger(__name__)
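# Upper bound on how many reviews a single crawl will collect.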
max_crawler = settings.MAX_CRAWLER_SIZE

class TrustpilotCrawler:
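    """Crawls Trustpilot review pages and syncs them into the local database."""
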
    def save_reviews(self, reviews, review_source_id):
        """Insert crawled reviews that are not already stored for this source."""
        existing = list(Review.objects.filter(source_id=review_source_id))
        insert_reviews = []
        for review in reviews:
            # Treat a review as already stored when an existing row matches it
            # on date, rating and author; only unmatched reviews are inserted.
            is_duplicate = any(
                review.date == row.date
                and review.rating == row.rating
                and review.author == row.author
                for row in existing
            )
            if not is_duplicate:
                insert_reviews.append(review)
        Review.objects.bulk_create(insert_reviews, ignore_conflicts=True)

    @staticmethod
    def get_num_review_page(url: str) -> int:
        """
        Get the number of review pages for a business on Trustpilot.
        e.g. 40
        :param url: e.g. https://www.trustpilot.com/review/texasbaycu.org
        :return: page count, or 1 when pagination cannot be parsed
        """
        # The timeout (an arbitrary 30s) keeps a stalled request from hanging the task.
        r = requests.get(url, timeout=30)
        soup = BeautifulSoup(r.content, 'html.parser')
        try:
            # The second-to-last pagination link holds the last page number.
            last_page = soup.select("div.styles_pagination__6VmQv > nav > a")[-2]
            return int(last_page.text.strip())
        except (IndexError, ValueError):
            # Listings with a single page have no pagination block.
            return 1

    def get_reviews(self, url: str):
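        """Crawl every review page for the given Trustpilot URL.

        Returns a (total_reviews, average_rating, parsed_reviews) tuple; the
        aggregates fall back to None when the crawl is cut off before the
        header figures on page one were read.
        """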
        logger.debug("Starting Trustpilot crawl for %s", url)
        page_num = self.get_num_review_page(url)
        logger.debug("Found %s review pages", page_num)
        parsed_reviews = []
        start = datetime.datetime.now()
        total_reviews = 0
        avg_rating = 0.0
        for num in range(1, page_num + 1):
            crawl_url = f"{url}?page={num}"
            logger.debug("Crawling %s", crawl_url)

            res = requests.get(crawl_url, timeout=30)  # timeout is an arbitrary safeguard against hangs
            soup = BeautifulSoup(res.content, 'html.parser')
            # The review payload is embedded as JSON in the page's last <script> tag.
            page_data = json.loads(soup.select("script")[-1].text)
            reviews = page_data["props"]["pageProps"]["reviews"]
            if num == 1:
                # The first page's header exposes the total review count and average rating.
                total_reviews = int(soup.select_one(
                    "div.styles_header__yrrqf > p").text.replace("total", "").replace(",", ""))
                avg_rating = float(soup.select_one(
                    "div.styles_header__yrrqf > h2 > span").text.replace(",", ""))
                logger.debug("%s total reviews, average rating %s", total_reviews, avg_rating)
            for review in reviews:
                parsed_reviews.append({
                    'id': review['id'],
                    'author': str(review['consumer']['displayName'] or '').strip(),
                    'date': parser.parse(review['dates']['publishedDate']),
                    'headline': str(review['title'] or '').strip(),
                    'body': str(review['text'] or '').strip(),
                    'rating': review['rating'],
                })

            logger.debug("Collected %s reviews so far", len(parsed_reviews))
            if len(parsed_reviews) % 100 == 0:
                logger.debug("%s reviews after %s", len(parsed_reviews), datetime.datetime.now() - start)
            if len(parsed_reviews) > max_crawler:
                parsed_reviews = [k for j, k in enumerate(parsed_reviews) if k not in parsed_reviews[j + 1:]]
                parsed_reviews = parsed_reviews[:max_crawler]
                if total_reviews == 0 and avg_rating == 0:
                    total_reviews = None
                    avg_rating = None
                break
        return total_reviews, avg_rating, parsed_reviews

    def sync_reviews(self, review_source: ReviewSource, is_add):
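        """Crawl the source URL, store new reviews, rebalance the active and
        widget review sets, then update the source's aggregate counters."""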
        total_reviews, avg_rating, reviews = self.get_reviews(review_source.url)
        if not reviews:
            # Nothing was crawled: mark the source as failed and stop.
            review_source.sync_status = ReviewSource.SyncStatus.FALSE
            review_source.save()
            return
        reviews = [
            Review(
                source=review_source,
                author=r['author'],
                date=r['date'],
                headline=r['headline'],
                body=r['body'],
                rating=r['rating'],
                date_parse=timezone.now(),
                external_id=r['id'],
            )
            for r in reviews
        ]
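        # Remove locked reviews for this source unless they are both attached
        # to a widget and have an explicit display_order.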
        Review.objects.filter(source_id=review_source.id, lock_edit=True).exclude(id__in=Subquery(
            WidgetReivews.objects.values_list('review_id', flat=True)), display_order__isnull=False).delete()
        Review.objects.bulk_create(reviews, ignore_conflicts=False)  # TODO: revisit once testing is done
        try:
            # Collect ids of reviews that are attached to a widget and have a
            # display_order set; these are protected from the cleanup below.
            dup_reviews_ids = Review.objects.filter(source_id=review_source.id, id__in=Subquery(
                WidgetReivews.objects.values_list('review_id', flat=True)), display_order__isnull=False).values_list(
                'id', flat=True)

            # For each protected review, delete freshly crawled duplicates
            # (same body and date) that are not referenced by a widget.
            uncheck_reviews = Review.objects.filter(id__in=dup_reviews_ids)

            for row in uncheck_reviews:
                duplicates = Review.objects.filter(
                    source_id=review_source.id, body=row.body, date=row.date
                ).exclude(id__in=list(dup_reviews_ids))
                match = duplicates.first()
                if match is not None:
                    # Carry the crawled external_id over to the protected review
                    # before its duplicates are removed.
                    row.external_id = match.external_id
                    row.save(update_fields=['external_id'])
                duplicates.delete()
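            # Keep the company's active review set at 20: activate inactive
            # reviews when short of 20, deactivate the overflow when above it.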
            company_source_ids = Subquery(ReviewSource.objects.filter(
                company_id=review_source.company_id).values_list('id', flat=True))
            active_reviews = Review.objects.filter(
                source_id__in=company_source_ids, is_active=True
            ).order_by('display_order', '-date_modify', '-date')
            num_active_reviews = active_reviews.count()
            if num_active_reviews < 20:
                nested_q = Review.objects.filter(
                    source_id__in=company_source_ids, is_active=False
                ).order_by('display_order', '-date_modify', '-date')[:(20 - num_active_reviews)]
                Review.objects.filter(pk__in=nested_q).update(is_active=True)
            elif num_active_reviews > 20:
                nested_q = Review.objects.filter(
                    source_id__in=company_source_ids, is_active=True
                ).order_by('display_order', '-date_modify', '-date')[20:]
                Review.objects.filter(pk__in=nested_q).update(is_active=False, display_order=None)

            if is_add:
                widget = Widget.objects.filter(company=review_source.company_id).first()
                if widget is not None:
                    widget_reviews = WidgetReivews.objects.filter(
                        widget=widget.id,
                        review__source__platform__status=Platform.Status.ACTIVE,
                    ).values_list('review', flat=True)
                    num_widget_reviews = len(widget_reviews)
                    if num_widget_reviews < 5:
                        # Seed the widget with reviews from the company's sources
                        # on active platforms until it holds 5 of them.
                        nested_w = Review.objects.filter(source_id__in=Subquery(ReviewSource.objects.filter(
                            company_id=review_source.company_id, platform__status=Platform.Status.ACTIVE
                        ).values_list('id', flat=True))).order_by(
                            'display_order', '-date_modify', '-date')[:(5 - num_widget_reviews)]
                        ws = [WidgetReivews(widget=widget, review=w) for w in nested_w]
                        WidgetReivews.objects.bulk_create(ws, ignore_conflicts=False)
                    elif num_widget_reviews > 5:
                        # No trimming is done when the widget already holds more than 5 reviews.
                        pass

            review_source.sync_status = ReviewSource.SyncStatus.SYNCED
            review_source.reviews_count = total_reviews
            review_source.average_rating = avg_rating
            review_source.save()
            logger.debug("Synced %s: %s reviews, average rating %s",
                         review_source.url, total_reviews, avg_rating)

        except SoftTimeLimitExceeded:
            # The task's soft time limit was exceeded; mark the sync as failed.
            review_source.sync_status = ReviewSource.SyncStatus.FALSE
            review_source.save()
            logger.exception("Trustpilot sync timed out for %s", review_source.url)

        # return reviews
