import datetime
import json
import time
from logging import getLogger
from django.conf import settings

import requests
from billiard.exceptions import SoftTimeLimitExceeded
from bs4 import BeautifulSoup
from dateutil import parser
from django.db.models import Subquery
from django.utils import timezone

from verify_trusted.companies.models import Review
from verify_trusted.reviews.models import ReviewSource, Platform
from verify_trusted.widgets.models import WidgetReivews, Widget


# Module-level logger, named after this module's import path.
logger = getLogger(__name__)

# HTTP headers passed to the ``requests.get`` calls made in this module.
headers = {
    'content-type': 'application/json',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)',
}

# Upper bound on how many parsed reviews a single crawl keeps (from settings).
max_crawler = settings.MAX_CRAWLER_SIZE

class HomeAdvisorCrawler:
    """Fetch HomeAdvisor reviews for a company and sync them into ``Review`` rows.

    ``get_avg_rating`` scrapes the profile page, ``get_reviews`` pages through
    the JSON review endpoint, and ``sync_reviews`` writes the result to the
    database and refreshes the ``ReviewSource`` bookkeeping fields.
    """

    # Socket timeout (seconds) for HomeAdvisor requests, so a stalled
    # connection raises instead of blocking the worker forever.
    REQUEST_TIMEOUT = 30
    # Number of reviews per company that are kept flagged ``is_active``.
    MAX_ACTIVE_REVIEWS = 20
    # Number of reviews a widget is topped up to when ``is_add`` is True.
    WIDGET_REVIEW_TARGET = 5
    # Ranking used whenever reviews are picked for activation or for a widget.
    REVIEW_ORDERING = ('display_order', '-date_modify', '-date')

    # CSS path to the overall numeric rating on a profile page. Raw strings
    # keep the backslashes that escape ``@`` and ``:`` inside class names
    # without relying on Python passing unknown escape sequences through.
    _AVG_RATING_SELECTOR = (
        r'#app > div > '
        r'div.ha_sp-profile-body-container.\@px-2.md\:\@px-6.\@pb-5.\@bg-gray-300'
        r'.\@h-auto.\@flex.\@flex-col.\@items-center.\@w-full > main > '
        r'div.ha_ratings.\@mt-4 > div > div > '
        r'section.sp--rating_overall.short-border-r.\@border-none.\@flex-grow'
        r'.\@flex-shrink.\@pt-4.\@pl-0.\@pr-12 > div > '
        r'div.numeric-rating.\@font-general.\@font-bold.md\:\@text-xl.\@text-base'
        r'.\@border-2.\@border-solid.\@border-gray-350.md\:\@text-gold.\@text-gold'
        r'.\@ml-2.md\:\@ml-0'
    )

    def get_avg_rating(self, url: str):
        """Scrape the overall numeric rating from a HomeAdvisor profile page.

        Args:
            url: URL of the company's profile page.

        Returns:
            The text of the rating element converted to ``float``.

        Raises:
            requests.RequestException: the page could not be fetched in time.
            AttributeError: the rating element was not found in the page.
            ValueError: the element text is not a number.
        """
        response = requests.get(url, headers=headers, timeout=self.REQUEST_TIMEOUT)
        soup = BeautifulSoup(response.text, 'html.parser')
        return float(soup.select_one(self._AVG_RATING_SELECTOR).text.strip())

    def get_reviews(self, url: str):
        """Download the newest reviews of a company from the JSON endpoint.

        The company id is the second-to-last dot-separated piece of ``url``
        (presumably profile URLs end in ``.<id>.html`` -- confirm). Pages of
        ten reviews are requested until the reported page count is exhausted
        or ``max_crawler`` reviews have been collected.

        Args:
            url: URL of the company's profile page.

        Returns:
            ``(total_reviews, avg_rating, parsed_reviews)`` where
            ``parsed_reviews`` is a list of dicts with the keys ``id``,
            ``author``, ``date``, ``headline``, ``body`` and ``rating``;
            or ``None`` if anything fails while crawling.
        """
        print("==================================================HomeAdvisor Crawler=========================================")
        try:
            avg_rating = self.get_avg_rating(url)
            company_id = url.split(".")[-2]
            parsed_reviews = []
            total_reviews = 0
            total_page = 1
            page = 1
            while page <= total_page:
                page_url = f'https://www.homeadvisor.com/sm/reviews/{company_id}?page={page}&sort=newest&pageSize=10'
                logger.debug(page_url)
                response = requests.get(page_url, headers=headers, timeout=self.REQUEST_TIMEOUT)
                json_response = json.loads(response.text)
                if page == 1:
                    # Totals are only read from the first page's pagination.
                    pagination = json_response['pagination']
                    total_reviews = pagination['totalResultCount']
                    total_page = pagination['pageCount']
                    print(total_reviews)
                logger.debug(json.dumps(json_response, indent=2))
                parsed_reviews += [
                    {
                        'id': review['spRatingComment']['ratingId'],
                        'author': review['consumerName'],
                        # Each review carries its own timestamp; the value is
                        # divided by 1e3, i.e. treated as epoch milliseconds.
                        # fromtimestamp() without tz yields a naive local time.
                        'date': datetime.datetime.fromtimestamp(review['createDate'] / 1e3),
                        'headline': '',
                        'body': review['comment'],
                        'rating': review['overallRating'],
                    }
                    for review in json_response['ratings']
                ]
                page += 1
                if len(parsed_reviews) >= max_crawler:
                    parsed_reviews = parsed_reviews[:max_crawler]
                    break
            return total_reviews, avg_rating, parsed_reviews
        except Exception:
            # Best-effort crawl: callers treat None as "sync failed", but keep
            # the traceback in the log instead of printing only the message.
            logger.exception('HomeAdvisor crawl failed for %s', url)
            return None

    def _ranked_company_reviews(self, review_source, source_filters=None, **review_filters):
        """Return the ranked reviews of every source of ``review_source``'s company.

        ``source_filters`` narrows the ``ReviewSource`` rows considered and
        ``review_filters`` narrows the ``Review`` rows themselves.
        """
        source_ids = ReviewSource.objects.filter(
            company_id=review_source.company_id, **(source_filters or {})
        ).values_list('id', flat=True)
        return Review.objects.filter(
            source_id__in=Subquery(source_ids), **review_filters
        ).order_by(*self.REVIEW_ORDERING)

    def _merge_protected_duplicates(self, review_source):
        """Fold freshly created copies into the reviews that were kept.

        "Kept" reviews are those of this source that are referenced by a
        ``WidgetReivews`` row and have a ``display_order``. For each of them,
        other reviews of the source with the same body and date are deleted
        and the first such copy's ``external_id`` is stored on the kept row.
        """
        kept_qs = Review.objects.filter(
            source_id=review_source.id,
            id__in=Subquery(WidgetReivews.objects.values_list('review_id', flat=True)),
            display_order__isnull=False,
        ).values_list('id', flat=True)
        kept_ids = list(kept_qs)

        for row in Review.objects.filter(id__in=kept_qs):
            duplicates = Review.objects.filter(
                source_id=review_source.id, body=row.body, date=row.date
            ).exclude(id__in=kept_ids)
            first_copy = list(duplicates[:1])
            # Delete before copying the id so the kept row never shares an
            # external_id with a row that still exists.
            duplicates.delete()
            if first_copy:
                # Model instances have no update(); go through the queryset so
                # only this column is written.
                Review.objects.filter(pk=row.pk).update(external_id=first_copy[0].external_id)

    def _rebalance_active_reviews(self, review_source):
        """Bring the company's active-review count to ``MAX_ACTIVE_REVIEWS``.

        Too few: the best-ranked inactive reviews are activated. Too many:
        everything ranked past the limit is deactivated and loses its
        ``display_order``.
        """
        limit = self.MAX_ACTIVE_REVIEWS
        num_active = self._ranked_company_reviews(review_source, is_active=True).count()
        if num_active < limit:
            to_activate = self._ranked_company_reviews(
                review_source, is_active=False)[:(limit - num_active)]
            Review.objects.filter(pk__in=to_activate).update(is_active=True)
        elif num_active > limit:
            to_deactivate = self._ranked_company_reviews(
                review_source, is_active=True)[limit:]
            Review.objects.filter(pk__in=to_deactivate).update(is_active=False, display_order=None)

    def _fill_widget(self, review_source):
        """Top up the company's first widget to ``WIDGET_REVIEW_TARGET`` reviews.

        Only widget reviews whose platform is active are counted, and only
        reviews from sources with an active platform are added.
        """
        widget = Widget.objects.filter(company=review_source.company_id).first()
        if widget is None:
            # Nothing to attach reviews to; do not abort the whole sync.
            logger.warning('No widget found for company %s', review_source.company_id)
            return

        attached = WidgetReivews.objects.filter(
            widget=widget.id,
            review__source__platform__status=Platform.Status.ACTIVE,
        ).count()
        missing = self.WIDGET_REVIEW_TARGET - attached
        if missing <= 0:
            return

        # NOTE(review): candidates are not filtered against reviews already
        # attached to the widget, so the same review may be linked twice --
        # confirm whether that is intended.
        candidates = self._ranked_company_reviews(
            review_source,
            source_filters={'platform__status': Platform.Status.ACTIVE},
        )[:missing]
        WidgetReivews.objects.bulk_create(
            [WidgetReivews(widget=widget, review=candidate) for candidate in candidates],
            ignore_conflicts=False,
        )

    def sync_reviews(self, review_source: ReviewSource, is_add):
        """Crawl ``review_source.url`` and store the result in the database.

        On crawl failure the source is saved with ``SyncStatus.FALSE`` and
        nothing else is touched. Otherwise the crawled reviews are inserted,
        copies of kept reviews are merged, the company's active-review set is
        rebalanced, and the source is saved with ``SyncStatus.SYNCED`` plus
        the crawled review count and average rating.

        Args:
            review_source: The source whose reviews are synced.
            is_add: When exactly ``True``, the company's first widget is also
                topped up with reviews.

        Returns:
            None.
        """
        result = self.get_reviews(review_source.url)
        if result is None:
            review_source.sync_status = ReviewSource.SyncStatus.FALSE
            review_source.save()
            return

        total_reviews, avg_rating, crawled = result
        fresh_reviews = [
            Review(
                source=review_source,
                author=item['author'],
                date=item['date'],
                headline=item['headline'],
                body=item['body'],
                rating=item['rating'],
                date_parse=timezone.now(),
                external_id=item['id'],
            )
            for item in crawled
        ]

        # Drop this source's lock_edit=True reviews, except those that are
        # both attached to a widget and carry a display_order.
        Review.objects.filter(source_id=review_source.id, lock_edit=True).exclude(
            id__in=Subquery(WidgetReivews.objects.values_list('review_id', flat=True)),
            display_order__isnull=False,
        ).delete()
        # TODO: revisit ignore_conflicts once testing is finished.
        Review.objects.bulk_create(fresh_reviews, ignore_conflicts=False)

        try:
            self._merge_protected_duplicates(review_source)
            self._rebalance_active_reviews(review_source)
            if is_add is True:
                self._fill_widget(review_source)

            review_source.sync_status = ReviewSource.SyncStatus.SYNCED
            print(f"{datetime.datetime.now()} - {total_reviews} : {avg_rating}")
            review_source.reviews_count = total_reviews
            print(f"{datetime.datetime.now()} - set total reviews")
            review_source.average_rating = avg_rating
            print(f"{datetime.datetime.now()} - set avg rating")
            review_source.save()
            print("done")
        except SoftTimeLimitExceeded:
            # The task ran out of time: record the failure on the source.
            review_source.sync_status = ReviewSource.SyncStatus.FALSE
            review_source.save()
            logger.exception('Soft time limit exceeded while syncing %s', review_source.url)
