import datetime
import json
import time
from logging import getLogger
from typing import Tuple
from django.conf import settings
import requests
from billiard.exceptions import SoftTimeLimitExceeded
from bs4 import BeautifulSoup
from dateutil import parser
from django.db.models import Subquery
from django.utils import timezone
from verify_trusted.companies.models import Review
from verify_trusted.reviews.models import ReviewSource, Platform
from verify_trusted.widgets.models import Widget, WidgetReivews

# Module-level logger, named after this module.
logger = getLogger(__name__)
# Cap on how many reviews a single crawl keeps; read from the Django setting
# ``MAX_CRAWLER_SIZE`` once, when this module is imported.
max_crawler = settings.MAX_CRAWLER_SIZE

class YelpCrawler:
    """Crawl a Yelp business page and mirror its reviews into ``Review`` rows.

    ``get_info`` scrapes the headline numbers from the public HTML page,
    ``get_reviews`` walks the paginated ``review_feed`` JSON endpoint, and
    ``sync_reviews`` stores the result for a ``ReviewSource`` and records the
    outcome in its ``sync_status``.
    """

    # Seconds to wait on a single HTTP request, so that a stalled connection
    # cannot block the worker indefinitely.
    REQUEST_TIMEOUT = 30
    # Offset step between two consecutive ``review_feed`` pages.
    REVIEWS_PER_PAGE = 10
    # Number of reviews per company that are kept flagged ``is_active``.
    MAX_ACTIVE_REVIEWS = 20
    # Number of reviews a widget is filled up to when ``is_add`` is set.
    MAX_WIDGET_REVIEWS = 5

    @staticmethod
    def get_info(url: str) -> tuple[int, float, str]:
        """
        Scrape the review count, average rating and business id from a Yelp page.

        :param url: e.g https://www.yelp.com/biz/desert-fireshield-indio
        :return: ``(total_reviews, avg_rating, biz_id)`` where ``biz_id`` is the
            content of the ``yelp-biz-id`` meta tag, e.g: HsDraKmc4d_OMUlDy0h04Q
        :raises AttributeError, TypeError, ValueError: when the page lacks the
            expected elements or their text cannot be converted to numbers
        :raises requests.RequestException: on network failure or timeout
        """
        r = requests.get(url, timeout=YelpCrawler.REQUEST_TIMEOUT)
        soup = BeautifulSoup(r.content, 'html.parser')
        biz_id = soup.find('meta', attrs={'name': 'yelp-biz-id'})

        count_node = soup.select_one(
            "div.rating-text__09f24__VDRkR.padding-t0-5__09f24__lDQoQ.border-color--default__09f24__NPAKY > p")
        # Tolerate the singular label ("1 review") and thousands separators
        # ("1,234 reviews") in addition to the plain "<n> reviews" form.
        count_text = count_node.text.replace("reviews", "").replace("review", "").replace(",", "").strip()
        total_reviews = int(count_text)

        rating_node = soup.select_one(
            "div.arrange__09f24__LDfbs.gutter-1-5__09f24__vMtpw.vertical-align-middle__09f24__zU9sE.margin-b2__09f24__CEMjT.border-color--default__09f24__NPAKY > div > span > div")
        avg_rating = float(rating_node['aria-label'].replace("star rating", "").strip())
        logger.debug("yelp info: total_reviews=%s avg_rating=%s", total_reviews, avg_rating)

        return total_reviews, avg_rating, biz_id.attrs['content']

    @staticmethod
    def get_total_reviews(url: str) -> int:
        """
        Read ``pagination.totalResults`` from a ``review_feed`` URL.

        :param url: a ``review_feed`` endpoint URL
        :return: the reported total, or ``-1`` when the response is not HTTP 200
        """
        r = requests.get(url, timeout=YelpCrawler.REQUEST_TIMEOUT)
        if r.status_code != 200:
            return -1
        total_reviews = json.loads(r.text)['pagination']['totalResults']
        logger.debug("yelp feed total results: %s", total_reviews)
        return total_reviews

    @staticmethod
    def _parse_feed_reviews(feed_reviews):
        """Convert raw ``review_feed`` entries into plain dicts, skipping undated ones."""
        return [{
            'id': review['id'],
            'author': review['user']['markupDisplayName'],
            'date': parser.parse(review['localizedDate']),
            'headline': '',
            'body': review['comment']['text'],
            'rating': review['rating'],
        } for review in feed_reviews if review['localizedDate'] is not None]

    def _crawl_review_feed(self, biz_id):
        """Collect up to ``max_crawler`` unique reviews from the paginated review feed."""
        feed_url = f'https://www.yelp.com/biz/{biz_id}/review_feed'
        collected = []
        seen_ids = set()
        total_in_feed = None  # learned from the first page's pagination block
        offset = 0
        while len(collected) < max_crawler:
            # Stop once the offset has reached the reported total; requesting
            # ``start == total`` would only return an empty page.
            if total_in_feed is not None and offset >= total_in_feed:
                break
            page_url = feed_url if offset == 0 else f'{feed_url}?start={offset}'
            logger.debug(page_url)
            response = requests.get(page_url, timeout=self.REQUEST_TIMEOUT)
            if response.status_code != 200:
                logger.warning("yelp feed returned HTTP %s for %s", response.status_code, page_url)
                break
            try:
                payload = json.loads(response.text)
                if total_in_feed is None:
                    total_in_feed = payload['pagination']['totalResults']
                feed_reviews = payload['reviews']
                page_reviews = self._parse_feed_reviews(feed_reviews)
            except Exception:
                # Deliberate best effort: a malformed page ends the crawl but
                # keeps the reviews gathered so far.
                logger.exception("could not parse yelp feed page %s", page_url)
                break
            if not feed_reviews:
                # An empty page means nothing is left, whatever the total says.
                break
            # De-duplicate on the fly by review id, keeping feed order.
            for review in page_reviews:
                if review['id'] not in seen_ids:
                    seen_ids.add(review['id'])
                    collected.append(review)
            offset += self.REVIEWS_PER_PAGE
        return collected[:max_crawler]

    def get_reviews(self, url: str):
        """
        Crawl the headline numbers and the reviews of a Yelp business.

        :param url: public business URL, as accepted by ``get_info``
        :return: ``(total_reviews_count, avg_rating, parsed_reviews)``, or
            ``None`` when the crawl failed. ``parsed_reviews`` is a list of
            dicts with the keys ``id``, ``author``, ``date``, ``headline``,
            ``body`` and ``rating``. When reviews were found but the page
            reported a count and rating of 0, both numbers are returned as
            ``None``.
        """
        try:
            total_reviews_count, avg_rating, biz_id = self.get_info(url)
            logger.debug("yelp crawler: biz_id=%s", biz_id)
            parsed_reviews = self._crawl_review_feed(biz_id)
            if parsed_reviews and total_reviews_count == 0 and avg_rating == 0:
                total_reviews_count = None
                avg_rating = None
            logger.debug("yelp crawler: collected %s reviews", len(parsed_reviews))
            return total_reviews_count, avg_rating, parsed_reviews
        except Exception:
            logger.exception("yelp crawl failed for %s", url)
            return None

    @staticmethod
    def _company_reviews(review_source, **filters):
        """Return the reviews of every source of the company, in display order."""
        company_source_ids = ReviewSource.objects.filter(
            company_id=review_source.company_id).values_list('id', flat=True)
        return Review.objects.filter(
            source_id__in=Subquery(company_source_ids), **filters,
        ).order_by('display_order', '-date_modify', '-date')

    @staticmethod
    def _store_crawled_reviews(review_source, raw_reviews):
        """Clear replaceable reviews of the source and insert the crawled ones."""
        new_reviews = [
            Review(
                source=review_source,
                author=r['author'],
                date=r['date'],
                headline=r['headline'],
                body=r['body'],
                rating=r['rating'],
                date_parse=timezone.now(),
                external_id=r['id'],
            )
            for r in raw_reviews
        ]
        # Delete this source's ``lock_edit=True`` reviews, except those that
        # are both attached to a widget and carry a display order.
        Review.objects.filter(source_id=review_source.id, lock_edit=True).exclude(
            id__in=Subquery(WidgetReivews.objects.values_list('review_id', flat=True)),
            display_order__isnull=False,
        ).delete()
        # TODO: revisit ``ignore_conflicts`` once testing is finished
        # (translated from the original note).
        Review.objects.bulk_create(new_reviews, ignore_conflicts=False)

    @staticmethod
    def _merge_pinned_duplicates(review_source):
        """Fold freshly crawled copies of widget-pinned reviews into the pinned rows."""
        # Reviews of this source that sit in a widget and have a display order.
        pinned_ids = list(Review.objects.filter(
            source_id=review_source.id,
            id__in=Subquery(WidgetReivews.objects.values_list('review_id', flat=True)),
            display_order__isnull=False,
        ).values_list('id', flat=True))

        for pinned in Review.objects.filter(id__in=pinned_ids):
            duplicates = Review.objects.filter(
                source_id=review_source.id, body=pinned.body, date=pinned.date,
            ).exclude(id__in=pinned_ids)
            fresh_copy = duplicates.first()
            if fresh_copy is None:
                continue
            external_id = fresh_copy.external_id
            # Remove the copies first so the id is never held by two rows at
            # once, then move it onto the pinned review. A queryset update is
            # used because model instances have no ``update()`` method.
            duplicates.delete()
            Review.objects.filter(pk=pinned.pk).update(external_id=external_id)

    def _rebalance_active_reviews(self, review_source):
        """Activate or deactivate company reviews to reach ``MAX_ACTIVE_REVIEWS`` active ones."""
        limit = self.MAX_ACTIVE_REVIEWS
        num_active_reviews = self._company_reviews(review_source, is_active=True).count()
        if num_active_reviews < limit:
            to_activate = self._company_reviews(
                review_source, is_active=False)[:limit - num_active_reviews]
            Review.objects.filter(pk__in=to_activate).update(is_active=True)
        elif num_active_reviews > limit:
            to_deactivate = self._company_reviews(review_source, is_active=True)[limit:]
            Review.objects.filter(pk__in=to_deactivate).update(is_active=False, display_order=None)

    def _fill_widget(self, review_source):
        """Attach reviews to the company's first widget until it holds ``MAX_WIDGET_REVIEWS``."""
        widget = Widget.objects.filter(company=review_source.company_id).first()
        # NOTE(review): ``widget`` is None when the company has no widget; the
        # attribute access below then raises and the caller marks the sync as
        # failed - confirm that this is intended.
        num_widget_reviews = WidgetReivews.objects.filter(
            widget=widget.id,
            review__source__platform__status=Platform.Status.ACTIVE,
        ).count()
        missing = self.MAX_WIDGET_REVIEWS - num_widget_reviews
        if missing <= 0:
            return
        active_source_ids = ReviewSource.objects.filter(
            company_id=review_source.company_id,
            platform__status=Platform.Status.ACTIVE,
        ).values_list('id', flat=True)
        # NOTE(review): candidates are not filtered against reviews that are
        # already attached to the widget - verify duplicates cannot occur.
        candidates = Review.objects.filter(
            source_id__in=Subquery(active_source_ids),
        ).order_by('display_order', '-date_modify', '-date')[:missing]
        ws = [WidgetReivews(widget=widget, review=review) for review in candidates]
        WidgetReivews.objects.bulk_create(ws, ignore_conflicts=False)
        logger.debug("attached %s reviews to widget %s", len(ws), widget.id)

    def sync_reviews(self, review_source: ReviewSource, is_add):
        """
        Crawl ``review_source.url`` and persist the outcome.

        On success the crawled reviews are stored, duplicates of widget-pinned
        reviews are merged, the company's active reviews are rebalanced and
        the source is saved with ``SyncStatus.SYNCED`` plus the crawled count
        and rating. On any failure the source is saved with
        ``SyncStatus.FALSE`` instead; the error is logged, not raised.

        :param review_source: the source to synchronise
        :param is_add: when exactly ``True``, the company's widget is also
            filled up with reviews
        """
        try:
            crawl_result = self.get_reviews(review_source.url)
            if crawl_result is None:
                # ``get_reviews`` already logged the cause; fail the sync
                # explicitly instead of through a tuple-unpacking TypeError.
                raise RuntimeError(f"yelp crawl returned no data for {review_source.url}")
            total_reviews, avg_rating, raw_reviews = crawl_result
            logger.debug("yelp sync: total reviews %s", total_reviews)

            self._store_crawled_reviews(review_source, raw_reviews)
            self._merge_pinned_duplicates(review_source)
            self._rebalance_active_reviews(review_source)

            logger.debug("yelp sync: is_add=%s", is_add)
            if is_add is True:
                self._fill_widget(review_source)

            review_source.sync_status = ReviewSource.SyncStatus.SYNCED
            review_source.reviews_count = total_reviews
            review_source.average_rating = avg_rating
            review_source.save()
            logger.debug("yelp sync done: %s reviews, rating %s", total_reviews, avg_rating)
        except Exception:
            logger.exception("yelp sync failed for review source %s", review_source.id)
            review_source.sync_status = ReviewSource.SyncStatus.FALSE
            review_source.save()
