"Module to parse a url"
import re
import sys
from urllib.parse import urlparse

import requests
from dotenv import load_dotenv

# Make the project root importable so the `configs` package resolves when
# this file is run directly as a script.
sys.path.append(r".")

# Environment variables must be loaded before the config module reads them.
load_dotenv()

from configs.config import MAX_URL_DEPTH, languages_to_check

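# Browser-like request headers so servers are less likely to block or alter
# the redirect probe made by check_if_redirect.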
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.76 Safari/537.36",
    "Upgrade-Insecure-Requests": "1",
    "DNT": "1",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Language": "en-US,en;q=0.5",
    "Accept-Encoding": "gzip, deflate",
}


class parsed_url:
    """Parse a url into scheme, domain, and path components."""

    def __init__(self, url: str, check_redirect: bool = True):
        # Note: lowercasing normalizes the hostname, but it also lowercases
        # the path, which some servers treat case-sensitively.
        self.supplied_url = url.lower()
        self.language_url = False

        self.check_prefix(url)

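        # Optionally follow redirects up front so the parsed fields describe
        # the final destination rather than the supplied url.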
        if check_redirect:
            redirect_url = self.check_if_redirect()
            if redirect_url:
                self.url = redirect_url
                self.check_prefix(self.url)

        if self.parsed_url.path in ("", "/"):
            self.components = []
        else:
            self.components = self.parsed_url.path.split("/")[1:]

            if len(self.components) > MAX_URL_DEPTH:
                # Cap the depth by folding everything from MAX_URL_DEPTH
                # onwards into a single trailing component joined with "/".
                self.components = self.components[:MAX_URL_DEPTH] + [
                    "/".join(self.components[MAX_URL_DEPTH:])
                ]

            self.language_url = self.components[0] in languages_to_check

        # check_prefix guarantees a "www." prefix; strip it, then split the
        # remaining hostname into its dot-separated labels.
        url_splits = self.parsed_url.hostname.split("www.", 1)[-1].split(".")

        if len(url_splits) == 1:
            self.domain = self.parsed_url.hostname
        elif len(url_splits) == 2:
            self.domain = ".".join(url_splits)
        else:
            # Handle two-part suffixes such as .co.uk or .com.au by keeping
            # the label just before "co"/"com"; otherwise drop the leading
            # subdomain label.
            for second_level in ("co", "com"):
                if second_level in url_splits:
                    sld_index = url_splits.index(second_level)
                    self.domain = ".".join(url_splits[sld_index - 1 :])
                    break
            else:
                self.domain = ".".join(url_splits[1:])

    def check_prefix(self, url):
        """Normalize the url: ensure a scheme and a "www." host prefix."""
        if not url.startswith(("http://", "https://")):
            url = "https://" + url

        # Collapse numbered hosts such as "www3." down to "www."
        # (the dot must be escaped so it is not treated as a wildcard).
        url = re.sub(r"www[0-9]+\.", "www.", url)

        # Only search for "www." near the start of the string (the scheme
        # plus the first hostname characters) so a "www." appearing later in
        # the path does not count.
        www_char_check = min(19, len(url) - 1)
        if "www." not in url[:www_char_check]:
            scheme, rest = url.split("://", 1)
            url = scheme + "://www." + rest

        self.parsed_url = urlparse(url)
        self.url = self.parsed_url.scheme + "://" + self.parsed_url.hostname

    def check_link_linkedin(self):
        """Check whether the url points at linkedin.com.

        Returns:
            str | bool: The originally supplied url, or False if the url is
            not a linkedin.com link.
        """
        if "linkedin.com" in self.url:
            return self.supplied_url
        return False

    def check_if_redirect(self):
        """Follow any redirects and return the final url, or False."""
        urls_to_check = [
            self.supplied_url,
            self.url,
            self.url.replace("://www.", "://"),
        ]
        for url_to_check in urls_to_check:
            try:
                # HEAD keeps the probe cheap; allow_redirects follows the
                # whole chain so r.url is the final destination.
                r = requests.head(
                    url_to_check, allow_redirects=True, timeout=2, headers=HEADERS
                )
                return r.url
            except requests.exceptions.RequestException:
                # Timeouts, connection errors, invalid urls, missing schemes,
                # and redirect loops all land here; try the next candidate.
                continue

        return False


if __name__ == "__main__":

    test_urls = [
        "https://www3.proproductsllc.com",
        "http://ir.mdjmjh.com/",
        "http://www.intel.com/",
        "https://about.gitlab.com/",
        "https://www.car2go.com",
        "https://www.altlaw.co.uk/",
    ]

    for test_url in test_urls:
        print(parsed_url(test_url).domain)