#!/usr/bin/env python3

from datetime import datetime
from lxml import etree
from collections import namedtuple
from urllib3.exceptions import ReadTimeoutError
from tempfile import TemporaryDirectory
from git import Repo
from subprocess import check_call


import argparse
import re
import logging
import os
import requests
import json
import yaml
import gzip
import sys
import stat


# Number of bytes requested per read while streaming a download.
CHUNK_SIZE = 4096
# Bytes per megabyte; download progress is logged once per whole MB received.
MB = 1024 * 1024

# Timestamped log format; `log` is the logger used throughout this module.
logging.basicConfig(format='%(asctime)-15s [%(levelname)s] %(message)s')
log = logging.getLogger(__name__)

# One entry parsed from a repository listing: the regex groups of the file
# name (name / version / offset / commit) plus its URL and listed timestamp.
Artifact = namedtuple('Artifact', ['name', 'version', 'url',
                                   'offset', 'commit', 'timestamp'])

# Splits the text that follows a listing link into whitespace-separated fields.
re_whitespace = re.compile(r'\s+')

# Chart names that must all be present for a commit to count as published.
REQUIRED_CHARTS = [
    'baremetal-provider',
    'metallb',
]

# Binary artifact names that must all be present for a commit to count as
# published.
REQUIRED_ARTIFACTS = [
    'bootstrap',
    'kaas-tools'
]


def _discard_partial_file(path):
    """Remove an incomplete download at ``path``; a missing file is fine."""
    try:
        os.remove(path)
    except FileNotFoundError:
        pass
    except OSError:
        log.warning("Unable to remove incomplete file {}".format(path))


def download_file(url, target, retry_count=5, timeout=300):
    """Stream ``url`` into the local file ``target``, retrying on failure.

    The raw response body is written in ``CHUNK_SIZE`` pieces. When the
    server sends a ``Content-Length`` header, the number of bytes written
    must match it for the download to count as successful.

    :param url: URL to fetch.
    :param target: path of the file to (over)write.
    :param retry_count: maximum number of attempts; ``0`` means no attempt.
    :param timeout: timeout in seconds passed to ``requests.get``.
    :return: ``True`` when the file was downloaded completely, ``False`` on
             a 404 response or when all attempts failed. A failed attempt
             never leaves a truncated file behind at ``target``.
    """
    while retry_count:
        size = 0
        size_mb = 0
        # Set once `target` has been truncated by this attempt, so a
        # failure before that point never deletes a pre-existing file.
        opened = False
        started = datetime.now()
        retry_count -= 1
        log.info("Fetching {} to {} ({} attempt(s) left)"
                 .format(url, target, retry_count))

        try:
            with requests.get(url, timeout=timeout, stream=True) as r:
                if r.status_code == 404:
                    # Retrying cannot help: the file does not exist.
                    log.warning("Not found (404): {}".format(url))
                    return False
                r.raise_for_status()
                # A missing or empty header means "size unknown" (0).
                content_length = int(r.headers.get('Content-Length') or 0)
                with open(target, 'wb') as f:
                    opened = True
                    while True:
                        chunk = r.raw.read(CHUNK_SIZE)
                        if not chunk:
                            break
                        f.write(chunk)
                        size += len(chunk)
                        # Report progress once per whole megabyte received.
                        whole_mb = int(size / MB)
                        if whole_mb > size_mb:
                            size_mb = whole_mb
                            if content_length:
                                log.debug("Got {:.0%} of {}"
                                          .format(size / content_length, url))
                            else:
                                log.debug("Got {}MB of {}"
                                          .format(size_mb, url))
        except ReadTimeoutError:
            log.warning("Timeout while downloading {}".format(url))
            if opened:
                _discard_partial_file(target)
            continue
        except Exception:  # noqa
            log.exception("Failed to download from {}".format(url))
            if opened:
                _discard_partial_file(target)
            continue

        finished = datetime.now()
        # total_seconds() covers the whole interval; `.seconds` is only the
        # seconds component of the timedelta.
        seconds = int((finished - started).total_seconds())
        log.info("Fetched '{}' ({} byte(s) / {} second(s))"
                 .format(url, size, seconds))

        if not content_length or content_length == size:
            return True

        log.error("Bad size, expected {}, got {}, file {} from {}"
                  .format(content_length, size, target, url))
        _discard_partial_file(target)

    return False


class ArtifactsRepository(object):
    """Artifacts discovered by scraping an HTML directory listing.

    Subclasses provide ``re_artifact_name``: compiled patterns applied to
    the text of every link in the listing. Each pattern must define the
    named groups ``name``, ``version``, ``offset`` and ``commit``.
    """
    re_artifact_name = []
    # Seconds to wait for the listing server before giving up.
    request_timeout = 300

    def __init__(self, url):
        """Load the listing at ``url``; raise ``Exception`` if that fails."""
        self.url = url
        self.artifacts = self.load(url)
        if self.artifacts is None:
            # Name the concrete class so the failing repository is obvious.
            raise Exception("Failed to initialize {} object."
                            .format(type(self).__name__))

    def load(self, url):
        """Return the list of artifacts found at ``url``, or ``None`` when
        the listing could not be fetched or parsed (the error is logged)."""
        try:
            log.info("Loading artifacts from {} ...".format(url))
            artifacts = list(self.list_artifacts(url))
            log.info("Loading done, got {} artifact(s)".format(len(artifacts)))
        except Exception:  # noqa
            log.exception("Unable to load artifacts list.")
            artifacts = None
        return artifacts

    def list_artifacts(self, url):
        """Yield an ``Artifact`` for every matching link in the listing.

        Links are read from ``/html/body/pre/a``. A link is skipped when
        its text matches none of the patterns, when it has no ``href``, or
        when the text following it has fewer than four fields.

        :raises Exception: when the server does not answer with HTTP 200.
        """
        with requests.get(url, timeout=self.request_timeout) as r:
            if r.status_code != 200:
                raise Exception("Bad URL: {}".format(url))
            html = etree.HTML(r.content)

        for element in html.xpath('/html/body/pre/a'):
            # Links without text, href or trailing text must not abort the
            # whole listing; they simply cannot describe an artifact.
            text = element.text or ''
            href = element.get('href')
            if not href:
                continue

            for re_exp in self.re_artifact_name:
                match = re_exp.match(text)

                if not match:
                    continue

                tail = re_whitespace.split((element.tail or '').strip())
                # Valid tail should contain 4 fields:
                # * date
                # * time
                # * size
                # * MB
                if len(tail) < 4:
                    continue

                timestamp = datetime.strptime(tail[0] + ' ' + tail[1],
                                              '%d-%b-%Y %H:%M')

                yield Artifact(name=match.group('name'),
                               version=match.group('version'),
                               url=url + '/' + href,
                               offset=match.group('offset'),
                               commit=match.group('commit'),
                               timestamp=timestamp)

    def search(self, commit=None):
        """Iterate over the loaded artifacts.

        :param commit: full commit hash; when given, only artifacts whose
                       (abbreviated) commit is a prefix of it are returned.
        :return: an iterator of artifacts.
        """
        if commit:
            return filter(lambda x: commit.startswith(x.commit),
                          self.artifacts)

        return iter(self.artifacts)


class BinaryRepository(ArtifactsRepository):
    """
    File name patterns for artifacts in the binary repository listing.

    Both patterns define the named groups 'name', 'version', 'offset' and
    'commit'.

    Example of strings that should match regexps:
      bootstrap-linux-1.21.1-47-af9074d.tar.gz
      kaas-tools-1.21.1-47-af9074d-linux
    """
    re_artifact_name = [
        # <name>-linux-<version>-<offset>-<commit>.tar.gz
        # (name is \w+ only, i.e. it cannot contain '-')
        re.compile(r'^(?P<name>\w+)-linux-'
                   r'(?P<version>[0-9.]+)-'
                   r'(?P<offset>\d+)-'
                   r'(?P<commit>[a-f0-9]+)\.tar\.gz$'),
        # <name>-<version>-<offset>-<commit>-linux
        # (name may contain '-')
        re.compile(r'^(?P<name>[\w\-]+)-'
                   r'(?P<version>[0-9.]+)-'
                   r'(?P<offset>\d+)-'
                   r'(?P<commit>[a-f0-9]+)-linux$'),
    ]


class ChartsRepository(ArtifactsRepository):
    """
    File name pattern for packaged charts in the chart repository listing.

    The pattern defines the named groups 'name', 'version', 'offset' and
    'commit'.

    Example of strings that should match regexps:
      baremetal-provider-1.21.1-47-af9074d.tgz
      metallb-1.21.1-47-af9074d.tgz
    """
    re_artifact_name = [
        # <name>-<version>-<offset>-<commit>.tgz (name may contain '-')
        re.compile(r'^(?P<name>[\w\-]+)-'
                   r'(?P<version>[0-9.]+)-'
                   r'(?P<offset>\d+)-'
                   r'(?P<commit>[a-f0-9]+)\.tgz$'),
    ]


class HelmRepository(object):
    """In-memory copy of a Helm repository index.

    The index is downloaded from ``<url>/index.yaml.json.gz`` when that
    file exists, otherwise from ``<url>/index.yaml``. When ``yq_url`` is
    given, a downloaded YAML index is converted to JSON with the ``yq``
    binary fetched from that URL before it is parsed.
    """

    def __init__(self, url, yq_url=None):
        """Download and parse the index; raise ``Exception`` on failure."""
        self.url = url
        self.yq_url = yq_url
        self.index_yaml = self.load(self.url)
        if self.index_yaml is None:
            raise Exception("Failed to initialize HelmRepository object.")

    def convert_index_yaml(self, index_yaml, tempdir):
        """Convert the YAML file ``index_yaml`` to JSON using ``yq``.

        :param index_yaml: path of the downloaded index file.
        :param tempdir: directory the ``yq`` binary is downloaded into.
        :return: path of the JSON file on success; ``index_yaml`` unchanged
                 when it is not a ``.yaml`` file, when ``yq`` cannot be
                 downloaded, or when the conversion fails.
        """
        if not index_yaml.endswith('.yaml'):
            # The pre-converted (gzipped) JSON index was fetched: there is
            # nothing for yq to do, so do not even download it.
            return index_yaml

        yq_bin = os.path.join(tempdir, 'yq')
        if not download_file(self.yq_url, yq_bin):
            return index_yaml
        try:
            index_json = index_yaml + '.json'
            # Add the owner-execute bit while keeping the existing
            # permissions instead of replacing the whole mode.
            os.chmod(yq_bin, os.stat(yq_bin).st_mode | stat.S_IXUSR)
            with open(index_json, 'w') as f:
                check_call([yq_bin, 'r', '-j', index_yaml], stdout=f)
            return index_json
        except Exception:  # noqa
            log.exception("Can't convert {} using 'yq'".format(index_yaml))
        return index_yaml

    def load(self, url):
        """Fetch and parse the index of the repository at ``url``.

        :return: the parsed index, or ``None`` when no index file could be
                 downloaded.
        """
        log.info("Loading index.yaml from {} ...".format(url))
        data = None
        # Downloaded files are only needed until they have been parsed.
        with TemporaryDirectory() as tempdir:
            index_yaml = self.fetch_index_yaml(url, tempdir)
            if index_yaml:
                if self.yq_url:
                    index_yaml = self.convert_index_yaml(index_yaml, tempdir)
                data = self.read_index_yaml(index_yaml)
        log.info("Loading done")
        return data

    def fetch_index_yaml(self, url, target_dir):
        """Download the index into ``target_dir``.

        ``index.yaml.json.gz`` is tried first, then ``index.yaml``.

        :return: path of the downloaded file, or ``None`` if neither file
                 could be downloaded.
        """
        for file_name in ('index.yaml.json.gz', 'index.yaml'):
            index_file = '{}/{}'.format(url, file_name)
            target = os.path.join(target_dir, file_name)
            if download_file(index_file, target):
                return target

        return None

    def read_index_yaml(self, path):
        """Parse the index file at ``path`` according to its extension.

        Supported extensions are ``.json.gz``, ``.json`` and ``.yaml``.

        :return: the parsed data, or ``None`` for any other extension.
        """
        if path.endswith('.json.gz'):
            opener, parse = gzip.open, json.load
        elif path.endswith('.json'):
            opener, parse = open, json.load
        elif path.endswith('.yaml'):
            # BaseLoader builds plain strings, lists and dicts only.
            opener = open
            parse = lambda f: yaml.load(f, Loader=yaml.BaseLoader)  # noqa
        else:
            return None

        log.debug("Reading {} ...".format(path))
        with opener(path, 'rt') as f:
            data = parse(f)
        log.debug("Done {}".format(path))
        return data

    def search(self, commit=None, offset=None, version=None):
        """Yield index items whose version is ``<version>-<offset>-<commit>``.

        Every list under the index's ``entries`` mapping is scanned; a
        missing or empty ``entries`` value yields nothing.
        """
        helm_version = '{}-{}-{}'.format(version, offset, commit)

        entries = self.index_yaml.get('entries') or {}
        for entry in entries.values():
            for item in entry or ():
                if item.get('version') == helm_version:
                    yield item


class KaasCoreRepository(object):
    """Git checkout whose branch history is matched against published
    artifacts."""

    def __init__(self, path, branch='master'):
        """Open the git repository at ``path`` and remember ``branch``."""
        self.repo = Repo(path)
        self.branch = branch

    def get_ref(self, branch):
        """Return the ref path for ``branch``.

        Lookup order: local branches, then the ``origin`` remote, then
        every other remote. A repository without an ``origin`` remote is
        supported; its other remotes are still searched.

        :raises Exception: when no local or remote ref matches.
        """
        for item in self.repo.branches:
            if item.name == branch:
                return item.path

        # Stable sort: 'origin' (key False) first, the rest in their
        # original order.
        remotes = sorted(self.repo.remotes,
                         key=lambda remote: remote.name != 'origin')
        for remote in remotes:
            _branch = remote.name + '/' + branch
            for ref in remote.refs:
                if ref.name == _branch:
                    return ref.path

        raise Exception("Can't find ref for {}".format(branch))

    def history(self):
        """Yield the commits of the configured branch, logging each one."""
        branch = self.get_ref(self.branch)
        log.info("Using branch {}".format(branch))
        for commit in self.repo.iter_commits(branch):
            log.info("Trying commit {}".format(commit.hexsha))
            yield commit

    def find_published_commit(self, helm_repo, bin_repo):
        """Return the first commit in the branch history that is published.

        A commit counts as published when ``bin_repo`` holds every name in
        ``REQUIRED_ARTIFACTS`` and ``helm_repo`` holds every name in
        ``REQUIRED_CHARTS`` for it.

        :param helm_repo: object whose ``search(commit=...)`` yields items
                          with a ``name`` attribute (charts).
        :param bin_repo: same, for binary artifacts.
        :return: the matching commit; when none matches, an error is logged
                 and the commit at the repository HEAD is returned.
        """
        head = self.repo.head.commit
        # Built once instead of once per examined commit.
        required_artifacts = set(REQUIRED_ARTIFACTS)
        required_charts = set(REQUIRED_CHARTS)

        for commit in self.history():
            bin_artifacts = {artifact.name for artifact
                             in bin_repo.search(commit=commit.hexsha)}
            if not required_artifacts.issubset(bin_artifacts):
                # Not all artifact names found, skipping this version
                continue

            chart_artifacts = {artifact.name for artifact
                               in helm_repo.search(commit=commit.hexsha)}
            if not required_charts.issubset(chart_artifacts):
                # Not all chart names found, skipping this version
                continue

            return commit

        log.error("No suitable commit found in kaas/core. "
                  "Please check that bin / helm repository URLs "
                  "are correct and contain artifacts / charts. "
                  "Use --debug option for detailed output.")
        return head


def parse_args():
    """Parse and validate the command line.

    ``--base-url`` supplies defaults for the binary (``/core/bin``) and helm
    (``/core/helm``) repository URLs when those are not given explicitly.
    ``--debug`` switches the module logger to DEBUG level.

    :return: the parsed ``argparse.Namespace``.
    :raises Exception: when the binary repository URL, the helm repository
                       URL or the kaas/core repository path is not set.
    """
    parser = argparse.ArgumentParser()
    # Adjacent string literals are concatenated as-is, so every fragment
    # that continues on the next line ends with an explicit space.
    parser.add_argument('--base-url', default=None,
                        help="Base URL where both binary / helm repositories "
                             "are located.")
    parser.add_argument('--binary-repo-url', default=None,
                        help="Binary repository URL in case it is not "
                             "'BASE_URL/core/bin'")
    parser.add_argument('--helm-repo-url', default=None,
                        help="Helm repository URL in case it is not "
                             "'BASE_URL/core/helm'")
    parser.add_argument('--kaas-core-repo', default=None,
                        help='Path to kaas/core git repository')
    parser.add_argument('--kaas-core-branch', default='master')
    parser.add_argument('--output', default=None,
                        help="Filename to write result to.")
    parser.add_argument('--debug', default=False, action='store_true',
                        help="Enable debug output")
    parser.add_argument('--yq-url', default=None,
                        help="URL where yq tool is stored. If specified this "
                             "tool will be used to convert YAML to JSON to "
                             "speed up importing.")
    args = parser.parse_args()

    if args.debug:
        log.setLevel('DEBUG')

    # Explicit repository URLs win over the ones derived from --base-url.
    if args.base_url and not args.binary_repo_url:
        args.binary_repo_url = args.base_url + '/core/bin'

    if args.base_url and not args.helm_repo_url:
        args.helm_repo_url = args.base_url + '/core/helm'

    if not args.binary_repo_url:
        raise Exception("Binary repo URL is not set!")

    if not args.helm_repo_url:
        raise Exception("Helm repo URL is not set!")

    if not args.kaas_core_repo:
        raise Exception("Path to kaas/core repository is not set!")

    return args


# Script body: locate the published commit and report its hash.
# NOTE(review): args.yq_url is parsed but not read below, and the chart
# lookup goes through ChartsRepository - confirm this is intended.
args = parse_args()
bin_repo = BinaryRepository(args.binary_repo_url)
helm_repo = ChartsRepository(args.helm_repo_url)
kaas_core_repo = KaasCoreRepository(args.kaas_core_repo, args.kaas_core_branch)

commit = kaas_core_repo.find_published_commit(helm_repo, bin_repo)

hexsha = ''
if commit:
    log.info("Selected commit {}\n{}".format(commit.hexsha, commit.message))
    hexsha = commit.hexsha

# Without a commit hash there is nothing to report: signal failure.
if not hexsha:
    sys.exit(1)

# The hash goes to the --output file when given, to stdout otherwise.
if args.output:
    with open(args.output, 'w') as f:
        f.write(commit.hexsha)
else:
    print(commit.hexsha)
