import json
import os
from copy import deepcopy

from cfg_checker.common import logger, logger_cli, nested_set
from cfg_checker.common.const import _mainteiners_index_filename
from cfg_checker.common.const import _mirantis_versions_filename
from cfg_checker.common.const import _other_versions_filename
from cfg_checker.common.const import _pkg_desc_archive
from cfg_checker.common.const import _repos_index_filename
from cfg_checker.common.const import _repos_info_archive
from cfg_checker.common.const import _repos_versions_archive
from cfg_checker.common.const import ubuntu_releases
from cfg_checker.common.file_utils import get_gzipped_file
from cfg_checker.common.settings import pkg_dir
from cfg_checker.helpers.console_utils import Progress
from cfg_checker.helpers.tgz import TGZFile

import requests
from requests.exceptions import ConnectionError

# suffix appended to a tag name to get the name of tag's file
# inside the repos info archive
ext = ".json"


def get_tag_label(_tag, parsed=False):
    """Prettify the tag for printing

    Label starts with a two symbol marker ('+ ' when 'parsed' is set,
    two spaces otherwise) followed by a ten symbol wide column with
    the tag kind and by the tag itself with kind suffix cut.

    :param _tag: tag name, may end with '.update' or '.hotfix'
    :param parsed: mark the tag with '+'
    :return: string with label
    """
    _marker = "+ " if parsed else "  "

    # suffix of the tag and the column text it is turned into
    _kinds = (
        (".update", "[updates] "),
        (".hotfix", " [hotfix] ")
    )
    for _suffix, _column in _kinds:
        if _tag.endswith(_suffix):
            return _marker + _column + _tag.rsplit('.', 1)[0]

    # plain tag, pad it to keep tags aligned
    return _marker + " "*10 + _tag


def _get_value_index(_di, value, header=None):
    if header:
        if not filter(lambda i: _di[i]["header"] == header, _di):
            _index = str(len(_di.keys()) + 1)
            _di[_index] = {
                "header": header,
                "props": value
            }
        else:
            for _k, _v in _di.iteritems():
                if _v["header"] == header:
                    _index = _k

        return _index
    else:
        if not filter(lambda i: _di[i] == value, _di):
            _index = str(len(_di.keys()) + 1)
            _di[_index] = value
        else:
            for _k, _v in _di.iteritems():
                if _v == value:
                    _index = _k

        return _index


def _safe_load(_f, _a):
    if _f in _a.list_files():
        logger_cli.info(
            "# Loading '{}':'{}'".format(
                _a.basefile,
                _f
            )
        )
        return json.loads(_a.get_file(_f))
    else:
        return {}


def _n_url(url):
    if url[-1] == '/':
        return url
    else:
        return url + '/'


class ReposInfo(object):
    """Gathers links to 'Packages.gz' files from the mirror

    Mirror is walked tag by tag. For each tag a json file is saved to
    the repos info archive. The file holds tag's base url and lists of
    found 'Packages.gz' files grouped by subtag and ubuntu release.
    """
    # NOTE(review): not used by methods of this class, kept in place
    # in case it is accessed from outside
    repos = []
    # full path to the repos info archive
    _repofile = os.path.join(pkg_dir, "versions", _repos_info_archive)

    @staticmethod
    def _ls_repo_page(url):
        """List folders and files of the HTML index page

        Page is processed line by line. Only lines that start with
        an anchor are used: those ending with '-' are folders,
        the rest are files.

        :param url: url of the page to list
        :return: tuple of two lists, folder names and file names.
            Both are empty when connection fails
        """
        # Yes, this is ugly. But it works ok for small HTMLs.
        _a = "<a"
        _s = "href="
        _e = "\">"
        try:
            page = requests.get(url, timeout=60)
        except ConnectionError as e:
            # format the exception itself, as there is
            # no 'message' attribute for exceptions in python3
            logger_cli.error("# ERROR: {}".format(e))
            return [], []
        _anchors = [
            _line for _line in page.text.splitlines()
            if _line.startswith(_a)
        ]
        # name starts after 'href="' (6 symbols) and lasts till '">'
        # Comprehension for dirs. Anchors for ends with '-'
        # last symbol of the name is cut, presumably it is trailing '/'
        _dirs = [
            _line[_line.index(_s)+6:_line.index(_e)-1]
            for _line in _anchors if _line.endswith('-')
        ]
        # Comprehension for files. Anchors ends with size
        _files = [
            _line[_line.index(_s)+6:_line.index(_e)]
            for _line in _anchors if not _line.endswith('-')
        ]

        return _dirs, _files

    def search_pkg(self, url, _list):
        """Recursive method to walk dists tree

        Only 'dists' folder is entered at levels above it. Once url
        has 'dists' in it, all folders are walked.

        :param url: url to start from, must end with '/'
        :param _list: list to add found 'Packages.gz' urls to
        :return: the same list that was given as '_list'
        """
        _dirs, _files = self._ls_repo_page(url)

        for _d in _dirs:
            # Search only in dists, ignore the rest
            if "dists" not in url and _d != "dists":
                continue
            _u = _n_url(url + _d)
            self.search_pkg(_u, _list)

        for _f in _files:
            if _f == "Packages.gz":
                _list.append(url + _f)
                logger.debug("... [F] '{}'".format(url + _f))

        return _list

    @staticmethod
    def _map_repo(_path_list, _r):
        """Turn urls of 'Packages.gz' files into repo items

        Properties are taken from the url parts that follow 'dists',
        counting from the end: the last part is the file itself, then
        goes arch folder, then type, then ubuntu release.
        I.e. '.../dists/<release>/<type>/<arch>/Packages.gz'

        :param _path_list: list of urls, each must have 'dists' part
        :param _r: list to add repo items (dicts) to
        :return: None
        """
        for _pkg_path in _path_list:
            _l = _pkg_path.split('/')
            _kw = _l[_l.index('dists')+1:]
            _kw.reverse()
            _repo_item = {
                # cut 7 symbols of prefix, i.e. 'binary-'
                "arch": _kw[1][7:] if "binary" in _kw[1] else _kw[1],
                "type": _kw[2],
                "ubuntu-release": _kw[3],
                "filepath": _pkg_path
            }
            _r.append(_repo_item)

    def _find_tag(self, _t, _u, label=""):
        """Check if the tag folder is present at the url

        :param _t: tag name to look for
        :param _u: base url, must end with '/'
        :param label: optional subfolder of the base url to look in.
            It is also added to the tag name as a suffix
        :return: dict with single item of
            {<tag>[.<label>]: {"baseurl": ..., "all": {}}}
            or empty dict if there is no such tag
        """
        if label:
            _url = _n_url(_u + label)
            _label = _t + '.' + label
        else:
            _url = _u
            _label = _t
        _ts, _ = self._ls_repo_page(_url)
        if _t not in _ts:
            return {}

        logger.debug(
            "... found tag '{}' at '{}'".format(
                _t,
                _url
            )
        )
        return {
            _label: {
                "baseurl": _n_url(_url + _t),
                "all": {}
            }
        }

    def fetch_repos(self, url, tag=None):
        """Walk the mirror and save found repos to the repos info archive

        Tags that already have a file in the archive are skipped.

        :param url: base url of the mirror
        :param tag: tag to process. If not set, all tags found at the
            base url and in its 'hotfix' and 'update' folders are used
        :return: None
        """
        base_url = _n_url(url)
        logger_cli.info("# Using '{}' as a repos source".format(base_url))

        logger_cli.info("# Gathering repos info (i.e. links to 'packages.gz')")
        # init repoinfo archive
        _repotgz = TGZFile(self._repofile)
        # each tag is searched at the base url and in these subfolders
        _labels = ("", "hotfix", "update")
        # prepare repo links
        _repos = {}
        if tag:
            # only one tag to process
            for _lbl in _labels:
                _repos.update(self._find_tag(tag, base_url, label=_lbl))
        else:
            # gather all of them
            _listed, _ = self._ls_repo_page(base_url)
            # 'hotfix' and 'update' are not tags. They are filtered out
            # as 'list.remove()' raises ValueError if there is no such
            # item, i.e. when listing is empty due to connection error
            _tags = [t for t in _listed if t not in ('hotfix', 'update')]
            # search tags in subfolders
            _h_tags, _ = self._ls_repo_page(base_url + 'hotfix')
            _u_tags, _ = self._ls_repo_page(base_url + 'update')
            _tags.extend([t for t in _h_tags if t not in _tags])
            _tags.extend([t for t in _u_tags if t not in _tags])
            _progress = Progress(len(_tags))
            for _index, _tag in enumerate(_tags, 1):
                for _lbl in _labels:
                    _repos.update(self._find_tag(_tag, base_url, label=_lbl))
                _progress.write_progress(_index)
            _progress.end()

        # parse subtags
        for _label in _repos.keys():
            logger_cli.info("-> processing tag '{}'".format(_label))
            _name = _label + ext
            if _repotgz.has_file(_name):
                logger_cli.info(
                    "-> skipping, '{}' already has '{}'".format(
                        _repos_info_archive,
                        _name
                    )
                )
                continue
            # process the tag
            _repo = _repos[_label]
            _baseurl = _repo["baseurl"]
            # get the subtags
            _sub_tags, _ = self._ls_repo_page(_baseurl)
            _index = 0
            _progress = Progress(len(_sub_tags))
            logger.debug(
                "... found {} subtags for '{}'".format(
                    len(_sub_tags),
                    _label
                )
            )
            # save the url and start search
            for _stag in _sub_tags:
                _u = _baseurl + _stag
                _index += 1
                logger.debug(
                    "... searching repos in '{}/{}'".format(
                        _label,
                        _stag
                    )
                )

                # Searching Package collections
                if _stag in ubuntu_releases:
                    # if stag is the release, this is all packages
                    _repo["all"][_stag] = []
                    _repo["all"]["url"] = _n_url(_u)
                    _path_list = self.search_pkg(_n_url(_u), [])
                    self._map_repo(_path_list, _repo["all"][_stag])
                    logger.info(
                        "-> found {} dists".format(
                            len(_repo["all"][_stag])
                        )
                    )

                else:
                    # each subtag might have any ubuntu release
                    # so iterate them
                    _repo[_stag] = {
                        "url": _n_url(_u)
                    }
                    _releases, _ = self._ls_repo_page(_n_url(_u))
                    for _rel in _releases:
                        if _rel not in ubuntu_releases:
                            logger.debug(
                                "... skipped unknown ubuntu release: "
                                "'{}' in '{}'".format(
                                    _rel,
                                    _u
                                )
                            )
                        else:
                            _rel_u = _n_url(_u) + _rel
                            _repo[_stag][_rel] = []
                            _path_list = self.search_pkg(_n_url(_rel_u), [])
                            self._map_repo(
                                _path_list,
                                _repo[_stag][_rel]
                            )
                            logger.info(
                                "-> found {} dists for '{}'".format(
                                    len(_repo[_stag][_rel]),
                                    _rel
                                )
                            )
                _progress.write_progress(_index)

            _progress.end()
            _repotgz.add_file(_name, buf=json.dumps(_repo, indent=2))
            logger_cli.info(
                "-> archive '{}' updated with '{}'".format(
                    self._repofile,
                    _name
                )
            )

    def list_tags(self, splitted=False):
        """List tags that are saved in the repos info archive

        :param splitted: if set, tags are returned as three collections
        :return: if 'splitted' is set, tuple of
            (set of plain tags,
             list of tags that have '.update' file, suffix is cut,
             list of tags that have '.hotfix' file, suffix is cut).
            Otherwise, a single list of all names as they are stored:
            those starting with a digit go first, sorted by the first
            three numbers found in the name, the rest follow sorted
            as strings
        """
        _files = TGZFile(self._repofile).list_files()
        # all files in archive with no '.json' part
        _all = set([f.rsplit('.', 1)[0] for f in _files])
        if splitted:
            # Match by suffix: this is how names are built when saved.
            # Search for a substring would also catch tags that just
            # have 'update' or 'hotfix' somewhere in the name
            # files that ends with '.update'
            _updates = set([f for f in _all if f.endswith('.update')])
            # files that ends with '.hotfix'
            _hotfix = set([f for f in _all if f.endswith('.hotfix')])
            # remove updates and hotfix tags from all. The true magic of SETs
            _all = _all - _updates - _hotfix
            # cut updates and hotfix endings
            _updates = [f.rsplit('.', 1)[0] for f in _updates]
            _hotfix = [f.rsplit('.', 1)[0] for f in _hotfix]

            return _all, _updates, _hotfix
        else:
            # dynamic import
            import re
            _all = list(_all)
            # lexical tags
            _lex = [s for s in _all if not s[0].isdigit()]
            _lex.sort()
            # tags with digits
            _dig = [s for s in _all if s[0].isdigit()]
            _dig = sorted(
                _dig,
                key=lambda x: tuple(int(i) for i in re.findall('\\d+', x)[:3])
            )

            return _dig + _lex

    def get_repoinfo(self, tag):
        """Load repo info for the tag from the repos info archive

        :param tag: tag name as it is stored, i.e. with suffix if any
        :return: data parsed from the tag's json file
        """
        _tgz = TGZFile(self._repofile)
        _buf = _tgz.get_file(tag + ext)
        return json.loads(_buf)


class RepoManager(object):
    """Parses 'Packages.gz' files and keeps versions of the packages

    Versions are stored in two dicts: one for packages maintained by
    Mirantis and one for the rest. Both have the structure of
    {<name>: {<version>: {<md5>: [<pair>, ...]}}}, where pair is
    '<repo index>-<mainteiner index>' string. Indices are keys of
    the repository and mainteiners index dicts.
    All of the dicts are loaded from and saved to the versions archive.
    """
    # archives
    _versions_arch = os.path.join(pkg_dir, "versions", _repos_versions_archive)
    _desc_arch = os.path.join(pkg_dir, "versions", _pkg_desc_archive)

    # repository index
    # class level values are placeholders, they are replaced
    # with loaded data for each instance in __init__
    _repo_index = {}
    _mainteiners_index = {}

    # init package versions storage
    _versions_mirantis = {}
    _versions_other = {}

    def __init__(self):
        """Open the archives and load indices and versions from them"""
        # Init version files
        self.versionstgz = TGZFile(
            self._versions_arch,
            label="MCP Configuration Checker: Package versions archive"
        )
        self.desctgz = TGZFile(
            self._desc_arch,
            label="MCP Configuration Checker: Package descriptions archive"
        )
        # indices
        self._repo_index = _safe_load(
            _repos_index_filename,
            self.versionstgz
        )
        self._mainteiners_index = _safe_load(
            _mainteiners_index_filename,
            self.versionstgz
        )

        # versions
        self._versions_mirantis = _safe_load(
            _mirantis_versions_filename,
            self.versionstgz
        )
        self._versions_other = _safe_load(
            _other_versions_filename,
            self.versionstgz
        )

    def _create_repo_header(self, p):
        """Get index of the repo in the repository index, add if needed

        :param p: dict with repo properties. It must have 'tag',
            'subset', 'release', 'ubuntu-release', 'type' and 'arch'
        :return: string key of the repo in the repository index
        """
        _header = "_".join([
            p['tag'],
            p['subset'],
            p['release'],
            p['ubuntu-release'],
            p['type'],
            p['arch']
        ])
        return _get_value_index(self._repo_index, p, header=_header)

    def _get_indexed_values(self, pair):
        """Resolve '<repo index>-<mainteiner index>' pair

        :param pair: string with two indices joined with '-'
        :return: tuple of (repo index item, mainteiners index item)
        """
        _h, _m = pair.split('-')
        return self._repo_index[_h], self._mainteiners_index[_m]

    def _update_pkg_version(self, _d, n, v, md5, h_index, m_index):
        """Method updates package version record in global dict

        :param _d: versions dict to update
        :param n: package name
        :param v: package version
        :param md5: md5 sum of the package
        :param h_index: index of the repo in the repository index
        :param m_index: index of the mainteiner in the mainteiners index
        :return: True if there was no such package name in the dict,
            False otherwise
        """
        # 'if'*4 operation is pretty expensive when using it 100k in a row
        # so try/except is a better way to go, even faster than 'reduce'
        _pair = "-".join([h_index, m_index])
        try:
            # try to load list
            _known = _d[n][v][md5]
        except KeyError:
            _known = None

        if _known is not None:
            # cast it as set() and union()
            # cast back as set() is not serializeable
            _d[n][v][md5] = list(set(_known).union([_pair]))
            return False

        # ok, this is fresh pkg. Do it slow way.
        if n not in _d:
            # this is new pakcage
            _d[n] = {
                v: {
                    md5: [_pair]
                }
            }
            return True

        if v not in _d[n]:
            # this is new version for existing package
            _d[n][v] = {
                md5: [_pair]
            }
            return False

        # There is such version, but no such md5: lookup above
        # would not fail otherwise.
        # check if such index is here...
        if any(_pair in _pairs for _pairs in _d[n][v].values()):
            # Yuck! Same version had different MD5
            _r, _ = self._get_indexed_values(_pair)
            logger_cli.error(
                "# ERROR: Package version has multiple MD5s "
                "in '{}': {}:{}:{}".format(
                    _r,
                    n,
                    v,
                    md5
                )
            )
        _d[n][v][md5] = [_pair]
        return False

    def _register_package(self, desc, repo_props):
        """Save version of one parsed package to the proper storage

        :param desc: dict with lowercased fields of the package record.
            It must have 'package', 'version', 'md5sum', 'maintainer'
        :param repo_props: properties of the repo the record is from
        :return: True if there was no such package name in the storage
        """
        _mainteiner = desc['maintainer']
        # Check is mainteiner is Mirantis
        if _mainteiner.endswith("@mirantis.com>"):
            _versions = self._versions_mirantis
        else:
            _versions = self._versions_other
        return self._update_pkg_version(
            _versions,
            desc['package'],
            desc['version'],
            desc['md5sum'],
            self._create_repo_header(repo_props),
            _get_value_index(
                self._mainteiners_index,
                _mainteiner
            )
        )

    def _save_repo_descriptions(self, repo_props, desc):
        """Save descriptions of the repo to the descriptions archive

        :param repo_props: properties of the repo. Index of the repo
            in the repository index is used as a name of the file
        :param desc: descriptions to save, dumped as json
        """
        # form the filename for the repo and save it
        self.desctgz.add_file(
            self._create_repo_header(repo_props),
            json.dumps(desc)
        )

    # NOTE(review): method below is disabled. It uses 'self._desc_cache',
    # which is not defined anywhere in this class
    # def get_description(self, repo_props, name, md5=None):
    #     """Gets target description
    #     """
    #     _filename = self._create_repo_header(repo_props)
    #     # check if it is present in cache
    #     if _filename in self._desc_cache:
    #         _descs = self._desc_cache[_filename]
    #     else:
    #         # load data
    #         _descs = self.desctgz.get_file(_filename)
    #         # Serialize it
    #         _descs = json.loads(_descs)
    #         self._desc_cache[_filename] = _descs
    #     # return target desc
    #     if name in _descs and md5 in _descs[name]:
    #         return _descs[name][md5]
    #     else:
    #         return None

    def parse_tag(self, tag, descriptions=False):
        """Download and parse Package.gz files for specific tag
        By default, descriptions not saved
        due to huge resulting file size and slow processing

        Versions are updated in memory only. Repository and mainteiners
        indices are saved to the versions archive at the end.

        :param tag: tag name as it is stored in the repos info archive
        :param descriptions: save full package records
            to the descriptions archive
        :return: None
        """
        # init gzip and downloader
        _info = ReposInfo().get_repoinfo(tag)
        # calculate Packages.gz files to process
        _baseurl = _info.pop("baseurl")
        # NOTE(review): presumably, 'all' is the one not counted - confirm
        _total_components = len(_info) - 1
        _ubuntu_package_repos = 0
        _other_repos = 0
        for _d in _info.values():
            for _ur, _l in _d.items():
                if _ur in ubuntu_releases:
                    _ubuntu_package_repos += len(_l)
                elif _ur != 'url':
                    _other_repos += len(_l)
        logger_cli.info(
            "-> loaded repository info for '{}'.\n"
            "  '{}', {} components, {} ubuntu repos, {} other/uknown".format(
                _baseurl,
                tag,
                _total_components,
                _ubuntu_package_repos,
                _other_repos
            )
        )
        # init progress bar
        _progress = Progress(_ubuntu_package_repos)
        _index = 0
        _processed = 0
        _new = 0
        for _c, _d in _info.items():
            # we do not need url here, just get rid of it
            _d.pop('url', None)
            for _ur, _l in _d.items():
                # iterate package collections
                for _p in _l:
                    # descriptions
                    _descriptions = {}
                    # download and unzip
                    _progress.write_progress(
                        _index,
                        note="/ {} {} {} {} {}, {}/{}".format(
                            _c,
                            _ur,
                            _p['ubuntu-release'],
                            _p['type'],
                            _p['arch'],
                            _processed,
                            _new
                        )
                    )
                    _raw = get_gzipped_file(_p['filepath'])
                    _lines = _raw.splitlines()
                    _index += 1
                    # break lines collection into isolated pkg data
                    _pkg = {
                        "tag": tag,
                        "subset": _c,
                        "release": _ur
                    }
                    _pkg.update(_p)
                    _desc = {}
                    _key = _value = ""
                    for _line in _lines:
                        if not _line:
                            if not _desc:
                                # one more empty line in a row,
                                # there is no data gathered to process
                                continue
                            # if the line is empty, process pkg data gathered
                            if self._register_package(_desc, _pkg):
                                _new += 1

                            if descriptions:
                                _by_md5 = _descriptions.setdefault(
                                    _desc['package'],
                                    {}
                                )
                                _by_md5[_desc['md5sum']] = deepcopy(_desc)
                            # clear the data for next pkg
                            _processed += 1
                            _desc = {}
                            _key = ""
                            _value = ""
                        elif _line.startswith(' '):
                            # line continues the value of the last key
                            _desc[_key] += "\n{}".format(_line)
                        else:
                            _key, _value = _line.split(': ', 1)
                            _key = _key.lower()

                            _desc[_key] = _value
                    # save descriptions if needed
                    if descriptions:
                        _progress.clearline()
                        self._save_repo_descriptions(_pkg, _descriptions)

        _progress.end()
        # backup headers to disk
        self.versionstgz.add_file(
            _repos_index_filename,
            json.dumps(self._repo_index),
            replace=True
        )
        self.versionstgz.add_file(
            _mainteiners_index_filename,
            json.dumps(self._mainteiners_index),
            replace=True
        )

    def fetch_versions(self, tag, descriptions=False):
        """Executes parsing for specific tag

        Both versions storages are saved to the versions archive
        when parsing is done.

        :param tag: tag name as it is stored in the repos info archive
        :param descriptions: save package descriptions too
        """
        if descriptions:
            logger_cli.warning(
                "\n\n# !!! WARNING: Saving repo descriptions "
                "consumes huge amount of disk space\n\n"
            )
        # if there is no such tag, parse it from repoinfo
        logger_cli.info("# Fetching versions for {}".format(tag))
        self.parse_tag(tag, descriptions=descriptions)
        logger_cli.info("-> saving updated versions")
        self.versionstgz.add_file(
            _mirantis_versions_filename,
            json.dumps(self._versions_mirantis),
            replace=True
        )
        self.versionstgz.add_file(
            _other_versions_filename,
            json.dumps(self._versions_other),
            replace=True
        )

    def build_repos(self, url, tag=None):
        """Builds versions data for selected tag, or for all of them

        :param url: base url of the mirror
        :param tag: tag to gather repos for, all tags if not set
        """
        # Init the ReposInfo class and check if all files are present
        _repos = ReposInfo()
        # recoursively walk the mirrors
        # and gather all of the repos for 'tag' or all of the tags
        _repos.fetch_repos(url, tag=tag)

    def _build_action(self, url, tags):
        """Build repos info for each of the tags

        :param url: base url of the mirror
        :param tags: list of tag names
        """
        for t in tags:
            logger_cli.info(
                "# Building repo info for '{}/{}'".format(
                    url,
                    t
                )
            )
            self.build_repos(url, tag=t)

    def action_for_tag(
        self,
        url,
        tag,
        action=None,
        descriptions=None
    ):
        """Executes action for every tag from all collections

        :param url: base url of the mirror
        :param tag: tag name with no '.update' or '.hotfix' suffix
        :param action: one of 'list', 'build' or 'fetch'.
            Nothing is done if it is not set
        :param descriptions: save package descriptions on 'fetch'
        """
        if not action:
            logger_cli.info("# No action set, nothing to do")
            # do as the message says
            return
        # See if this is a list action
        if action == "list":
            _all = ReposInfo().list_tags()
            # Print pretty list and exit
            logger_cli.info("# Tags available at '{}':".format(url))
            _ri = self._repo_index
            for t in _all:
                # tag is parsed if there is a repo of it in the index
                _isparsed = any(
                    v['props']['tag'] == t for v in _ri.values()
                )
                logger_cli.info(get_tag_label(t, parsed=_isparsed))
            # exit
            return

        # Pupulate action tags
        major, updates, hotfix = ReposInfo().list_tags(splitted=True)
        _action_tags = []
        if tag in major:
            _action_tags.append(tag)
        if tag in updates:
            _action_tags.append(tag + ".update")
        if tag in hotfix:
            _action_tags.append(tag + ".hotfix")
        # Check if any tags collected
        if not _action_tags:
            logger_cli.info(
                "# Tag of '{}' not found. "
                "Consider rebuilding repos info.".format(tag)
            )
        else:
            logger_cli.info(
                "-> tags to process: {}".format(
                    ", ".join(_action_tags)
                )
            )
        # Execute actions
        if action == "build":
            self._build_action(url, _action_tags)
        elif action == "fetch":
            for t in _action_tags:
                self.fetch_versions(t, descriptions=descriptions)

        logger_cli.info("# Done.")

    def show_package(self, name):
        """Print versions of the package along with repos they are from

        Data comes from 'get_package_versions' called with defaults,
        so only the Mirantis versions storage is searched.

        :param name: package name
        """
        # get the package data
        _p = self.get_package_versions(name)
        if not _p:
            logger_cli.warning(
                "# WARNING: Package '{}' not found".format(name)
            )
        else:
            # print package info using sorted tags from headers
            # Package: name
            # [u/h] tag \t <version>
            #           \t <version>
            # <10symbols> \t <md5> \t sorted headers with no tag
            # ...
            logger_cli.info("\n# Package: {}".format(name))
            _o = ""
            # get and sort versions
            # 'sorted()' is used as there is no 'sort()'
            # for dict keys in python3
            for _v in sorted(_p):
                _o += "\n" + " "*8 + _v + ':\n'
                # get and sort md5 sums
                for _md5 in sorted(_p[_v]):
                    _o += " "*16 + _md5 + "\n"
                    # get and sort repo headers
                    for _r in sorted(_p[_v][_md5]):
                        _o += " "*24 + _r.replace('_', ' ')
                        _o += " ({})\n".format(_p[_v][_md5][_r]["mainteiner"])

            logger_cli.info(_o)

    def get_package_versions(self, name, mirantis=True, tagged=False):
        """Method builds package version structure
        with repository properties included

        :param name: package name
        :param mirantis: search in Mirantis versions storage if set,
            in the other one if not
        :param tagged: group the data by tag first
        :return: empty dict if there is no such package. Otherwise,
            nested dict with {"repo": ..., "mainteiner": ...} leaves:
            {<version>: {<md5>: {<header>: leaf}}} or, if 'tagged',
            {<tag>: {<version>: {<header with tag cut>: {<md5>: leaf}}}}
        """
        # get data
        if mirantis and name in self._versions_mirantis:
            _vs = self._versions_mirantis[name]
        elif not mirantis and name in self._versions_other:
            _vs = self._versions_other[name]
        else:
            return {}

        # insert repo data, insert props into headers place
        _package = {}
        for _v, _d1 in _vs.items():
            for _md5, _indices in _d1.items():
                for _pair in _indices:
                    # extract props for a repo
                    _r, _m = self._get_indexed_values(_pair)
                    if tagged:
                        # use tag as a next step
                        # and cut tag from the header
                        _path = [
                            _r["props"]["tag"],
                            _v,
                            _r["header"].split("_", 1)[1],
                            _md5
                        ]
                    else:
                        _path = [_v, _md5, _r["header"]]
                    # populate dict
                    nested_set(
                        _package,
                        _path,
                        {
                            "repo": _r["props"],
                            "mainteiner": _m
                        }
                    )

        return _package

    def parse_repos(self):
        """Fetch versions for all tags from the repos info archive"""
        # all tags to check
        major, updates, hotfix = ReposInfo().list_tags(splitted=True)

        # major tags
        logger_cli.info("# Processing major tags")
        for _tag in major:
            self.fetch_versions(_tag)

        # updates tags
        logger_cli.info("# Processing update tags")
        for _tag in updates:
            self.fetch_versions(_tag + ".update")

        # hotfix tags
        logger_cli.info("# Processing hotfix tags")
        for _tag in hotfix:
            self.fetch_versions(_tag + ".hotfix")
