Package report/repo parser integration

 - parser able to filter package versions using keywords
 - warning message on missing tag
 - on-the-fly version lookup (excluding '*.hotfix')
 - updated versions compare routine
 - lexical compare uses numbers, not ordinal values
 - updated release version detection
 - final report lists pkg section/app if no description given
 - final report shows repo info for detected release version

Fixes:
 - shorter alternate entrypoints: mcp-pkg, mcp-net, cmp-reclass
 - flake8 syntax
 - proper retrieval of mirantis/non-mirantis versions
 - exit on unexpected arguments
 - salt-master class now gets linux codename by default and architecture

Change-Id: I0a2daadca8a1acaecafc8680226dc00d20cc24ce
Related-PROD: PROD-28199
diff --git a/cfg_checker/modules/packages/repos.py b/cfg_checker/modules/packages/repos.py
index ae662de..e7c4c6a 100644
--- a/cfg_checker/modules/packages/repos.py
+++ b/cfg_checker/modules/packages/repos.py
@@ -1,5 +1,6 @@
 import json
 import os
+import re
 from copy import deepcopy
 
 from cfg_checker.common import logger, logger_cli, nested_set
@@ -71,8 +72,8 @@
 
 def _safe_load(_f, _a):
     if _f in _a.list_files():
-        logger_cli.info(
-            "# Loading '{}':'{}'".format(
+        logger_cli.debug(
+            "... loading '{}':'{}'".format(
                 _a.basefile,
                 _f
             )
@@ -325,7 +326,7 @@
             _dig = [s for s in _all if s[0].isdigit()]
             _dig = sorted(
                 _dig,
-                key=lambda x: tuple(int(i) for i in re.findall('\\d+', x)[:3])
+                key=lambda x: tuple(int(i) for i in re.findall(r"\d+", x)[:3])
             )
 
             return _dig + _lex
@@ -340,11 +341,14 @@
     # archives
     _versions_arch = os.path.join(pkg_dir, "versions", _repos_versions_archive)
     _desc_arch = os.path.join(pkg_dir, "versions", _pkg_desc_archive)
+    _apps_filename = "apps.json"
 
     # repository index
     _repo_index = {}
     _mainteiners_index = {}
 
+    _apps = {}
+
     # init package versions storage
     _versions_mirantis = {}
     _versions_other = {}
@@ -359,6 +363,13 @@
             self._desc_arch,
             label="MCP Configuration Checker: Package descriptions archive"
         )
+
+        # section / app
+        self._apps = _safe_load(
+            self._apps_filename,
+            self.desctgz
+        )
+
         # indices
         self._repo_index = _safe_load(
             _repos_index_filename,
@@ -394,19 +405,24 @@
         _h, _m = pair.split('-')
         return self._repo_index[_h], self._mainteiners_index[_m]
 
-    def _update_pkg_version(self, _d, n, v, md5, h_index, m_index):
+    def _update_pkg_version(self, _d, n, v, md5, s, a, h_index, m_index):
         """Method updates package version record in global dict
         """
         # 'if'*4 operation is pretty expensive when using it 100k in a row
         # so try/except is a better way to go, even faster than 'reduce'
         _pair = "-".join([h_index, m_index])
+        _info = {
+            'repo': [_pair],
+            'section': s,
+            'app': a
+        }
         try:
             # try to load list
-            _list = _d[n][v][md5]
+            _list = _d[n][v][md5]['repo']
             # cast it as set() and union()
             _list = set(_list).union([_pair])
             # cast back as set() is not serializeable
-            _d[n][v][md5] = list(_list)
+            _d[n][v][md5]['repo'] = list(_list)
             return False
         except KeyError:
             # ok, this is fresh pkg. Do it slow way.
@@ -416,12 +432,12 @@
                     # there is such version, check md5
                     if md5 in _d[n][v]:
                         # just add new repo header
-                        if _pair not in _d[n][v][md5]:
-                            _d[n][v][md5].append(_pair)
+                        if _pair not in _d[n][v][md5]['repo']:
+                            _d[n][v][md5]['repo'].append(_pair)
                     else:
                         # check if such index is here...
                         _existing = filter(
-                            lambda i: _pair in _d[n][v][i],
+                            lambda i: _pair in _d[n][v][i]['repo'],
                             _d[n][v]
                         )
                         if _existing:
@@ -436,18 +452,18 @@
                                     md5
                                 )
                             )
-                        _d[n][v][md5] = [_pair]
+                        _d[n][v][md5] = _info
                 else:
                     # this is new version for existing package
                     _d[n][v] = {
-                        md5: [_pair]
+                        md5: _info
                     }
                 return False
             else:
                 # this is new pakcage
                 _d[n] = {
                     v: {
-                        md5: [_pair]
+                        md5: _info
                     }
                 }
                 return True
@@ -478,7 +494,7 @@
     #     else:
     #         return None
 
-    def parse_tag(self, tag, descriptions=False):
+    def parse_tag(self, tag, descriptions=False, apps=False):
         """Download and parse Package.gz files for specific tag
         By default, descriptions not saved
         due to huge resulting file size and slow processing
@@ -523,6 +539,27 @@
                     if descriptions:
                         _descriptions = {}
                     # download and unzip
+                    _index += 1
+                    _progress.write_progress(
+                        _index,
+                        note="/ {} {} {} {} {}, GET 'Packages.gz'".format(
+                            _c,
+                            _ur,
+                            _p['ubuntu-release'],
+                            _p['type'],
+                            _p['arch']
+                        )
+                    )
+                    _raw = get_gzipped_file(_p['filepath'])
+                    if not _raw:
+                        # empty repo...
+                        _progress.clearline()
+                        logger_cli.warning(
+                            "# WARNING: Empty file: '{}'".format(
+                                _p['filepath']
+                            )
+                        )
+                        continue
                     _progress.write_progress(
                         _index,
                         note="/ {} {} {} {} {}, {}/{}".format(
@@ -535,9 +572,7 @@
                             _new
                         )
                     )
-                    _raw = get_gzipped_file(_p['filepath'])
                     _lines = _raw.splitlines()
-                    _index += 1
                     # break lines collection into isolated pkg data
                     _pkg = {
                         "tag": tag,
@@ -547,6 +582,10 @@
                     _pkg.update(_p)
                     _desc = {}
                     _key = _value = ""
+                    # if there is no empty line at end, add it
+                    if _lines[-1] != '':
+                        _lines.append('')
+                    # Process lines
                     for _line in _lines:
                         if not _line:
                             # if the line is empty, process pkg data gathered
@@ -555,6 +594,30 @@
                             _version = _desc['version']
                             _mainteiner = _desc['maintainer']
 
+                            if 'source' in _desc:
+                                _ap = _desc['source'].lower()
+                            else:
+                                _ap = "-"
+
+                            if apps:
+                                # insert app
+                                _sc = _desc['section'].lower()
+                                if 'source' in _desc:
+                                    _ap = _desc['source'].lower()
+                                else:
+                                    _ap = "-"
+
+                                try:
+                                    _tmp = set(self._apps[_sc][_ap][_name])
+                                    _tmp.add(_desc['architecture'])
+                                    self._apps[_sc][_ap][_name] = list(_tmp)
+                                except KeyError:
+                                    nested_set(
+                                        self._apps,
+                                        [_sc, _ap, _name],
+                                        [_desc['architecture']]
+                                    )
+
                             # Check is mainteiner is Mirantis
                             if _mainteiner.endswith("@mirantis.com>"):
                                 # update mirantis versions
@@ -563,6 +626,8 @@
                                     _name,
                                     _version,
                                     _md5,
+                                    _desc['section'].lower(),
+                                    _ap,
                                     self._create_repo_header(_pkg),
                                     _get_value_index(
                                         self._mainteiners_index,
@@ -577,6 +642,8 @@
                                     _name,
                                     _version,
                                     _md5,
+                                    _desc['section'].lower(),
+                                    _ap,
                                     self._create_repo_header(_pkg),
                                     _get_value_index(
                                         self._mainteiners_index,
@@ -622,9 +689,16 @@
             json.dumps(self._mainteiners_index),
             replace=True
         )
+        if apps:
+            self.desctgz.add_file(
+                self._apps_filename,
+                json.dumps(self._apps),
+                replace=True
+            )
+
         return
 
-    def fetch_versions(self, tag, descriptions=False):
+    def fetch_versions(self, tag, descriptions=False, apps=False):
         """Executes parsing for specific tag
         """
         if descriptions:
@@ -634,7 +708,7 @@
             )
         # if there is no such tag, parse it from repoinfo
         logger_cli.info("# Fetching versions for {}".format(tag))
-        self.parse_tag(tag, descriptions=descriptions)
+        self.parse_tag(tag, descriptions=descriptions, apps=apps)
         logger_cli.info("-> saving updated versions")
         self.versionstgz.add_file(
             _mirantis_versions_filename,
@@ -666,12 +740,27 @@
             )
             self.build_repos(url, tag=t)
 
+    def get_available_tags(self, tag=None):
+        # Populate action tags
+        major, updates, hotfix = ReposInfo().list_tags(splitted=True)
+
+        _tags = []
+        if tag in major:
+            _tags.append(tag)
+        if tag in updates:
+            _tags.append(tag + ".update")
+        if tag in hotfix:
+            _tags.append(tag + ".hotfix")
+
+        return _tags
+
     def action_for_tag(
         self,
         url,
         tag,
         action=None,
-        descriptions=None
+        descriptions=None,
+        apps=None
     ):
         """Executes action for every tag from all collections
         """
@@ -694,16 +783,9 @@
             # exit
             return
 
-        # Pupulate action tags
-        major, updates, hotfix = ReposInfo().list_tags(splitted=True)
-        _action_tags = []
-        if tag in major:
-            _action_tags.append(tag)
-        if tag in updates:
-            _action_tags.append(tag + ".update")
-        if tag in hotfix:
-            _action_tags.append(tag + ".hotfix")
-        # Check if any tags collected
+        # Populate action tags
+        _action_tags = self.get_available_tags(tag)
+
         if not _action_tags:
             logger_cli.info(
                 "# Tag of '{}' not found. "
@@ -720,7 +802,7 @@
             self._build_action(url, _action_tags)
         elif action == "fetch":
             for t in _action_tags:
-                self.fetch_versions(t, descriptions=descriptions)
+                self.fetch_versions(t, descriptions=descriptions, apps=apps)
 
         logger_cli.info("# Done.")
 
@@ -738,46 +820,214 @@
             #           \t <version>
             # <10symbols> \t <md5> \t sorted headers with no tag
             # ...
-            logger_cli.info("\n# Package: {}".format(name))
+            # section
             _o = ""
-            # get and sort tags
-            _vs = _p.keys()
-            _vs.sort()
-            for _v in _vs:
-                _o += "\n" + " "*8 + _v + ':\n'
-                # get and sort tags
-                _mds = _p[_v].keys()
-                _mds.sort()
-                for _md5 in _mds:
-                    _o += " "*16 + _md5 + "\n"
-                    # get and sort repo headers
-                    _rr = _p[_v][_md5].keys()
-                    _rr.sort()
-                    for _r in _rr:
-                        _o += " "*24 + _r.replace('_', ' ')
-                        _o += " ({})\n".format(_p[_v][_md5][_r]["mainteiner"])
+
+            _ss = _p.keys()
+            _ss.sort()
+            for _s in _ss:
+                _apps = _p[_s].keys()
+                _apps.sort()
+                # app
+                for _a in _apps:
+                    logger_cli.info(
+                        "\n# Package: {}/{}/{}".format(_s, _a, name)
+                    )
+                    # get and sort tags
+                    _vs = _p[_s][_a].keys()
+                    _vs.sort()
+                    for _v in _vs:
+                        _o += "\n" + " "*8 + _v + ':\n'
+                        # get and sort tags
+                        _mds = _p[_s][_a][_v].keys()
+                        _mds.sort()
+                        for _md5 in _mds:
+                            _o += " "*16 + _md5 + "\n"
+                            # get and sort repo headers
+                            _rr = _p[_s][_a][_v][_md5].keys()
+                            _rr.sort()
+                            for _r in _rr:
+                                _o += " "*24 + _r.replace('_', ' ')
+                                _o += " ({})\n".format(
+                                    _p[_s][_a][_v][_md5][_r]["maintainer"]
+                                )
 
             logger_cli.info(_o)
 
-    def get_package_versions(self, name, mirantis=True, tagged=False):
+    @staticmethod
+    def get_apps(versions, name):
+        _all = True if name == '*' else False
+        _s_max = 0
+        _a_max = 0
+        _rows = []
+        for _p in versions.keys():
+            _vs = versions[_p]
+            for _v, _d1 in _vs.iteritems():
+                for _md5, _info in _d1.iteritems():
+                    if _all or name == _info['app']:
+                        _s_max = max(len(_info['section']), _s_max)
+                        _a_max = max(len(_info['app']), _a_max)
+                        _rows.append([
+                            _info['section'],
+                            _info['app'],
+                            _p
+                        ])
+        _fmt = "{:"+str(_s_max)+"} {:"+str(_a_max)+"} {}"
+        _rows = [_fmt.format(s, a, p) for s, a, p in _rows]
+        _rows.sort()
+        return _rows
+
+    def show_app(self, name):
+        c = 0
+        rows = self.get_apps(self._versions_mirantis, name)
+        if rows:
+            logger_cli.info("# Mirantis packages for '{}'".format(name))
+            logger_cli.info("\n".join(rows))
+            c += 1
+        rows = self.get_apps(self._versions_other, name)
+        if rows:
+            logger_cli.info("# Other packages for '{}'".format(name))
+            logger_cli.info("\n".join(rows))
+            c += 1
+        if c == 0:
+            logger_cli.info("\n# No app found for '{}'".format(name))
+
+    def get_mirantis_pkg_names(self):
+        # Mirantis maintainers only
+        return set(
+            self._versions_mirantis.keys()
+        ) - set(
+            self._versions_other.keys()
+        )
+
+    def get_other_pkg_names(self):
+        # Non-mirantis Maintainers
+        return set(
+            self._versions_other.keys()
+        ) - set(
+            self._versions_mirantis.keys()
+        )
+
+    def get_mixed_pkg_names(self):
+        # Mixed maintainers
+        return set(
+            self._versions_mirantis.keys()
+        ).intersection(set(
+            self._versions_other.keys()
+        ))
+
+    def is_mirantis(self, name, tag=None):
+        """Method checks if this package is mainteined
+        by mirantis in target tag repo
+        """
+        if name in self._versions_mirantis:
+            # check tag
+            if tag:
+                _pkg = self.get_package_versions(
+                    name,
+                    tagged=True
+                )
+                _tags = []
+                for s in _pkg.keys():
+                    for a in _pkg[s].keys():
+                        for t in _pkg[s][a].keys():
+                            _tags.append(t)
+                if any([t.startswith(tag) for t in _tags]):
+                    return True
+                else:
+                    return None
+            else:
+                return True
+        elif name in self._versions_other:
+            # check tag
+            if tag:
+                _pkg = self.get_package_versions(
+                    name,
+                    tagged=True
+                )
+                _tags = []
+                for s in _pkg.keys():
+                    for a in _pkg[s].keys():
+                        for t in _pkg[s][a].keys():
+                            _tags.append(t)
+                if any([t.startswith(tag) for t in _tags]):
+                    return False
+                else:
+                    return None
+            else:
+                return False
+        else:
+            logger.error(
+                "# ERROR: package '{}' not found "
+                "while determining maintainer".format(
+                    name
+                )
+            )
+            return None
+
+    def get_filtered_versions(
+        self,
+        name,
+        tag=None,
+        include=None,
+        exclude=None
+    ):
+        """Method gets all the versions for the package
+        and filters them using keys above
+        """
+        if tag:
+            tag = unicode(tag) if not isinstance(tag, unicode) else tag
+        _out = {}
+        _vs = self.get_package_versions(name, tagged=True)
+        # iterate to filter out keywords
+        for s, apps in _vs.iteritems():
+            for a, _tt in apps.iteritems():
+                for t, vs in _tt.iteritems():
+                    # filter tags
+                    if tag and t != tag and t.rsplit('.', 1)[0] != tag:
+                        continue
+                    # Skip hotfix tag
+                    if t == tag + ".hotfix":
+                        continue
+                    for v, rp in vs.iteritems():
+                        for h, p in rp.iteritems():
+                            # filter headers with all keywords matching
+                            _h = re.split(r"[\-\_]+", h)
+                            _included = all([kw in _h for kw in include])
+                            _excluded = any([kw in _h for kw in exclude])
+                            if not _included or _excluded:
+                                continue
+                            else:
+                                nested_set(_out, [s, a, v], [])
+                                _dat = {
+                                    "header": h
+                                }
+                                _dat.update(p)
+                                _out[s][a][v].append(_dat)
+        return _out
+
+    def get_package_versions(self, name, tagged=False):
         """Method builds package version structure
         with repository properties included
         """
         # get data
-        if mirantis and name in self._versions_mirantis:
-            _vs = self._versions_mirantis[name]
-        elif not mirantis and name in self._versions_other:
-            _vs = self._versions_other[name]
-        else:
-            return {}
+        _vs = {}
+
+        if name in self._versions_mirantis:
+            _vs.update(self._versions_mirantis[name])
+        if name in self._versions_other:
+            _vs.update(self._versions_other[name])
 
         # insert repo data, insert props into headers place
         _package = {}
         if tagged:
             for _v, _d1 in _vs.iteritems():
                 # use tag as a next step
-                for _md5, _indices in _d1.iteritems():
-                    for _pair in _indices:
+                for _md5, _info in _d1.iteritems():
+                    _s = _info['section']
+                    _a = _info['app']
+                    for _pair in _info['repo']:
+                        _rp = {}
                         # extract props for a repo
                         _r, _m = self._get_indexed_values(_pair)
                         # get tag
@@ -785,26 +1035,27 @@
                         # cut tag from the header
                         _cut_head = _r["header"].split("_", 1)[1]
                         # populate dict
+                        _rp["maintainer"] = _m
+                        _rp["md5"] = _md5
+                        _rp.update(_r["props"])
                         nested_set(
                             _package,
-                            [_tag, _v, _cut_head, _md5],
-                            {
-                                "repo": _r["props"],
-                                "mainteiner": _m
-                            }
+                            [_s, _a, _tag, _v, _cut_head],
+                            _rp
                         )
         else:
             for _v, _d1 in _vs.iteritems():
-                for _md5, _indices in _d1.iteritems():
-                    for _pair in _indices:
+                for _md5, _info in _d1.iteritems():
+                    _s = _info['section']
+                    _a = _info['app']
+                    for _pair in _info['repo']:
                         _r, _m = self._get_indexed_values(_pair)
+                        _info["maintainer"] = _m
+                        _info.update(_r["props"])
                         nested_set(
                             _package,
-                            [_v, _md5, _r["header"]],
-                            {
-                                "repo": _r["props"],
-                                "mainteiner": _m
-                            }
+                            [_s, _a, _v, _md5, _r["header"]],
+                            _info
                         )
 
         return _package