New node generator added
This commit introduces a draft of a node generator utility which
is supposed to replace the reclass.storage formula. While it does not
really belong in reclass, it appears to be the least problematic place
to put it, so it will stay here for the time being.
PROD-26064
Change-Id: I4488330699fccf4d433b308dbf7a332b086d4703
diff --git a/.gitreview b/.gitreview
new file mode 100644
index 0000000..4b4246e
--- /dev/null
+++ b/.gitreview
@@ -0,0 +1,4 @@
+[gerrit]
+host=gerrit.mcp.mirantis.net
+port=29418
+project=packaging/sources/reclass
diff --git a/reclass/nodegenerator.py b/reclass/nodegenerator.py
new file mode 100644
index 0000000..e5d4153
--- /dev/null
+++ b/reclass/nodegenerator.py
@@ -0,0 +1,278 @@
+import argparse
+import sys
+import os
+import yaml
+from pprint import pprint as pp
+import re
+from collections import Iterable
+from functools import partial
+from itertools import chain
+import copy
+import ipaddress
+import yaml
+import collections
+
##
# A hack needed to preserve the order of fields in the output. The only
# reason it is here is for verbatim reproduction of a part of the original
# reclass.storage formula. It could be omitted, but the order of keys in
# the generated files would then become different.
class UnsortableList(list):
    # A list whose sort() is a no-op, so PyYAML cannot reorder the items
    # handed to it by UnsortableDict below.
    def sort(self, *a, **k):
        pass

class UnsortableDict(collections.OrderedDict):
    # An OrderedDict whose items() returns an UnsortableList: PyYAML sorts
    # mapping items before dumping, and the no-op sort() defeats that,
    # preserving insertion order in the output.
    def items(self, *a, **k):
        return UnsortableList(collections.OrderedDict.items(self, *a, **k))

# Dump UnsortableDict instances as plain YAML mappings (no python/object tag).
yaml.add_representer(UnsortableDict,
                     yaml.representer.SafeRepresenter.represent_dict)
# End hack
##
+
+
# Precompiled patterns for '${_param:...}' style references and '<<...>>'
# ip-range placeholders used in node definitions. Raw strings avoid the
# invalid escape sequences ('\$', '\{', ...) the plain literals produced.
_param_reference = re.compile(r'\$\{_param\:.*?\}')
_refernce_name = re.compile(r'\$\{_param\:(.*?)\}')
_iprange = re.compile(r'<<(.*?)>>')


def get_references(string):
    """Extracts all raw references (e.g. '${_param:name}') from a string."""
    return re.findall(_param_reference, string)


def get_ref_names(string):
    """Extracts all reference names (the part after '_param:') from a string."""
    return re.findall(_refernce_name, string)


def has_subst(value):
    """Checks if an element contains a '${_param:...}' substitution slot.

    Strings are scanned directly, dicts are checked over both keys and
    values, other iterables are checked element-wise.
    """
    # NOTE: the code does not address possible nested references so far since
    # it is unclear if anyone actually uses them.
    if type(value) is str:
        return len(get_references(value)) > 0
    elif type(value) is dict:
        # BUG FIX: the dict check must come before the generic Iterable
        # check -- dicts are Iterable, so the old order only ever scanned
        # the keys and references in dict values went undetected.
        return any(has_subst(k) or has_subst(v) for k, v in value.items())
    elif isinstance(value, Iterable):
        return any(has_subst(x) for x in value)
    return False


def do_substitution(entity, src, subst_stub=None):
    """Attempts to substitute references in entity with values from src.

    Strings have every '${_param:name}' occurrence replaced by
    str(src['name']); lists and dicts are processed recursively (dict keys
    included). An unknown reference collapses to its bare name, unless
    subst_stub is given, in which case it is replaced by subst_stub.
    """
    subster = partial(do_substitution, src=src)
    if type(entity) is str:
        lookfor = zip(get_ref_names(entity), get_references(entity))
        result = entity
        for subst, ref in lookfor:
            default_subst = subst if subst_stub is None else subst_stub
            result = result.replace(ref, str(src.get(subst, default_subst)))
        return result
    elif type(entity) is list:
        return [subster(x) for x in entity]
    elif type(entity) is dict:
        return dict((subster(k), subster(v)) for k, v in entity.items())
    return entity
+
+
# Parameters as they arrive after collection can easily contain references,
# but fortunately most of them can be resolved using the very same
# dictionary: since it is a glob of parameters from different parts of a
# cluster model, it can contain them. Thus it is worthwhile to try and
# expand the parameters.
def expand_params(source):
    """Expands the parameter dict in case parameters contain references.

    Substitution is repeated until no references remain or until a pass
    changes nothing. The latter guard fixes unbounded recursion on
    self-referencing parameters (e.g. {'a': '${_param:a}'}); such
    references are now simply left in place.
    """
    result = do_substitution(source, source)
    while has_subst(result):
        expanded = do_substitution(result, result)
        if expanded == result:
            # Fixed point: the remaining references cannot be resolved.
            break
        result = expanded
    return result
+
+
# TODO: remove side effects.
def update_dict(x, y):
    """Merges y into x in place, preferring concrete values.

    A key already present in x is kept when the incoming value is a
    '${_param:...}' placeholder (never replace a concrete value with a
    placeholder; between two placeholders the first one wins). Overwrites
    of concrete values are reported, since they silently change the
    meaning of the model.
    """
    def is_substitute(foo):
        # True for raw, not-yet-resolved '${_param:...}' reference strings.
        return type(foo) is str and foo.startswith('${_param:')
    for key, value in y.items():
        if key in x and x[key] != value:
            # Never replace a concrete value with a placeholder.
            if not is_substitute(x[key]) and is_substitute(value):
                continue
            # Two competing placeholders: keep the one already present.
            if is_substitute(x[key]) and is_substitute(value):
                continue
            # TODO: logging.
            print('WARNING! overwriting key %s : %s  ->  %s'
                  % (key, x[key], value))
        x[key] = value
+
+
def get_substitution_parameters(source):
    """Merges the '_param' sections of all parsed configs into one dict."""
    merged = {}
    for config in source:
        params = config.get('parameters', {}).get('_param', {})
        update_dict(merged, params)
    return merged
+
+
def get_configs(base, cluster):
    """Collects the parsed init.yml configs of a cluster and the nodes.yml path.

    Walks base/cluster, loading every init.yml found and remembering the
    location of nodes.yml (the last one seen wins if several exist).
    Returns (configs, nodes_definition); nodes_definition is None when the
    tree contains no nodes.yml -- previously this raised UnboundLocalError.
    """
    configs_to_process = []
    out = []
    nodes_definition = None
    cluster_def = os.path.join(base, cluster)
    for rootdir, _, files in os.walk(cluster_def):
        for fname in files:
            if fname == 'init.yml':
                configs_to_process.append(os.path.join(rootdir, fname))
            elif fname == 'nodes.yml':  # TODO: refactor lumping.
                nodes_definition = os.path.join(rootdir, fname)
    for config in configs_to_process:
        with open(config, 'r') as f:
            # NOTE(review): yaml.load without an explicit Loader can execute
            # arbitrary tags; fine for a trusted model tree, unsafe otherwise.
            out.append(yaml.load(f))
    return out, nodes_definition
+
+
def get_basic_node_defenitions(cfgs, params, basename):
    """Loads the node definitions referenced by 'storage.system' classes.

    Scans the collected configs for class names containing
    'storage.system', loads the corresponding YAML files (folding their
    '_param' sections into params on the way) and returns a dict mapping
    node name to its definition. Exits with an error on duplicate nodes.
    """
    def fixname(x):
        # Class name 'a.b.c' -> '<basename>/a/b/c.yml'.
        return os.path.join(basename, x.replace('.', '/') + '.yml')
    out = {}
    storage_classnames = []
    for cfg in cfgs:
        storage_classnames.extend(x for x in cfg.get('classes', [])
                                  if x.find('storage.system') != -1)
    all_nodes_names = []
    for x in [fixname(x) for x in storage_classnames]:
        with open(x, 'r') as f:
            data = yaml.load(f)
            update_dict(params, data['parameters']['_param'])
            node_content = data['parameters']['reclass']['storage']['node']
            for nodename, nodecontent in node_content.items():
                if nodename not in all_nodes_names:
                    all_nodes_names.append(nodename)
                    out[nodename] = nodecontent
                else:
                    # BUG FIX: the original referenced an undefined name
                    # 'node' here, turning duplicate detection into a
                    # NameError crash.
                    print("Node duplicate detected: %s" % nodename)
                    sys.exit(1)
    return out
+
+
def dump_to_files(content, dest):
    """Writes each resolved node definition to dest/<name>.<domain>.yml."""
    files_to_generate = []
    for res in content:
        tt = ".".join([res['name'], res['domain'], 'yml'])
        files_to_generate.append(tt)
        # UnsortableDict keeps the insertion order of the keys below in the
        # YAML output (see the representer hack at the top of this module).
        systemdesc = UnsortableDict()
        systemdesc['name'] = res['name']
        systemdesc['domain'] = res['domain']
        # Are the two below ever used in production?:
        systemdesc['cluster'] = res.get('cluster', 'default')
        systemdesc['environment'] = res.get('environment', 'prd')
        contents = {'classes': res['classes'],
                    'parameters': {
                        '_param': res['params'],
                        'linux': {
                            'system': systemdesc
                        }}}
        # NOTE: the original formula contains hints to other possible sections.
        # Since it is not immediately clear whether those are used or just
        # a precaution for future work, they are omitted for now.
        with open(os.path.join(dest, tt), 'w') as f:
            yaml.dump(contents, f, default_flow_style=False)
+
+
+
# Takes an internally parametrized node description and generates the
# appropriate number of nodes.
# A node consists of regular node definition fields plus an additional
# 'repeat' field which is used to update/populate the regular fields.
# Parametrized fields are apparently of two types: number and ip address;
# substitution is marked by double angle brackets around a name. The name
# 'count' is apparently hardcoded for counter-based name parts, while the
# names for ip ranges are somewhat dynamic.
def do_count_expansion(node_description):

    def get_ip(ranges):
        # Yields consecutive addresses from 'a-b[,c-d...]' range strings,
        # both endpoints inclusive.
        af = lambda x: ipaddress.ip_address(unicode(x))
        for sta, sto in [x.split('-') for x in ranges.split(',')]:
            ista, isto = af(sta), af(sto)
            while ista <= isto:
                yield ista
                ista += 1

    def generate_counted_node(from_node, count_padding):
        # Builds one node dict for the current value of 'count' (closed over
        # from the loop below), zero-padding the counter to count_padding
        # digits. NOTE(review): a value that is neither str, list nor dict
        # would leave 'outv' unbound -- presumably never happens in real
        # models; confirm before relying on it.
        def update_count(string, value, padding):
            return string.replace('<<count>>', str(value).zfill(padding))
        out = {}
        for k, v in from_node.iteritems():
            if type(v) is str:
                outv = update_count(v, count, count_padding)
            elif type(v) is list:
                outv = v
            # Tricky place: a dictionary can contain ip range substitution
            # TODO: refactor.
            elif type(v) is dict:
                outv = copy.deepcopy(v)
                for ik, iv in v.iteritems():
                    if type(iv) is dict:
                        for iiv in iv.itervalues(): # apparently key is always 'value'
                            ranges = re.findall(_iprange, iiv)
                            if ranges:
                                # Draw the next address from the matching
                                # range generator built below.
                                addr = ip_ranges[ranges[0]].next()
                                outv[ik] = str(addr.exploded)
            out[k] = outv
        return out

    result = []
    local_node = copy.deepcopy(node_description)
    # Process repeat field
    repeat = local_node.pop('repeat')
    local_node['params'].update(repeat['params'])
    # Process ranges. Ranges look like the only repeatable entity, so
    # it is not generalized yet. NOTE: check other models.
    ip_ranges = repeat['ip_ranges'].iteritems()
    ip_ranges = dict((ip_rn, get_ip(ip_r)) for ip_rn, ip_r in ip_ranges)

    # Generate counted nodes.
    for count in range(repeat['start'], repeat['start'] + repeat['count']):
        result.append(generate_counted_node(local_node, repeat['digits']))
    return result
+
+
def extend_basic_nodes(basic_nodes, nodes_definition):
    """Merges nodes.yml data into the basic nodes and expands repeated ones.

    Classes and params from nodes.yml are folded into the matching basic
    node definitions (mutating them in place), and every nodes.yml entry
    carrying a 'repeat' field is expanded via do_count_expansion.
    Returns the combined list of node definitions.
    """
    def needs_expansion(node):
        # 'in node' instead of 'in node.keys()': same result, no temp list.
        return 'repeat' in node

    with open(nodes_definition, 'r') as f:
        nodes_init = yaml.load(f)['parameters']['reclass']['storage']['node']
    basic_nodes_extended = {}
    for nname, node in basic_nodes.items():
        node['classes'].extend(nodes_init.get(nname, {}).get('classes', []))
        node['params'].update(nodes_init.get(nname, {}).get('params', {}))
        basic_nodes_extended[nname] = node

    extra_nodes = []
    for node in nodes_init.values():
        if needs_expansion(node):
            extra_nodes.extend(do_count_expansion(node))

    # list(...) keeps this working when dict.values() is a view (Python 3)
    # rather than a list with an extend() method (Python 2).
    all_nodes = list(basic_nodes_extended.values())
    all_nodes.extend(extra_nodes)
    return all_nodes
+
+
def main():
    """CLI entry point: generates node files for a cluster model."""
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-b', '--basedir', default='/srv/salt/reclass/classes',
                        help="Cluster base directory")
    parser.add_argument('cluster', help="Cluster name")
    parser.add_argument('-o', '--outdir', default='_newgen',
                        help="Directory to store generated nodes")
    args = parser.parse_args()

    cluster_name = os.path.join('cluster', args.cluster)
    # Pipeline: collect configs -> merge params -> load node definitions ->
    # extend/expand them -> resolve references -> write the results out.
    configs, nodes_definition = get_configs(args.basedir, cluster_name)
    params = get_substitution_parameters(configs)
    basic_nodes = get_basic_node_defenitions(configs, params, args.basedir)
    all_nodes = extend_basic_nodes(basic_nodes, nodes_definition)
    expanded_parameters = expand_params(params)
    resulting_nodes = do_substitution(all_nodes, expanded_parameters)
    dump_to_files(resulting_nodes, args.outdir)


if __name__ == '__main__':
    main()
diff --git a/setup.py b/setup.py
index 884be88..0e33e64 100644
--- a/setup.py
+++ b/setup.py
@@ -25,7 +25,7 @@
long_description = f.read()
ADAPTERS = ['salt', 'ansible']
-console_scripts = ['reclass = reclass.cli:main']
+console_scripts = ['reclass = reclass.cli:main', 'nodegenerator = reclass.nodegenerator:main']
console_scripts.extend('reclass-{0} = reclass.adapters.{0}:cli'.format(i)
for i in ADAPTERS)