Several fixes to nodegenerator

Nodegenerator is fixed to correctly process more complex
clusters.

PROD-26064

Change-Id: I1972bf4738b8f8151c1888e613e33e5f8acfb773
diff --git a/reclass/nodegenerator.py b/reclass/nodegenerator.py
index e5d4153..b161a33 100644
--- a/reclass/nodegenerator.py
+++ b/reclass/nodegenerator.py
@@ -1,16 +1,14 @@
 import argparse
-import sys
+import collections
+import copy
+import functools
+import ipaddress
 import os
-import yaml
 from pprint import pprint as pp
 import re
-from collections import Iterable
-from functools import partial
-from itertools import chain
-import copy
-import ipaddress
+import sys
 import yaml
-import collections
+
 
 ##
 # A hack needed to save order of fields in output. The only reason it is here
@@ -51,16 +49,16 @@
     # it is unclear if anyone actually uses them.
     if type(value) is str:
         return len(get_references(value)) > 0
-    elif isinstance(value, Iterable):
+    elif isinstance(value, collections.Iterable):
         return any(has_subst(x) for x in value)
-    elif type(value) is dict:
+    elif isinstance(value, dict):
         return any(has_subst(k) or has_subst(v) for k, v in value.iteritems())
     return False
 
 
 def do_substitution(entity, src, subst_stub=None):
     """Attempts to substitute references in entity from src."""
-    subster = partial(do_substitution, src=src)
+    subster = functools.partial(do_substitution, src=src)
     if type(entity) is str:
         lookfor = zip(get_ref_names(entity), get_references(entity))
         result = entity
@@ -70,7 +68,7 @@
         return result
     elif type(entity) is list:
         return [subster(x) for x in entity]
-    elif type(entity) is dict:
+    elif isinstance(entity, dict):
         return dict((subster(k), subster(v)) for k, v in entity.iteritems())
     return entity
 
@@ -88,17 +86,32 @@
 
 
 # TODO: remove side effects.
-def update_dict(x, y):
-    def is_substitute(foo):
-        return type(foo) is str and foo.startswith('${_param:')
+def update_dict(x, y, block_override=False):
     for key, value in y.iteritems():
         if key in x and x[key] != value:
-            if not is_substitute(x[key]) and is_substitute(value):
+            if (type(x[key]) == type(value) and type(value) == str
+                    and block_override):
                 continue
-            if is_substitute(x[key]) and is_substitute(value):
+            # NOTE: the substitution rules should be moved.
+            # Don't replace specific value with a reference.
+            if not has_subst(x[key]) and has_subst(value):
                 continue
-            # TODO: logging.
-            print "WARNING! overwriting key", key, ':', x[key], ' -> ', value
+            # Do not replace a dict with a scalar value.
+            if isinstance(x[key], dict) and not isinstance(value, dict):
+                continue
+            # Do not touch lists: it happens in other place.
+            if type(x[key]) == list:
+            # TODO: this is a crude kludge to patch the missing-class
+            # problem. It will be kept until more statistics are collected
+            # and a generic solution is created.
+                if key == 'classes':
+                    for el in value:
+                        if 'gateway' in el:
+                            x[key].extend(value)
+                continue
+            if isinstance(x[key], dict) and isinstance(value, dict):
+                update_dict(x[key], value)
+                continue
         x[key] = value
 
 
@@ -111,59 +124,76 @@
 
 
 def get_configs(base, cluster):
-    configs_to_process = []
-    out = []
+    configs_to_process, out = [], []
     cluster_def = os.path.join(base, cluster)
     for rootdir, _, files in os.walk(cluster_def):
-        for fname in files:
+        for fname in [x for x in files if x.endswith('yml')]:
+            config = os.path.join(rootdir, fname)
             if fname == 'init.yml':
-                configs_to_process.append(os.path.join(rootdir, fname))
-            elif fname == 'nodes.yml':  # TODO: refactor lumping.
-                nodes_definition = os.path.join(rootdir, fname)
+                configs_to_process.append(config)
+            # NOTE: this is a special case left here for the time being.
+            elif fname == 'nodes.yml':  # TODO: refactor it.
+                nodes_definition = config
+            else:
+                with open(config, 'r') as f:
+                    data = yaml.load(f)
+                    if data is None:
+                        continue
+                    if data.get('parameters', {}).get('_param') is not None:
+                        configs_to_process.append(config)
     for config in configs_to_process:
         with open(config, 'r') as f:
-            out.append(yaml.load(f))
+            data = yaml.load(f)
+            data['src'] = [config]
+            out.append(data)
     return out, nodes_definition
 
 
 def get_basic_node_defenitions(cfgs, params, basename):
     def fixname(x):
-        return os.path.join(basename, x.replace('.', '/') + '.yml')
-    out = {}
-    storage_classnames = []
+        fn = os.path.join(basename, x.replace('.', '/') + '.yml')
+        return fn if os.path.isfile(fn) else os.path.join(fn[:-4], 'init.yml')
+
+    out, storage_classnames, cluster_cn = collections.defaultdict(dict), [], []
     for cfg in cfgs:
         storage_classnames.extend(x for x in cfg.get('classes', [])
-                                  if x.find('storage.system') != -1)
-    all_nodes_names = []
-    for x in [fixname(x) for x in storage_classnames]:
+                                  if ((x.find('storage.system') != -1)
+                                  or x.find('cluster.') != -1))
+    for x in map(fixname, storage_classnames):
         with open(x, 'r') as f:
             data = yaml.load(f)
-        update_dict(params, data['parameters']['_param'])
-        node_content = data['parameters']['reclass']['storage']['node']
+        if params is None:
+            params = {}
+        if data  is None:
+            data = {}
+        update_dict(params, data.get('parameters', {}).get('_param', {}), True)
+        node_content = (data.get('parameters', {}).get('reclass', {})
+                            .get('storage', {}).get('node', {}))
+
         for nodename, nodecontent in node_content.iteritems():
-            if nodename not in all_nodes_names:
-                all_nodes_names.append(nodename)
-                out[nodename] = nodecontent
+            if out[nodename].get('src') is not None:
+                out[nodename]['src'].append(x)
             else:
-                print "Node duplicate detected:", node
-                sys.exit(1)
+                out[nodename]['src'] = [x]
+            update_dict(out[nodename], nodecontent, False)
     return out
 
 
 def dump_to_files(content, dest):
-    files_to_generate = []
     for res in content:
-        tt = ".".join([res['name'], res['domain'], 'yml'])
-        files_to_generate.append(tt)
+        tt = ".".join([res.get('name', 'NOTADEFINITION'),
+                       res.get('domain','NOTADEFINITION'), 'yml'])
+        if tt.find('NOTADEFINITION') !=-1:
+            continue
         systemdesc = UnsortableDict()
-        systemdesc['name'] = res['name']
-        systemdesc['domain'] = res['domain']
+        systemdesc['name'] = res.get('name', 'FOO')
+        systemdesc['domain'] = res.get('domain', 'BAR')
         # Are the two below ever used in production?:
         systemdesc['cluster'] = res.get('cluster', 'default')
         systemdesc['environment'] = res.get('environment', 'prd')
         contents = {'classes': res['classes'],
                     'parameters': {
-                      '_param': res['params'],
+                      '_param': res.get('params', {}),
                       'linux': {
                          'system':  systemdesc
         }}}
@@ -184,7 +214,6 @@
 # hardcoded for various counter-based name parts, names for ip ranges are
 # somewhat dinamic.
 def do_count_expansion(node_description):
-
     def get_ip(ranges):
         af = lambda x: ipaddress.ip_address(unicode(x))
         for sta, sto in [x.split('-') for x in ranges.split(',')]:
@@ -208,7 +237,8 @@
                 outv = copy.deepcopy(v)
                 for ik, iv in v.iteritems():
                     if type(iv) is dict:
-                        for iiv in iv.itervalues():  # apparently key is always 'value'
+                        # apparently key is always 'value'
+                        for iiv in iv.itervalues():
                             ranges = re.findall(_iprange, iiv)
                             if ranges:
                                 addr = ip_ranges[ranges[0]].next()
@@ -216,8 +246,7 @@
             out[k] = outv
         return out
 
-    result = []
-    local_node = copy.deepcopy(node_description)
+    local_node, result = copy.deepcopy(node_description), []
     # Process repeat field
     repeat = local_node.pop('repeat')
     local_node['params'].update(repeat['params'])
@@ -225,7 +254,6 @@
     # it is not generalized yet. NOTE: check other models.
     ip_ranges = repeat['ip_ranges'].iteritems()
     ip_ranges = dict((ip_rn, get_ip(ip_r)) for ip_rn, ip_r in ip_ranges)
-
     # Generate counted nodes.
     for count in range(repeat['start'], repeat['start'] + repeat['count']):
         result.append(generate_counted_node(local_node, repeat['digits']))
@@ -233,22 +261,28 @@
 
 
 def extend_basic_nodes(basic_nodes, nodes_definition):
+    # Here nodes defined in nodes.yml are transformed into actual definitions.
     def needs_expansion(node):
         return 'repeat' in node.keys()
 
     with open(nodes_definition, 'r') as f:
         nodes_init = yaml.load(f)['parameters']['reclass']['storage']['node']
-    basic_nodes_extended = {}
+    basic_nodes = copy.deepcopy(basic_nodes)  # preserve state.
+    basic_nodes_extended, extra_nodes = {}, []
+
     for nname, node in basic_nodes.iteritems():
-        node['classes'].extend(nodes_init.get(nname, {}).get('classes', []))
-        node['params'].update(nodes_init.get(nname, {}).get('params', {}))
+        if needs_expansion(node):
+            continue
+        node.get('classes', []).extend(nodes_init.get(nname, {}).
+                 get('classes', []))
+        # NOTE(review): in-place update of node params may be unsafe — verify.
+        node.get('params', {}).update(nodes_init.get(nname, {}).
+                 get('params', {}))
         basic_nodes_extended[nname] = node
 
-    extra_nodes = []
     for node in nodes_init.values():
         if needs_expansion(node):
             extra_nodes.extend(do_count_expansion(node))
-
     all_nodes = basic_nodes_extended.values()
     all_nodes.extend(extra_nodes)
     return all_nodes