Revamp storage classes, add node subdirs and caching
This is a mammoth commit, sorry about that. In trying to add node subdir
support, I ended up refactoring a lot of the logic from yaml_fs into the
storage base class, and then caching was added along the way, I almost
didn't notice. ;)
Signed-off-by: martin f. krafft <madduck@madduck.net>
diff --git a/doc/source/changelog.rst b/doc/source/changelog.rst
index 3553051..1bd522a 100644
--- a/doc/source/changelog.rst
+++ b/doc/source/changelog.rst
@@ -5,6 +5,10 @@
========= ========== ========================================================
Version Date Changes
========= ========== ========================================================
+ * Caching of classes for performance reasons, especially
+ during the inventory runs
+ * yaml_fs: nodes may be defined in subdirectories
+ (closes: #10).
* Classes and nodes URI must not overlap anymore
* Class names must not contain spaces
1.1 2013-08-28 Salt adapter: fix interface to include minion_id, filter
diff --git a/doc/source/operations.rst b/doc/source/operations.rst
index 049fed1..671b8b0 100644
--- a/doc/source/operations.rst
+++ b/doc/source/operations.rst
@@ -40,6 +40,12 @@
permit_root_login: no
============ ================================================================
+Nodes may be defined in subdirectories. However, node names (the filenames
+without their extension) must be unique across all subdirectories, and
+|reclass| will exit with an error if a node is defined multiple times.
+Subdirectories therefore really only exist for the administrator's sanity
+(and may be used in the future to tag additional classes onto nodes).
+
Data merging
------------
|reclass| has two modes of operation: node information retrieval and inventory
diff --git a/doc/source/todo.rst b/doc/source/todo.rst
index f1733c5..5aa12c7 100644
--- a/doc/source/todo.rst
+++ b/doc/source/todo.rst
@@ -74,9 +74,4 @@
a list of clients that define it as their master. That would short-circuit
Puppet's ``storeconfigs`` and Salt's ``mine``.
-Caching of classes in yaml\_fs
-------------------------------
-Right now, ``yaml\_fs`` opens each class file dozens of times during an
-inventory run. A class could be cached.
-
.. include:: substs.inc
diff --git a/reclass/errors.py b/reclass/errors.py
index 8f097c6..ddc09ad 100644
--- a/reclass/errors.py
+++ b/reclass/errors.py
@@ -140,3 +140,12 @@
msg = "Invalid character '{0}' in class name '{1}'."
msg = msg.format(invalid_character, classname)
super(InvalidClassnameError, self).__init__(msg)
+
+
+class DuplicateNodeNameError(NameError):
+    """Raised when a node name is defined by more than one URI.
+
+    ``storage`` is the name of the storage backend, ``name`` the node
+    name, and ``uri1``/``uri2`` the two colliding definitions. The
+    message names ``uri2`` first and ``uri1`` second.
+    """
+
+    def __init__(self, storage, name, uri1, uri2):
+        template = ("{0}: Definition of node '{1}' in '{2}' collides with "
+                    "definition in '{3}'. Nodes can only be defined once "
+                    "per inventory.")
+        # uri2 fills the first URI slot of the message, uri1 the second.
+        text = template.format(storage, name, uri2, uri1)
+        super(DuplicateNodeNameError, self).__init__(text)
diff --git a/reclass/storage/__init__.py b/reclass/storage/__init__.py
index b13826b..8bb64e4 100644
--- a/reclass/storage/__init__.py
+++ b/reclass/storage/__init__.py
@@ -8,6 +8,7 @@
#
import time, sys
+from reclass.datatypes import Entity
def _get_timestamp():
return time.strftime('%c')
@@ -21,31 +22,87 @@
def __init__(self, nodes_uri, classes_uri):
self._nodes_uri = nodes_uri
self._classes_uri = classes_uri
+ self._classes_cache = {}
nodes_uri = property(lambda self: self._nodes_uri)
classes_uri = property(lambda self: self._classes_uri)
- def _read_entity(self, node, base_uri, seen={}):
- raise NotImplementedError, "Storage class not implement node info retrieval"
+ def _get_storage_name(self):
+     """Return the name of the storage backend; subclasses must override."""
+     raise NotImplementedError("Storage class does not have a name")
- def nodeinfo(self, node):
- entity, uri = self._read_entity(node, self.nodes_uri, {})
- entity.interpolate()
- return {'__reclass__' : {'node': node, 'node_uri': uri,
- 'timestamp': _get_timestamp()
+ def _get_node(self, name, merge_base=None):
+     """Return an ``(entity, uri)`` pair for node ``name``.
+
+     Subclasses must override this; the base implementation always
+     raises NotImplementedError. ``merge_base`` is accepted but not used
+     here.
+     """
+     raise NotImplementedError(
+         "Storage class does not implement node entity retrieval")
+
+ def _get_class(self, name):
+     """Return an ``(entity, uri)`` pair for class ``name``.
+
+     Subclasses must override this; the base implementation always
+     raises NotImplementedError.
+     """
+     raise NotImplementedError(
+         "Storage class does not implement class entity retrieval")
+
+ def _recurse_entity(self, entity, merge_base=None, seen=None, nodename=None):
+     """Merge ``entity`` and every class it references into one entity.
+
+     The classes listed by ``entity`` are resolved depth-first in listing
+     order. Each class's own merged result is merged into ``merge_base``,
+     and ``entity`` itself is merged last so that values at the current
+     level override those defined by its parents.
+
+     ``merge_base`` is the entity to merge into (a new empty one when
+     omitted). ``seen`` is a dict keyed by the class names already
+     handled in this resolution (a new dict when omitted). ``nodename``
+     is only used to label the empty merge base. Returns ``merge_base``.
+     """
+     if seen is None:
+         # A literal ``{}`` default is created once and shared by every
+         # call that omits ``seen``; classes resolved for one node would
+         # then be skipped for all nodes resolved afterwards.
+         seen = {}
+     if merge_base is None:
+         merge_base = Entity(name='empty (@{0})'.format(nodename))
+
+     for klass in entity.classes.as_list():
+         if klass in seen:
+             continue
+         # Mark the class before descending into it, so that a class which
+         # (directly or indirectly) lists itself ends the recursion
+         # instead of descending forever. For acyclic class graphs the
+         # merge order is unaffected.
+         seen[klass] = True
+
+         try:
+             class_entity = self._classes_cache[klass]
+         except KeyError:
+             class_entity, _uri = self._get_class(klass)
+             self._classes_cache[klass] = class_entity
+
+         descent = self._recurse_entity(class_entity, seen=seen,
+                                        nodename=nodename)
+         # on every iteration, merge the result of the recursive descent
+         # into what has been collected so far
+         merge_base.merge(descent)
+
+     # finally merge this level on top, so that it overrides its parents
+     merge_base.merge(entity)
+     return merge_base
+
+ def _nodeinfo(self, nodename):
+     """Return ``(entity, uri)`` for ``nodename``.
+
+     The entity is the node merged with all classes it references, with
+     ``interpolate()`` already called on the result.
+     """
+     node_entity, uri = self._get_node(nodename)
+     base = Entity(name='merge base for {0}'.format(nodename))
+     merged = self._recurse_entity(node_entity, base, nodename=nodename)
+     merged.interpolate()
+     return merged, uri
+
+ def _nodeinfo_as_dict(self, nodename, entity, uri):
+ # Build the dict returned to callers: a '__reclass__' metadata entry
+ # (node name, URI, timestamp) updated with the keys of entity.as_dict().
+ ret = {'__reclass__' : {'node': nodename, 'uri': uri,
+ 'timestamp': _get_timestamp()
},
- 'classes': entity.classes.as_list(),
- 'applications': entity.applications.as_list(),
- 'parameters': entity.parameters.as_dict()
- }
+ }
+ ret.update(entity.as_dict())
+ return ret
+
+ def nodeinfo(self, nodename):
+     """Return the merged information for ``nodename`` as a dict."""
+     entity, uri = self._nodeinfo(nodename)
+     return self._nodeinfo_as_dict(nodename, entity, uri)
def _list_inventory(self):
raise NotImplementedError, "Storage class does not implement inventory listing"
def inventory(self):
- entities, applications, classes = self._list_inventory()
+ # _list_inventory() yields {node name: (entity, uri)}; the indexes of
+ # applications and classes are built here, in the base class.
+ entities = self._list_inventory()
+
+ nodes = {}
+ applications = {}
+ classes = {}
+ for f, (nodeinfo, uri) in entities.iteritems():
+ d = nodes[f] = self._nodeinfo_as_dict(f, nodeinfo, uri)
+ # record the node name under each of its applications ...
+ for a in d['applications']:
+ if a in applications:
+ applications[a].append(f)
+ else:
+ applications[a] = [f]
+ # ... and under each of its classes
+ for c in d['classes']:
+ if c in classes:
+ classes[c].append(f)
+ else:
+ classes[c] = [f]
+
return {'__reclass__' : {'timestamp': _get_timestamp()},
- 'nodes': entities,
+ 'nodes': nodes,
'classes': classes,
'applications': applications
}
diff --git a/reclass/storage/yaml_fs/__init__.py b/reclass/storage/yaml_fs/__init__.py
index d6f3cbd..798847e 100644
--- a/reclass/storage/yaml_fs/__init__.py
+++ b/reclass/storage/yaml_fs/__init__.py
@@ -7,6 +7,7 @@
# Released under the terms of the Artistic Licence 2.0
#
import os, sys
+import fnmatch
from reclass.storage import NodeStorageBase
from yamlfile import YamlFile
from directory import Directory
@@ -24,65 +25,55 @@
def __init__(self, nodes_uri, classes_uri):
super(ExternalNodeStorage, self).__init__(nodes_uri, classes_uri)
- def _handle_read_error(self, exc, name, base_uri, nodename):
- if base_uri == self.classes_uri:
- raise reclass.errors.ClassNotFound('yaml_fs', name, base_uri, nodename)
- else:
- raise reclass.errors.NodeNotFound('yaml_fs', name, base_uri)
+ # A node name found under two paths raises DuplicateNodeNameError.
+ def _handle_node_duplicates(name, uri1, uri2):
+ raise reclass.errors.DuplicateNodeNameError(self._get_storage_name(),
+ name, uri1, uri2)
+ # Both trees are enumerated once, here at construction time. Classes
+ # get no duplicate handler, so for a repeated class name the file
+ # registered last replaces the earlier one.
+ self._nodes = self._enumerate_inventory(nodes_uri,
+ duplicate_handler=_handle_node_duplicates)
+ self._classes = self._enumerate_inventory(classes_uri)
- def _read_entity(self, name, base_uri, seen, nodename=None):
- path = os.path.join(base_uri, name + FILE_EXTENSION)
+ def _get_storage_name(self):
+ # Name of this backend, passed to the reclass.errors exceptions raised below.
+ return 'yaml_fs'
+
+ def _enumerate_inventory(self, basedir, duplicate_handler=None):
+     """Map entity names to file paths relative to ``basedir``.
+
+     Walks ``basedir`` and registers every file whose name matches
+     ``*FILE_EXTENSION`` under its name without the extension. When a
+     name is registered again and ``duplicate_handler`` is callable, it
+     is called as ``duplicate_handler(name, earlier_path, new_path)``;
+     the new path then replaces the earlier entry.
+     """
+     pattern = '*{0}'.format(FILE_EXTENSION)
+     found = {}
+
+     def _register(dirpath, filenames):
+         matching = fnmatch.filter(filenames, pattern)
+         vvv('REGISTER {0} in path {1}'.format(matching, dirpath))
+         for filename in matching:
+             name, _ext = os.path.splitext(filename)
+             uri = os.path.join(dirpath, filename)
+             if callable(duplicate_handler) and name in found:
+                 earlier = os.path.join(basedir, found[name])
+                 duplicate_handler(name, earlier, uri)
+             found[name] = os.path.relpath(uri, basedir)
+
+     Directory(basedir).walk(_register)
+     return found
+
+ def _get_node(self, name):
+ # Look the node up in the index built at construction time and load its
+ # YAML file; returns (entity, 'file://' URI).
+ vvv('GET NODE {0}'.format(name))
try:
- entity = YamlFile(path).entity
- seen[name] = True
+ path = os.path.join(self.nodes_uri, self._nodes[name])
+ except KeyError, e:
+ # name is not in the self._nodes index
+ raise reclass.errors.NodeNotFound(self._get_storage_name(),
+ name, self.nodes_uri)
+ entity = YamlFile(path).entity
+ return entity, 'file://{0}'.format(path)
- merge_base = Entity()
- for klass in entity.classes.as_list():
- if klass not in seen:
- ret = self._read_entity(klass, self.classes_uri, seen,
- name if nodename is None else nodename)[0]
- # on every iteration, we merge the result of the
- # recursive descend into what we have so far…
- merge_base.merge(ret)
-
- # … and finally, we merge what we have at this level into the
- # result of the iteration, so that elements at the current level
- # overwrite stuff defined by parents
- merge_base.merge(entity)
- return merge_base, 'file://{0}'.format(path)
-
- except reclass.errors.NotFoundError, e:
- self._handle_read_error(e, name, base_uri, nodename)
-
- except IOError, e:
- self._handle_read_error(e, name, base_uri, nodename)
+ def _get_class(self, name, nodename=None):
+     """Return ``(entity, uri)`` for the class ``name``.
+
+     The class file is looked up in the index built at construction
+     time. Raises ``reclass.errors.ClassNotFound`` when ``name`` is not
+     in that index; ``nodename`` is only passed on to that exception.
+     """
+     vvv('GET CLASS {0}'.format(name))
+     try:
+         relpath = self._classes[name]
+     except KeyError:
+         raise reclass.errors.ClassNotFound(self._get_storage_name(),
+                                            name, self.classes_uri,
+                                            nodename)
+     path = os.path.join(self.classes_uri, relpath)
+     uri = 'file://{0}'.format(path)
+     return YamlFile(path).entity, uri
def _list_inventory(self):
- d = Directory(self.nodes_uri)
-
entities = {}
-
- def register_fn(dirpath, filenames):
- vvv('REGISTER {0} in path {1}'.format(filenames, dirpath))
- for f in filter(lambda f: f.endswith(FILE_EXTENSION), filenames):
- name = f[:-len(FILE_EXTENSION)]
- nodeinfo = self.nodeinfo(name)
- entities[name] = nodeinfo
-
- d.walk(register_fn)
-
- applications = {}
- classes = {}
- for f, nodeinfo in entities.iteritems():
- for a in nodeinfo['applications']:
- if a in applications:
- applications[a].append(f)
- else:
- applications[a] = [f]
- for c in nodeinfo['classes']:
- if c in classes:
- classes[c].append(f)
- else:
- classes[c] = [f]
-
- return entities, applications, classes
+ # self._nodes was filled at construction time; map each node name to
+ # the (entity, uri) pair returned by _nodeinfo().
+ for n in self._nodes.iterkeys():
+ entities[n] = self._nodeinfo(n)
+ return entities