Parse XML results to JSON files
After running XCCDF or OVAL scans, parse the results.xml file into a
results.json file in the format accepted by our `worp` API.
The `untangle` library v1.1.0 is vendored in
(https://github.com/stchris/untangle, MIT License).
Change-Id: I87f106c4b8b678e1b125ffab832f80ee261a4781
Related-Issue: https://mirantis.jira.com/browse/PROD-23159
Related-Issue: https://mirantis.jira.com/browse/PROD-23160
diff --git a/_modules/oscap/commands.py b/_modules/oscap/commands.py
index 44c703d..6682ae3 100644
--- a/_modules/oscap/commands.py
+++ b/_modules/oscap/commands.py
@@ -2,6 +2,7 @@
import tempfile
import os
from oscap.utils import build_tailoring, normalize_id, run
+from oscap.utils import xccdf_xml_to_json, oval_xml_to_json
def oscap_has_sce():
(stdout, _, _) = run('oscap -V')
@@ -40,6 +41,9 @@
f.write(build_tailoring(pillar_data, tailoring_id))
stdout, stderr, rc = run(cmd, tempdir)
+ res_file = os.path.join(tempdir, 'results.xml')
+ if os.path.isfile(res_file):
+ xccdf_xml_to_json(res_file)
return stdout, stderr, rc, tempdir
def oval(benchmark):
@@ -48,4 +52,7 @@
cmd = cmd.format(benchmark)
stdout, stderr, rc = run(cmd, tempdir)
+ res_file = os.path.join(tempdir, 'results.xml')
+ if os.path.isfile(res_file):
+ oval_xml_to_json(res_file)
return stdout, stderr, rc, tempdir
diff --git a/_modules/oscap/untangle.py b/_modules/oscap/untangle.py
new file mode 100644
index 0000000..6f0b22b
--- /dev/null
+++ b/_modules/oscap/untangle.py
@@ -0,0 +1,151 @@
+#!/usr/bin/env python
+
+"""
+ untangle
+
+ Converts xml to python objects.
+
+ The only method you need to call is parse()
+
+ Partially inspired by xml2obj
+ (http://code.activestate.com/recipes/149368-xml2obj/)
+
+ Author: Christian Stefanescu (http://0chris.com)
+ License: MIT License - http://www.opensource.org/licenses/mit-license.php
+"""
+
+import os
+from xml.sax import make_parser, handler
+try:
+ from StringIO import StringIO
+except ImportError:
+ from io import StringIO
+
+__version__ = '1.1.0'
+
+
+class _UnknownChildError(AttributeError, IndexError):
+    """
+    Raised when an ``Element`` has no child with the requested name.
+
+    It derives from ``AttributeError`` so that ``hasattr()`` and
+    ``getattr()`` with a default behave normally on Python 3 (which only
+    swallow ``AttributeError``), and from ``IndexError`` so that callers
+    catching the historical exception type keep working.
+    """
+
+
+class Element():
+    """
+    Representation of an XML element.
+
+    Attributes of the XML element are read with ``element['attr']``;
+    child elements are read as Python attributes (``element.child``);
+    accumulated character data is available as ``element.cdata``.
+    """
+    def __init__(self, name, attributes):
+        self._name = name
+        self._attributes = attributes
+        self.children = []
+        self.is_root = False
+        self.cdata = ''
+
+    def add_child(self, element):
+        """Append ``element`` to the list of children."""
+        self.children.append(element)
+
+    def add_cdata(self, cdata):
+        """Append a chunk of character data to ``self.cdata``."""
+        self.cdata = self.cdata + cdata
+
+    def get_attribute(self, key):
+        """Return the XML attribute ``key`` (``None`` when it is absent)."""
+        return self._attributes.get(key)
+
+    def get_elements(self, name=None):
+        """
+        Return the children called ``name`` as a list (possibly empty),
+        or all children when ``name`` is not given.
+        """
+        if name:
+            return [e for e in self.children if e._name == name]
+        else:
+            return self.children
+
+    def __getitem__(self, key):
+        return self.get_attribute(key)
+
+    def __getattr__(self, key):
+        """
+        Return the child element(s) called ``key``.
+
+        A single match is returned as an ``Element``, several matches as
+        a list. The result is cached in the instance ``__dict__``, so
+        children added afterwards under the same name are not reflected
+        by later attribute lookups.
+
+        Raises ``_UnknownChildError`` (both an ``AttributeError`` and an
+        ``IndexError``) when no child has that name.
+        """
+        # Read through __dict__: touching ``self.children`` before
+        # __init__ has run (e.g. while the object is being copied) would
+        # re-enter __getattr__ endlessly.
+        children = self.__dict__.get('children', ())
+        matching_children = [x for x in children if x._name == key]
+        if matching_children:
+            if len(matching_children) == 1:
+                self.__dict__[key] = matching_children[0]
+                return matching_children[0]
+            else:
+                self.__dict__[key] = matching_children
+                return matching_children
+        else:
+            raise _UnknownChildError('Unknown key <%s>' % key)
+
+    def __iter__(self):
+        # A single child can be looped over exactly like the list that is
+        # returned when several children share one name.
+        yield self
+
+    def __str__(self):
+        return (
+            "Element <%s> with attributes %s and children %s" %
+            (self._name, self._attributes, self.children)
+        )
+
+    def __repr__(self):
+        return (
+            "Element(name = %s, attributes = %s, cdata = %s)" %
+            (self._name, self._attributes, self.cdata)
+        )
+
+    def __nonzero__(self):
+        return self.is_root or self._name is not None
+
+    # Python 3 looks up __bool__ instead of __nonzero__.
+    __bool__ = __nonzero__
+
+    def __eq__(self, val):
+        # Elements compare by their character data.
+        return self.cdata == val
+
+    def __dir__(self):
+        children_names = [x._name for x in self.children]
+        return children_names
+
+
+class Handler(handler.ContentHandler):
+    """
+    SAX content handler that assembles a tree of ``Element`` objects.
+
+    ``root`` is a nameless placeholder element that receives the
+    document's top-level element as its child; ``elements`` is the stack
+    of elements that are currently open.
+    """
+    def __init__(self):
+        root = Element(None, None)
+        root.is_root = True
+        self.root = root
+        self.elements = []
+
+    def startElement(self, name, attributes):
+        # '-', '.' and ':' are replaced so the tag name can be used as a
+        # Python attribute name on the parent element.
+        for separator in ('-', '.', ':'):
+            name = name.replace(separator, '_')
+        element = Element(name, dict(attributes.items()))
+        # Attach to the innermost open element, or to the placeholder
+        # root when nothing is open yet.
+        parent = self.elements[-1] if self.elements else self.root
+        parent.add_child(element)
+        self.elements.append(element)
+
+    def endElement(self, name):
+        # The element that just closed is always the top of the stack.
+        self.elements.pop()
+
+    def characters(self, cdata):
+        # Character data belongs to the innermost open element.
+        self.elements[-1].add_cdata(cdata)
+
+
+def parse(filename):
+    """
+    Interprets the given string as a filename, URL or XML data string,
+    parses it and returns a Python object which represents the given
+    document.
+
+    External general entities are never resolved, so a document cannot
+    make the parser read local files or fetch remote resources.
+
+    Raises ``ValueError`` if the argument is None / empty string.
+
+    Raises ``xml.sax.SAXParseException`` if something goes wrong
+    during parsing.
+    """
+    if filename is None or filename.strip() == '':
+        raise ValueError('parse() takes a filename, URL or XML string')
+    parser = make_parser()
+    # Guard against XXE: the input may be a URL or arbitrary XML text, and
+    # older Python versions resolve external entities by default.
+    parser.setFeature(handler.feature_external_ges, False)
+    sax_handler = Handler()
+    parser.setContentHandler(sax_handler)
+    if os.path.exists(filename) or is_url(filename):
+        # The SAX parser opens paths and URLs itself.
+        parser.parse(filename)
+    else:
+        # Anything else is treated as the XML document text.
+        parser.parse(StringIO(filename))
+
+    return sax_handler.root
+
+
+def is_url(string):
+    """Return True when ``string`` starts with ``http://`` or ``https://``."""
+    return string.startswith(('http://', 'https://'))
+
+# vim: set expandtab ts=4 sw=4:
diff --git a/_modules/oscap/utils.py b/_modules/oscap/utils.py
index 164949d..f026fdc 100644
--- a/_modules/oscap/utils.py
+++ b/_modules/oscap/utils.py
@@ -1,11 +1,17 @@
+import collections
+import datetime
+import json
from lxml.etree import Element, SubElement, tostring
+import os.path
+import re
from subprocess import Popen, PIPE
import shlex
-import re
-import datetime
import salt.ext.six as six
+from oscap import untangle
+
+
def normalize_id(id,
xccdf_version='1.2',
typeof='profile',
@@ -16,6 +22,7 @@
return 'xccdf_org.{0}.content_{1}_{2}'.format(vendor, typeof, id)
return id
+
def build_tailoring(data, id):
xccdf_version = data.get('xccdf_version', '1.2')
ns = {None: 'http://checklists.nist.gov/xccdf/{}'.format(xccdf_version)}
@@ -26,12 +33,11 @@
tailoring.append(Element('benchmark', {'href': ext}))
now = datetime.datetime.now().isoformat()
- version = SubElement(tailoring, 'version', time=now).text = '1'
+ SubElement(tailoring, 'version', time=now).text = '1'
profile = SubElement(tailoring, 'Profile', id=pid, extends=ext)
- title = SubElement(profile, 'title').text = \
- 'Extends {}'.format(ext)
+ SubElement(profile, 'title').text = 'Extends {}'.format(ext)
for key, value in six.iteritems(data.get('values', {})):
idref = normalize_id(key, xccdf_version, typeof='value')
@@ -39,8 +45,122 @@
elem.text = str(value)
return tostring(tailoring, pretty_print=True)
+
def run(cmd, cwd=None):
# The Popen used here because the __salt__['cmd.run'] returns only stdout
proc = Popen(shlex.split(cmd), stdout=PIPE, stderr=PIPE, cwd=cwd)
(stdout, stderr) = proc.communicate()
return stdout, stderr, proc.returncode
+
+
+def _get_flatten_groups(document, groups=None):
+    """
+    Collect every ``Group`` element below ``document`` into a flat list.
+
+    Groups are listed depth-first: each group is followed by its own
+    nested groups before its next sibling.
+
+    :param document: untangle element whose ``Group`` children are walked
+    :param groups: list to extend (used by the recursion); a new list is
+        started when it is omitted or empty
+    :returns: list of group elements
+    """
+    groups = groups if groups else []
+    # get_elements() returns a (possibly empty) list. The former
+    # hasattr() probe only works on Python 2: a missing child raises
+    # IndexError, which Python 3's hasattr() lets propagate.
+    for group in document.get_elements('Group'):
+        groups.append(group)
+        groups = _get_flatten_groups(group, groups)
+    return groups
+
+
+def _get_rules(groups):
+    """
+    Build a lookup of the rules defined directly inside ``groups``.
+
+    :param groups: iterable of untangle elements to scan for ``Rule``
+        children
+    :returns: dict mapping the rule ``id`` attribute to a dict with the
+        keys ``title``, ``severity`` and ``description``; a text field is
+        ``None`` when the rule has no such child element
+    """
+    def _first_cdata(element, name):
+        # The child may be absent or repeated; take the first occurrence
+        # instead of failing on either case.
+        matches = element.get_elements(name)
+        return matches[0].cdata if matches else None
+
+    rules = {}
+    for group in groups:
+        # get_elements() yields an empty list for groups without rules,
+        # so no hasattr() probe is needed (it breaks on Python 3, where a
+        # missing child raises IndexError rather than AttributeError).
+        for rule in group.get_elements('Rule'):
+            rules[rule['id']] = {
+                'title': _first_cdata(rule, 'title'),
+                'severity': rule['severity'],
+                'description': _first_cdata(rule, 'description')}
+    return rules
+
+
+def _parse_xccdf_doc(document):
+    """
+    Turn a parsed XCCDF results document into a list of result dicts.
+
+    :param document: untangle document containing a ``Benchmark`` with a
+        ``TestResult`` element
+    :returns: list with one dict per ``rule-result`` holding the keys
+        ``rule``, ``result``, ``severity``, ``weight``, ``title`` and
+        ``description``; ``title`` and ``description`` are ``None`` when
+        the referenced rule was not found in the benchmark
+    """
+    benchmark = document.Benchmark
+    # The benchmark itself is scanned as well, so rules placed directly
+    # under <Benchmark> (outside any Group) are also found.
+    rules = _get_rules([benchmark] + _get_flatten_groups(benchmark))
+
+    results = []
+    for result in benchmark.TestResult.rule_result:
+        rule_id = result['idref']
+        # A result whose rule definition is unknown is still reported
+        # rather than aborting the whole conversion.
+        rule = rules.get(rule_id, {})
+        results.append({
+            'rule': rule_id,
+            'result': result.result.cdata,
+            'severity': result['severity'],
+            'weight': result['weight'],
+            'title': rule.get('title'),
+            'description': rule.get('description')
+        })
+
+    return results
+
+
+def _sanitize_xccdf_xml(data):
+    """
+    Remove a fixed set of embedded ``html:`` tags from XCCDF XML text.
+
+    Only the tags are removed; the text they enclose is kept.
+
+    :param data: XML document as a string
+    :returns: the string with every listed tag removed
+    """
+    # Removed one after another, in exactly this order.
+    html_tags = (
+        '<html:code xmlns:html="http://www.w3.org/1999/xhtml">',
+        '</html:code>',
+        '<html:pre xmlns:html="http://www.w3.org/1999/xhtml">',
+        '<html:pre>',
+        '</html:pre>',
+        '<html:code>',
+        '<html:li>',
+        '</html:li>',
+        ('<html:pre xmlns:html="http://www.w3.org/1999/xhtml" '
+         'xmlns:ns0="http://checklists.nist.gov/xccdf/1.1">'),
+        '<html:br xmlns:html="http://www.w3.org/1999/xhtml"/>',
+        ('<html:code xmlns:html="http://www.w3.org/1999/xhtml" '
+         'xmlns:ns0="http://checklists.nist.gov/xccdf/1.1">'),
+    )
+    for tag in html_tags:
+        data = data.replace(tag, '')
+    return data
+
+
+def xccdf_xml_to_json(xml_file):
+    """
+    Convert an XCCDF results XML file into a JSON file next to it.
+
+    The output path is ``xml_file`` with its extension replaced by
+    ``.json``; the JSON document is an object with a single ``results``
+    key holding the list of parsed rule results.
+
+    :param xml_file: path to the XCCDF results XML file
+    """
+    with open(xml_file) as source:
+        xml_text = source.read()
+    # The XML is sanitized and handed to untangle as a string.
+    parsed = untangle.parse(_sanitize_xccdf_xml(xml_text))
+    payload = {"results": _parse_xccdf_doc(parsed)}
+    json_path = os.path.splitext(xml_file)[0] + '.json'
+    with open(json_path, 'w') as target:
+        # NOTE(pas-ha) the src/com/mirantis/mk.Common.parseJSON method
+        # from mk/pipeline-library that is used in our Jenkins pipelines
+        # can not parse the string representation of a list!
+        # only dict structure is supported as a top-level one there
+        json.dump(payload, target)
+
+
+def _parse_oval_definitions(document):
+    """
+    Build a lookup of the OVAL definitions contained in ``document``.
+
+    :param document: untangle document with an ``oval_results`` root
+    :returns: dict mapping the definition ``id`` attribute to a dict that
+        always holds ``class`` and, for each one that could be read from
+        the definition metadata, ``title``, ``description``, ``ref_id``,
+        ``link`` and ``severity``
+    """
+    # A missing child raises IndexError in the vendored untangle, a
+    # repeated child is returned as a list (AttributeError on ``.cdata``,
+    # TypeError on ``['attr']``). All of them just mean "not available".
+    lookup_errors = (AttributeError, IndexError, TypeError)
+    # Each field is read on its own so one missing element does not
+    # discard the fields that follow it.
+    fields = (
+        ('title', lambda meta: meta.title.cdata),
+        ('description', lambda meta: meta.description.cdata),
+        ('ref_id', lambda meta: meta.reference['ref_id']),
+        ('link', lambda meta: meta.reference['ref_url']),
+        ('severity', lambda meta: meta.advisory.severity.cdata),
+    )
+
+    definitions = {}
+    def_list = document.oval_results.oval_definitions.definitions.definition
+    for definition in def_list:
+        def_dict = {'class': definition['class']}
+        definitions[definition['id']] = def_dict
+        try:
+            metadata = definition.metadata
+        except lookup_errors:
+            continue
+        for key, read_field in fields:
+            try:
+                def_dict[key] = read_field(metadata)
+            except lookup_errors:
+                # NOTE(e0ne): inventory doesn't have
+                # definition.metadata.reference
+                continue
+
+    return definitions
+
+
+def oval_xml_to_json(xml_file):
+    """
+    Convert an OVAL results XML file into a JSON file next to it.
+
+    Each definition result of the document's ``system`` element is
+    merged with the matching entry from the document's definitions. The
+    output path is ``xml_file`` with its extension replaced by ``.json``;
+    the JSON document is an object with a single ``results`` key.
+
+    :param xml_file: path to the OVAL results XML file
+    """
+    document = untangle.parse(xml_file)
+    known_definitions = _parse_oval_definitions(document)
+    system = document.oval_results.results.system
+
+    results = []
+    for entry in system.definitions.definition:
+        definition_id = entry['definition_id']
+        record = collections.defaultdict(lambda: None)
+        record['id'] = definition_id
+        record['result'] = entry['result']
+        # Add class/title/description/... of the referenced definition.
+        record.update(known_definitions[definition_id])
+        results.append(record)
+
+    json_path = os.path.splitext(xml_file)[0] + '.json'
+    with open(json_path, 'w') as target:
+        # NOTE(pas-ha) the src/com/mirantis/mk.Common.parseJSON method
+        # from mk/pipeline-library that is used in our Jenkins pipelines
+        # can not parse the string representation of a list!
+        # only dict structure is supported as a top-level one there
+        json.dump({"results": results}, target)