blob: 6f0b22be55f5bc259534936bec461fd6fd0cc1eb [file] [log] [blame]
#!/usr/bin/env python
"""
untangle
Converts xml to python objects.
The only method you need to call is parse()
Partially inspired by xml2obj
(http://code.activestate.com/recipes/149368-xml2obj/)
Author: Christian Stefanescu (http://0chris.com)
License: MIT License - http://www.opensource.org/licenses/mit-license.php
"""
import os
from xml.sax import make_parser, handler
try:
from StringIO import StringIO
except ImportError:
from io import StringIO
__version__ = '1.1.0'
class _UnknownChildError(AttributeError, IndexError):
    """
    Raised by ``Element.__getattr__`` when no child has the requested name.
    It is an ``AttributeError`` so that ``hasattr()`` and ``getattr()`` with
    a default behave normally, and also an ``IndexError`` so that callers
    which already catch ``IndexError`` keep working.
    """


class Element(object):
    """
    Representation of an XML element.
    Attributes of the XML element are read with ``element['key']`` or
    ``get_attribute()``; child elements are read with attribute access
    (``element.child_name``) or ``get_elements()``; character data is
    accumulated in ``cdata``.
    """

    def __init__(self, name, attributes):
        """
        Creates an element called ``name`` with the given attribute mapping
        (may be None), no children and empty character data.
        """
        self._name = name
        self._attributes = attributes
        self.children = []
        self.is_root = False
        self.cdata = ''

    def add_child(self, element):
        """
        Appends ``element`` to the list of children.
        """
        self.children.append(element)

    def add_cdata(self, cdata):
        """
        Appends ``cdata`` to the character data collected so far.
        """
        self.cdata = self.cdata + cdata

    def get_attribute(self, key):
        """
        Returns the value of the XML attribute ``key`` or None if the
        element has no such attribute.
        """
        # An element may be created with attributes=None; treat that as
        # "no attributes" instead of failing on None.get().
        if self._attributes is None:
            return None
        return self._attributes.get(key)

    def get_elements(self, name=None):
        """
        Returns the children called ``name`` as a new list, or the list of
        all children if no name is given.
        """
        if name:
            return [e for e in self.children if e._name == name]
        return self.children

    def __getitem__(self, key):
        """
        ``element[key]`` is a shortcut for ``element.get_attribute(key)``.
        """
        return self.get_attribute(key)

    def __getattr__(self, key):
        """
        Resolves ``element.key`` to the child element(s) called ``key``.
        Returns the child itself if exactly one matches, otherwise the list
        of matching children. The result is stored in the instance
        ``__dict__``, so later lookups of the same name return the stored
        value without calling this method again.
        Raises an error which is both an ``AttributeError`` and an
        ``IndexError`` if no child matches.
        """
        # __getattr__ only runs when normal lookup failed. Read ``children``
        # straight from __dict__: on an instance whose __init__ has not run
        # ``self.children`` would re-enter this method forever.
        children = self.__dict__.get('children', ())
        matching_children = [x for x in children if x._name == key]
        if not matching_children:
            raise _UnknownChildError('Unknown key <%s>' % key)
        if len(matching_children) == 1:
            result = matching_children[0]
        else:
            result = matching_children
        self.__dict__[key] = result
        return result

    def __iter__(self):
        """
        Yields the element itself, so a single element can be iterated the
        same way as a list of elements.
        """
        yield self

    def __str__(self):
        return (
            "Element <%s> with attributes %s and children %s" %
            (self._name, self._attributes, self.children)
        )

    def __repr__(self):
        return (
            "Element(name = %s, attributes = %s, cdata = %s)" %
            (self._name, self._attributes, self.cdata)
        )

    def __bool__(self):
        """
        An element is true if it is the root or has a name.
        """
        return self.is_root or self._name is not None

    # Python 2 looks up __nonzero__, Python 3 looks up __bool__.
    __nonzero__ = __bool__

    def __eq__(self, val):
        """
        Compares the character data of the element with ``val``.
        """
        # NOTE: on Python 3, defining __eq__ without __hash__ makes
        # instances unhashable.
        return self.cdata == val

    def __dir__(self):
        """
        Lists the names of the child elements.
        """
        children_names = [x._name for x in self.children]
        return children_names
class Handler(handler.ContentHandler):
    """
    SAX handler which creates the Python object structure out of ``Element``s
    The resulting tree hangs below ``self.root``; ``self.elements`` is the
    stack of elements which are currently open.
    """

    def __init__(self):
        # ContentHandler may be an old-style class on Python 2, so the base
        # initialiser is called explicitly rather than through super().
        handler.ContentHandler.__init__(self)
        self.root = Element(None, None)
        self.root.is_root = True
        self.elements = []

    def startElement(self, name, attributes):
        """
        Creates an ``Element`` for the opening tag, attaches it to the
        innermost open element (or to the root) and pushes it on the stack.
        """
        # '-', '.' and ':' cannot appear in a Python identifier, so they are
        # replaced by underscores in the element name.
        for char in ('-', '.', ':'):
            name = name.replace(char, '_')
        # Copy the attributes into a plain dict.
        element = Element(name, dict(attributes.items()))
        parent = self.elements[-1] if self.elements else self.root
        parent.add_child(element)
        self.elements.append(element)

    def endElement(self, name):
        """
        Closes the innermost open element by popping it off the stack.
        """
        self.elements.pop()

    def characters(self, cdata):
        """
        Adds the character data to the innermost open element.
        """
        self.elements[-1].add_cdata(cdata)
def parse(filename, **parser_features):
    """
    Interprets the given string as a filename, URL or XML data string,
    parses it and returns a Python object which represents the given
    document.
    An object with a ``read`` attribute (file-like object) is passed to
    the parser as it is.
    Extra keyword arguments are applied as SAX parser features: the keyword
    is the name of a feature constant in ``xml.sax.handler`` and the value
    is the state to set, e.g. ``parse(data, feature_external_ges=False)``.
    Raises ``ValueError`` if the argument is None / empty string.
    Raises ``AttributeError`` if a keyword argument does not name a
    constant in ``xml.sax.handler``.
    Raises ``xml.sax.SAXParseException`` if something goes wrong
    during parsing.
    """
    if filename is None:
        raise ValueError('parse() takes a filename, URL or XML string')
    is_file_like = hasattr(filename, 'read')
    # Only strings are checked for emptiness; file-like objects have no
    # strip().
    if not is_file_like and filename.strip() == '':
        raise ValueError('parse() takes a filename, URL or XML string')
    parser = make_parser()
    for feature, value in parser_features.items():
        parser.setFeature(getattr(handler, feature), value)
    sax_handler = Handler()
    parser.setContentHandler(sax_handler)
    if is_file_like or os.path.exists(filename) or is_url(filename):
        # The parser opens filenames / URLs itself and reads file-like
        # objects directly.
        parser.parse(filename)
    else:
        # Anything else is treated as the XML document itself.
        parser.parse(StringIO(filename))
    return sax_handler.root
def is_url(string):
    """
    Tells whether the given string starts with an http:// or https:// scheme.
    """
    url_prefixes = ('http://', 'https://')
    return string.startswith(url_prefixes)
# vim: set expandtab ts=4 sw=4: