| Pavlo Shchelokovskyy | 4a8f1c1 | 2018-09-21 19:17:19 +0300 | [diff] [blame] | 1 | #!/usr/bin/env python | 
|  | 2 |  | 
|  | 3 | """ | 
|  | 4 | untangle | 
|  | 5 |  | 
|  | 6 | Converts xml to python objects. | 
|  | 7 |  | 
|  | 8 | The only method you need to call is parse() | 
|  | 9 |  | 
|  | 10 | Partially inspired by xml2obj | 
|  | 11 | (http://code.activestate.com/recipes/149368-xml2obj/) | 
|  | 12 |  | 
|  | 13 | Author: Christian Stefanescu (http://0chris.com) | 
|  | 14 | License: MIT License - http://www.opensource.org/licenses/mit-license.php | 
|  | 15 | """ | 
|  | 16 |  | 
|  | 17 | import os | 
|  | 18 | from xml.sax import make_parser, handler | 
|  | 19 | try: | 
|  | 20 | from StringIO import StringIO | 
|  | 21 | except ImportError: | 
|  | 22 | from io import StringIO | 
|  | 23 |  | 
|  | 24 | __version__ = '1.1.0' | 
|  | 25 |  | 
|  | 26 |  | 
class _UnknownKeyError(AttributeError, IndexError):
    """
    Raised by ``Element.__getattr__`` when no child has the requested name.

    It derives from ``AttributeError`` so that ``hasattr()``, ``getattr()``
    with a default and ``copy.deepcopy()`` treat the lookup as a missing
    attribute, and from ``IndexError`` so that callers catching the
    exception type raised previously keep working.
    """


class Element(object):
    """
    Representation of an XML element.

    Child elements are reachable as attributes (``node.child``), XML
    attributes through item access (``node['attr']``) and the accumulated
    character data through ``node.cdata``.
    """

    def __init__(self, name, attributes):
        """
        Create an element called ``name`` with the mapping ``attributes``.

        The element starts without children, with empty character data and
        with ``is_root`` set to False.
        """
        self._name = name
        self._attributes = attributes
        self.children = []
        self.is_root = False
        self.cdata = ''

    def add_child(self, element):
        """Append ``element`` to the list of children."""
        self.children.append(element)

    def add_cdata(self, cdata):
        """Append ``cdata`` to the character data collected so far."""
        self.cdata = self.cdata + cdata

    def get_attribute(self, key):
        """
        Return the value of the XML attribute ``key`` or None if the
        element has no such attribute.
        """
        # An element may be created with ``attributes=None``; treat that
        # as "no attributes" instead of failing on ``None.get``.
        if self._attributes is None:
            return None
        return self._attributes.get(key)

    def get_elements(self, name=None):
        """
        Return the children called ``name`` as a new list. When ``name`` is
        empty or None the element's own list of children is returned.
        """
        if name:
            return [e for e in self.children if e._name == name]
        return self.children

    def __getitem__(self, key):
        """``element[key]`` is a shortcut for ``get_attribute(key)``."""
        return self.get_attribute(key)

    def __getattr__(self, key):
        """
        Resolve ``element.key`` to the child (one match) or the list of
        children (several matches) whose name is ``key``.

        Raises an exception that is both an ``AttributeError`` and an
        ``IndexError`` when no child matches.
        """
        # __getattr__ only runs when normal attribute lookup fails.
        # ``children`` is read from the instance dict because a plain
        # ``self.children`` would re-enter __getattr__ endlessly on an
        # instance whose __init__ has not run (e.g. during copy/unpickle).
        children = self.__dict__.get('children', ())
        matching_children = [x for x in children if x._name == key]
        if not matching_children:
            raise _UnknownKeyError('Unknown key <%s>' % key)
        if len(matching_children) == 1:
            found = matching_children[0]
        else:
            found = matching_children
        # Cache the result on the instance; later lookups of ``key`` are
        # served from the instance dict and do not reach __getattr__ again.
        self.__dict__[key] = found
        return found

    def __iter__(self):
        """
        Yield only this element, so the result of ``parent.child`` can be
        iterated whether it is a single element or a list of elements.
        """
        yield self

    def __str__(self):
        return (
            "Element <%s> with attributes %s and children %s" %
            (self._name, self._attributes, self.children)
        )

    def __repr__(self):
        return (
            "Element(name = %s, attributes = %s, cdata = %s)" %
            (self._name, self._attributes, self.cdata)
        )

    def __bool__(self):
        """An element is true when it is the root or has a name."""
        return self.is_root or self._name is not None

    # Python 2 looks up __nonzero__, Python 3 looks up __bool__.
    __nonzero__ = __bool__

    def __eq__(self, val):
        """Compare the element's character data with ``val``."""
        return self.cdata == val

    def __ne__(self, val):
        """Inverse of ``__eq__``; Python 2 does not derive it automatically."""
        return not self.__eq__(val)

    def __dir__(self):
        """Return the names of the children."""
        children_names = [x._name for x in self.children]
        return children_names
|  | 92 |  | 
|  | 93 |  | 
class Handler(handler.ContentHandler):
    """
    SAX handler which creates the Python object structure out of ``Element``s

    ``root`` is a synthetic, nameless element flagged with ``is_root``; the
    top-level element of the document becomes its child. ``elements`` is
    the stack of elements that are currently open.
    """

    def __init__(self):
        # Run the base initializer explicitly so the handler starts with the
        # state ContentHandler sets up; the explicit form works on both
        # Python 2 and Python 3.
        handler.ContentHandler.__init__(self)
        self.root = Element(None, None)
        self.root.is_root = True
        self.elements = []

    def startElement(self, name, attributes):
        """
        Create an ``Element`` for the opened tag, attach it to the innermost
        open element (or to ``root``) and push it on the stack.
        """
        # '-', '.' and ':' are replaced with '_' so that the element name
        # can be used for attribute-style access (``parent.some_tag``).
        for char in ('-', '.', ':'):
            name = name.replace(char, '_')
        # Copy the attributes into a plain dict owned by the element.
        element = Element(name, dict(attributes.items()))
        parent = self.elements[-1] if self.elements else self.root
        parent.add_child(element)
        self.elements.append(element)

    def endElement(self, name):
        """Pop the element that has just been closed off the stack."""
        self.elements.pop()

    def characters(self, cdata):
        """Append character data to the innermost open element."""
        self.elements[-1].add_cdata(cdata)
|  | 122 |  | 
|  | 123 |  | 
def parse(filename, **parser_features):
    """
    Interprets the given string as a filename, URL or XML data string,
    parses it and returns a Python object which represents the given
    document.

    An object with a ``read`` attribute (an open file, a ``StringIO``, ...)
    is also accepted and handed to the SAX parser as is.

    Extra keyword arguments are SAX parser features: each key is the name
    of a feature constant in ``xml.sax.handler`` (for example
    ``feature_external_ges``) and each value is the state to set.

    Raises ``ValueError`` if the argument is None / empty string.

    Raises ``AttributeError`` if a keyword argument does not name an
    attribute of ``xml.sax.handler``.

    Raises ``xml.sax.SAXParseException`` if something goes wrong
    during parsing.
    """
    if filename is None:
        raise ValueError('parse() takes a filename, URL or XML string')
    is_stream = hasattr(filename, 'read')
    if not is_stream and filename.strip() == '':
        raise ValueError('parse() takes a filename, URL or XML string')

    parser = make_parser()
    # Security: do not resolve external general entities unless the caller
    # explicitly asks for it (protects against XXE in untrusted documents).
    parser.setFeature(handler.feature_external_ges, False)
    for feature, value in parser_features.items():
        parser.setFeature(getattr(handler, feature), value)

    sax_handler = Handler()
    parser.setContentHandler(sax_handler)
    if is_stream or os.path.exists(filename) or is_url(filename):
        # Streams, existing paths and URLs are passed to the parser as is.
        parser.parse(filename)
    else:
        # Anything else is treated as the XML document itself.
        parser.parse(StringIO(filename))

    return sax_handler.root
|  | 146 |  | 
|  | 147 |  | 
def is_url(string):
    """Return True if ``string`` begins with ``http://`` or ``https://``."""
    return string.startswith(('http://', 'https://'))
|  | 150 |  | 
|  | 151 | # vim: set expandtab ts=4 sw=4: |