Pavlo Shchelokovskyy | 4a8f1c1 | 2018-09-21 19:17:19 +0300 | [diff] [blame] | 1 | #!/usr/bin/env python |
| 2 | |
| 3 | """ |
| 4 | untangle |
| 5 | |
| 6 | Converts xml to python objects. |
| 7 | |
| 8 | The only method you need to call is parse() |
| 9 | |
| 10 | Partially inspired by xml2obj |
| 11 | (http://code.activestate.com/recipes/149368-xml2obj/) |
| 12 | |
| 13 | Author: Christian Stefanescu (http://0chris.com) |
| 14 | License: MIT License - http://www.opensource.org/licenses/mit-license.php |
| 15 | """ |
| 16 | |
| 17 | import os |
| 18 | from xml.sax import make_parser, handler |
| 19 | try: |
| 20 | from StringIO import StringIO |
| 21 | except ImportError: |
| 22 | from io import StringIO |
| 23 | |
# Version string of this module.
__version__ = '1.1.0'
| 25 | |
| 26 | |
class _UnknownElementError(AttributeError, IndexError):
    """
    Raised when attribute-style access names a child that does not exist.

    It is an ``AttributeError`` so that ``hasattr()``, ``getattr()`` with a
    default, ``copy`` and ``pickle`` treat the name as simply missing, and
    also an ``IndexError`` so that code catching the exception type raised
    by earlier versions keeps working.
    """


class Element(object):
    """
    Representation of an XML element.

    Child elements are reachable as Python attributes (``element.child``),
    XML attributes through indexing (``element['attr']``) and the collected
    character data through ``cdata``.
    """
    def __init__(self, name, attributes):
        """
        Create an element with the given name and attribute mapping.

        ``attributes`` may be ``None`` for an element that carries no XML
        attributes.
        """
        self._name = name
        self._attributes = attributes
        self.children = []
        self.is_root = False
        self.cdata = ''

    def add_child(self, element):
        """
        Append ``element`` to the list of children.
        """
        self.children.append(element)

    def add_cdata(self, cdata):
        """
        Append a chunk of character data to ``cdata``.
        """
        self.cdata = self.cdata + cdata

    def get_attribute(self, key):
        """
        Return the XML attribute ``key``, or ``None`` if it is not set.
        """
        # An element built with ``attributes=None`` has no mapping to query;
        # report "not set" instead of failing on ``None.get``.
        if self._attributes is None:
            return None
        return self._attributes.get(key)

    def get_elements(self, name=None):
        """
        Return the children called ``name``, or all children if no name
        is given.
        """
        if name:
            return [e for e in self.children if e._name == name]
        else:
            return self.children

    def __getitem__(self, key):
        """
        ``element[key]`` is a shortcut for ``get_attribute(key)``.
        """
        return self.get_attribute(key)

    def __getattr__(self, key):
        """
        Resolve ``element.key`` to the child element(s) named ``key``.

        Returns the child itself when exactly one matches and a list when
        several do. The result is stored on the instance, so later lookups
        of the same name no longer reach this method.

        Raises an error that is both ``AttributeError`` and ``IndexError``
        when no child matches.
        """
        # Read ``children`` through ``__dict__``: on an instance whose
        # ``__init__`` has not run (e.g. one created by ``copy`` or
        # ``pickle``) a plain ``self.children`` would re-enter this method
        # and recurse without end.
        children = self.__dict__.get('children', ())
        matching_children = [x for x in children if x._name == key]
        if not matching_children:
            raise _UnknownElementError('Unknown key <%s>' % key)
        if len(matching_children) == 1:
            result = matching_children[0]
        else:
            result = matching_children
        self.__dict__[key] = result
        return result

    def __iter__(self):
        """
        Yield the element itself, so a single child can be looped over the
        same way as the list returned for repeated children.
        """
        yield self

    def __str__(self):
        return (
            "Element <%s> with attributes %s and children %s" %
            (self._name, self._attributes, self.children)
        )

    def __repr__(self):
        return (
            "Element(name = %s, attributes = %s, cdata = %s)" %
            (self._name, self._attributes, self.cdata)
        )

    def __nonzero__(self):
        """
        An element is true if it is the root or has a name.
        """
        return self.is_root or self._name is not None

    # Python 3 consults ``__bool__`` rather than ``__nonzero__``; without the
    # alias the rule above would be ignored there.
    __bool__ = __nonzero__

    def __eq__(self, val):
        """
        Compare the element's character data with ``val``.
        """
        return self.cdata == val

    def __ne__(self, val):
        """
        Inverse of ``__eq__``; Python 2 does not derive it automatically.
        """
        return not self.__eq__(val)

    def __dir__(self):
        """
        List the names of the child elements.
        """
        children_names = [x._name for x in self.children]
        return children_names
| 92 | |
| 93 | |
class Handler(handler.ContentHandler):
    """
    SAX handler which creates the Python object structure out of ``Element``s
    """
    def __init__(self):
        """
        Set up an empty, nameless root element and the stack of elements
        that are currently open.
        """
        # Run the base class initialiser so that whatever state it sets up
        # exists before parsing starts.
        handler.ContentHandler.__init__(self)
        self.root = Element(None, None)
        self.root.is_root = True
        # Open elements, innermost last.
        self.elements = []

    def startElement(self, name, attributes):
        """
        Create an ``Element`` for the opening tag, attach it to its parent
        and push it on the stack of open elements.
        """
        # '-', '.' and ':' cannot appear in a Python identifier; map them to
        # '_' so the element stays reachable through attribute access.
        for char in ('-', '.', ':'):
            name = name.replace(char, '_')
        # Copy the attributes into a plain dict owned by the element.
        element = Element(name, dict(attributes.items()))
        # Top-level elements hang off the root; everything else hangs off
        # the innermost element that is still open.
        parent = self.elements[-1] if self.elements else self.root
        parent.add_child(element)
        self.elements.append(element)

    def endElement(self, name):
        """
        Close the innermost open element.
        """
        self.elements.pop()

    def characters(self, cdata):
        """
        Append character data to the innermost open element.
        """
        self.elements[-1].add_cdata(cdata)
| 122 | |
| 123 | |
def parse(filename, **parser_features):
    """
    Interprets the given argument as a filename, URL, XML data string or
    file-like object (anything with a ``read`` attribute), parses it and
    returns a Python object which represents the given document.

    Extra keyword arguments switch SAX parser features: each key is the
    name of a ``feature_*`` constant in ``xml.sax.handler`` and each value
    the boolean to set it to, e.g.
    ``parse(source, feature_external_ges=True)``.

    External general entities are switched off unless explicitly requested,
    so that documents from untrusted sources cannot pull in local files or
    remote resources through entity declarations.

    Raises ``ValueError`` if the argument is None / empty string.

    Raises ``xml.sax.SAXParseException`` if something goes wrong
    during parsing.
    """
    if filename is None:
        raise ValueError('parse() takes a filename, URL or XML string')
    # File-like objects are handed to the parser untouched; only strings
    # have ``strip`` and can be probed as a path or URL.
    is_stream = hasattr(filename, 'read')
    if not is_stream and filename.strip() == '':
        raise ValueError('parse() takes a filename, URL or XML string')

    parser = make_parser()
    # Safe default first, then whatever the caller asked for.
    features = {'feature_external_ges': False}
    features.update(parser_features)
    for feature, value in features.items():
        parser.setFeature(getattr(handler, feature), value)

    sax_handler = Handler()
    parser.setContentHandler(sax_handler)
    if is_stream or os.path.exists(filename) or is_url(filename):
        parser.parse(filename)
    else:
        # Neither an existing path nor a URL: treat the text as XML data.
        parser.parse(StringIO(filename))

    return sax_handler.root
| 146 | |
| 147 | |
def is_url(string):
    """
    Tell whether ``string`` begins with ``http://`` or ``https://``.
    """
    return string.startswith(('http://', 'https://'))
| 150 | |
| 151 | # vim: set expandtab ts=4 sw=4: |