blob: 6f0b22be55f5bc259534936bec461fd6fd0cc1eb [file] [log] [blame]
Pavlo Shchelokovskyy4a8f1c12018-09-21 19:17:19 +03001#!/usr/bin/env python
2
3"""
4 untangle
5
6 Converts xml to python objects.
7
8 The only method you need to call is parse()
9
10 Partially inspired by xml2obj
11 (http://code.activestate.com/recipes/149368-xml2obj/)
12
13 Author: Christian Stefanescu (http://0chris.com)
14 License: MIT License - http://www.opensource.org/licenses/mit-license.php
15"""
16
17import os
18from xml.sax import make_parser, handler
19try:
20 from StringIO import StringIO
21except ImportError:
22 from io import StringIO
23
24__version__ = '1.1.0'
25
26
class _UnknownChildError(AttributeError, IndexError):
    """
    Raised by ``Element.__getattr__`` when no child has the requested name.

    It is an ``IndexError`` so that existing ``except IndexError`` callers
    keep working, and an ``AttributeError`` so that ``hasattr()``,
    three-argument ``getattr()`` and the ``copy`` / ``pickle`` machinery
    treat a missing child as a missing attribute.
    """


class Element(object):
    """
    Representation of an XML element.

    Child elements are reachable as attributes (``parent.child``), XML
    attributes through item access (``element['attr']``) and the character
    data through ``element.cdata``.
    """
    def __init__(self, name, attributes):
        """
        Create an element called ``name`` with the mapping ``attributes``.

        ``attributes`` may be ``None`` (the synthetic root element is created
        that way by ``Handler``).
        """
        self._name = name
        self._attributes = attributes
        self.children = []
        self.is_root = False
        self.cdata = ''

    def add_child(self, element):
        """Append ``element`` to the list of children."""
        self.children.append(element)

    def add_cdata(self, cdata):
        """Append the text chunk ``cdata`` to the accumulated character data."""
        self.cdata = self.cdata + cdata

    def get_attribute(self, key):
        """
        Return the value of the XML attribute ``key`` or ``None`` if the
        element has no such attribute (or no attributes at all).
        """
        if self._attributes is None:
            # e.g. the root element built with Element(None, None)
            return None
        return self._attributes.get(key)

    def get_elements(self, name=None):
        """
        Return the children called ``name``; without a (truthy) ``name`` the
        list of all children is returned.
        """
        if name:
            return [e for e in self.children if e._name == name]
        return self.children

    def __getitem__(self, key):
        """``element[key]`` is a shortcut for ``get_attribute(key)``."""
        return self.get_attribute(key)

    def __getattr__(self, key):
        """
        Resolve ``element.key`` to the child element(s) named ``key``.

        Python only calls this when regular attribute lookup fails. A single
        match is returned as an ``Element``, several matches as a list. The
        result is stored in the instance ``__dict__``, so later accesses do
        not reach this method again (children added afterwards under the
        same name are therefore not reflected).

        Raises an error that is both ``AttributeError`` and ``IndexError``
        if no child is called ``key``.
        """
        # Go through __dict__ instead of self.children: on an instance that
        # was created without __init__ (copy/pickle do that) ``children`` is
        # missing and ``self.children`` would re-enter __getattr__ forever.
        children = self.__dict__.get('children', ())
        matching_children = [x for x in children if x._name == key]
        if not matching_children:
            raise _UnknownChildError('Unknown key <%s>' % key)
        if len(matching_children) == 1:
            result = matching_children[0]
        else:
            result = matching_children
        self.__dict__[key] = result
        return result

    def __iter__(self):
        """
        Yield the element itself, so a single child can be looped over the
        same way as a list of equally named children.
        """
        yield self

    def __str__(self):
        return (
            "Element <%s> with attributes %s and children %s" %
            (self._name, self._attributes, self.children)
        )

    def __repr__(self):
        return (
            "Element(name = %s, attributes = %s, cdata = %s)" %
            (self._name, self._attributes, self.cdata)
        )

    def __bool__(self):
        """An element is truthy if it is the root or has a name."""
        return self.is_root or self._name is not None

    # Python 2 spells the truth-value hook ``__nonzero__``.
    __nonzero__ = __bool__

    def __eq__(self, val):
        """Compare the element's character data with ``val``."""
        return self.cdata == val

    def __ne__(self, val):
        """Inverse of ``__eq__`` (Python 2 does not derive it automatically)."""
        return not self.__eq__(val)

    def __dir__(self):
        """List the names of the child elements."""
        children_names = [x._name for x in self.children]
        return children_names
92
93
class Handler(handler.ContentHandler):
    """
    SAX handler which creates the Python object structure out of ``Element``s

    ``root`` is a synthetic, nameless element that receives the document's
    top-level element as its child; ``elements`` is the stack of elements
    that are currently open.
    """
    def __init__(self):
        # Initialise the base class explicitly; the unbound call form is
        # used so it does not depend on super() being usable here.
        handler.ContentHandler.__init__(self)
        self.root = Element(None, None)
        self.root.is_root = True
        self.elements = []

    def startElement(self, name, attributes):
        """
        Create an ``Element`` for the opening tag, attach it to the innermost
        open element (or to the root) and push it on the stack.
        """
        # '-', '.' and ':' are replaced with '_' so the tag name can be used
        # in Python attribute access (``parent.some_tag``).
        for char in ('-', '.', ':'):
            name = name.replace(char, '_')
        # Copy the SAX attributes object into a plain dict.
        element = Element(name, dict(attributes.items()))
        if self.elements:
            self.elements[-1].add_child(element)
        else:
            self.root.add_child(element)
        self.elements.append(element)

    def endElement(self, name):
        """Pop the element that has just been closed off the stack."""
        self.elements.pop()

    def characters(self, cdata):
        """Append the text chunk to the innermost open element."""
        self.elements[-1].add_cdata(cdata)
122
123
def parse(filename, **parser_features):
    """
    Interprets the given argument as a filename, URL, XML data string or
    file-like object (anything with a ``read`` attribute), parses it and
    returns a Python object which represents the given document.

    Optional keyword arguments name SAX feature constants from
    ``xml.sax.handler`` and the value to set them to, e.g.
    ``parse(source, feature_external_ges=False)``.

    Raises ``ValueError`` if the argument is None / empty string.

    Raises ``AttributeError`` if a keyword argument is not the name of an
    attribute of ``xml.sax.handler``.

    Raises ``xml.sax.SAXParseException`` if something goes wrong
    during parsing.
    """
    if filename is None:
        raise ValueError('parse() takes a filename, URL or XML string')
    # File-like objects have no strip() and are handed to the SAX parser
    # unchanged; only strings go through the filename / URL / data checks.
    is_stream = hasattr(filename, 'read')
    if not is_stream and filename.strip() == '':
        raise ValueError('parse() takes a filename, URL or XML string')
    parser = make_parser()
    for feature, value in parser_features.items():
        parser.setFeature(getattr(handler, feature), value)
    sax_handler = Handler()
    parser.setContentHandler(sax_handler)
    if is_stream or os.path.exists(filename) or is_url(filename):
        parser.parse(filename)
    else:
        # Neither an existing path nor a URL: treat the string as XML data.
        parser.parse(StringIO(filename))

    return sax_handler.root
146
147
def is_url(string):
    """Tell whether ``string`` begins with ``http://`` or ``https://``."""
    url_prefixes = ('http://', 'https://')
    return string.startswith(url_prefixes)
150
151# vim: set expandtab ts=4 sw=4: