blob: 605cc029e94a33508b0806609670468f1f50fcbc [file] [log] [blame]
# Copyright 2011 OpenStack Foundation.
# All Rights Reserved.
#
#    Licensed under the Apache License, Version 2.0 (the "License"); you may
#    not use this file except in compliance with the License. You may obtain
#    a copy of the License at
#
#         http://www.apache.org/licenses/LICENSE-2.0
#
#    Unless required by applicable law or agreed to in writing, software
#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
#    License for the specific language governing permissions and limitations
#    under the License.
15
16"""
17System-level utilities and helper functions.
18"""
19
20import math
21import re
22import sys
23import unicodedata
24
25import six
26
27from tempest.openstack.common.gettextutils import _
28
29
# Exponent applied to a unit system's base for each supported size prefix.
UNIT_PREFIX_EXPONENT = {
    'k': 1,
    'K': 1,
    'Ki': 1,
    'M': 2,
    'Mi': 2,
    'G': 3,
    'Gi': 3,
    'T': 4,
    'Ti': 4,
}
# Maps a unit system name to a (base, regex) pair. Each regex captures, in
# order: the signed magnitude, the optional size prefix, and the bit/byte
# suffix ('b' / 'bit' for bits, 'B' for bytes).
UNIT_SYSTEM_INFO = {
    'IEC': (1024, re.compile(r'(^[-+]?\d*\.?\d+)([KMGT]i?)?(b|bit|B)$')),
    'SI': (1000, re.compile(r'(^[-+]?\d*\.?\d+)([kMGT])?(b|bit|B)$')),
}

# Lowercase spellings that are treated as boolean True / False.
TRUE_STRINGS = ('1', 't', 'true', 'on', 'y', 'yes')
FALSE_STRINGS = ('0', 'f', 'false', 'off', 'n', 'no')

# Matches any character that is not a word character, whitespace or hyphen.
SLUGIFY_STRIP_RE = re.compile(r"[^\w\s-]")
# Matches a run of hyphens and/or whitespace.
SLUGIFY_HYPHENATE_RE = re.compile(r"[-\s]+")
51
52
# NOTE(flaper87): The following 3 globals are used by `mask_password`
_SANITIZE_KEYS = ['adminPass', 'admin_pass', 'password', 'admin_password']

# NOTE(ldbragst): Let's build a list of regex objects using the list of
# _SANITIZE_KEYS we already have. This way, we only have to add the new key
# to the list of _SANITIZE_KEYS and we can generate regular expressions
# for XML and JSON automatically.
_SANITIZE_PATTERNS = []
# Every template has exactly two capture groups that bracket the secret
# value, so a substitution can keep the surrounding text (groups 1 and 2)
# and replace only what lies between them.
_FORMAT_PATTERNS = [r'(%(key)s\s*[=]\s*[\"\']).*?([\"\'])',
                    r'(<%(key)s>).*?(</%(key)s>)',
                    r'([\"\']%(key)s[\"\']\s*:\s*[\"\']).*?([\"\'])',
                    r'([\'"].*?%(key)s[\'"]\s*:\s*u?[\'"]).*?([\'"])',
                    r'([\'"].*?%(key)s[\'"]\s*,\s*\'--?[A-z]+\'\s*,\s*u?[\'"])'
                    '.*?([\'"])',
                    r'(%(key)s\s*--?[A-z]+\s*)\S+(\s*)']

# Compile one regex per (key, template) pair, keys in the outer loop, so the
# resulting list is ordered key-by-key.
for key in _SANITIZE_KEYS:
    for pattern in _FORMAT_PATTERNS:
        reg_ex = re.compile(pattern % {'key': key}, re.DOTALL)
        _SANITIZE_PATTERNS.append(reg_ex)
73
74
def int_from_bool_as_string(subject):
    """Interpret a string as a boolean and return either 1 or 0.

    Any value that `bool_from_string` (called with its default arguments)
    interprets as True yields 1; every other value yields 0.

    Useful for JSON-decoded stuff and config file parsing

    :param subject: Value to interpret.
    :returns: 1 if `subject` is interpreted as True, otherwise 0.
    """
    return int(bool_from_string(subject))
87
88
def bool_from_string(subject, strict=False, default=False):
    """Interpret a string as a boolean.

    A case-insensitive match is performed such that strings matching 't',
    'true', 'on', 'y', 'yes', or '1' are considered True and, when
    `strict=False`, anything else returns the value specified by 'default'.

    Useful for JSON-decoded stuff and config file parsing.

    If `strict=True`, unrecognized values, including None, will raise a
    ValueError which is useful when parsing values passed in from an API call.
    Strings yielding False are 'f', 'false', 'off', 'n', 'no', or '0'.

    :param subject: Value to interpret; non-string values are converted to
                    text first.
    :param strict: If True, raise on unrecognized values instead of
                   returning `default`.
    :param default: Value returned for unrecognized input when
                    `strict=False`.
    :returns: True, False, or `default`.
    :raises ValueError: If `strict=True` and the value is not recognized.
    """
    if not isinstance(subject, six.string_types):
        subject = six.text_type(subject)

    # Surrounding whitespace and letter case are ignored when matching.
    lowered = subject.strip().lower()

    if lowered in TRUE_STRINGS:
        return True
    if lowered in FALSE_STRINGS:
        return False
    if not strict:
        return default

    acceptable = ', '.join(
        "'%s'" % s for s in sorted(TRUE_STRINGS + FALSE_STRINGS))
    msg = _("Unrecognized value '%(val)s', acceptable values are:"
            " %(acceptable)s") % {'val': subject,
                                  'acceptable': acceptable}
    raise ValueError(msg)
120
121
def safe_decode(text, incoming=None, errors='strict'):
    """Decodes incoming text/bytes string using `incoming` if they're not
       already unicode.

    :param text: Text or bytes string to decode.
    :param incoming: Text's current encoding
    :param errors: Errors handling policy. See here for valid
        values http://docs.python.org/2/library/codecs.html
    :returns: text or a unicode `incoming` encoded
                representation of it.
    :raises TypeError: If text is neither a text string nor a bytes string
    """
    if not isinstance(text, (six.string_types, six.binary_type)):
        raise TypeError("%s can't be decoded" % type(text))

    # Already unicode text: nothing to decode.
    if isinstance(text, six.text_type):
        return text

    if not incoming:
        incoming = (sys.stdin.encoding or
                    sys.getdefaultencoding())

    try:
        return text.decode(incoming, errors)
    except UnicodeDecodeError:
        # Note(flaper87) If we get here, it means that
        # sys.stdin.encoding / sys.getdefaultencoding
        # didn't return a suitable encoding to decode
        # text. This happens mostly when global LANG
        # var is not set correctly and there's no
        # default encoding. In this case, most likely
        # python will use ASCII or ANSI encoders as
        # default encodings but they won't be capable
        # of decoding non-ASCII characters.
        #
        # Also, UTF-8 is being used since it's an ASCII
        # extension.
        return text.decode('utf-8', errors)
159
160
def safe_encode(text, incoming=None,
                encoding='utf-8', errors='strict'):
    """Encodes incoming text/bytes string using `encoding`.

    If incoming is not specified, text is expected to be encoded with
    `sys.stdin.encoding`, falling back to `sys.getdefaultencoding()`.

    :param text: Text or bytes string to encode.
    :param incoming: Text's current encoding
    :param encoding: Expected encoding for text (Default UTF-8)
    :param errors: Errors handling policy. See here for valid
        values http://docs.python.org/2/library/codecs.html
    :returns: text or a bytestring `encoding` encoded
                representation of it.
    :raises TypeError: If text is neither a text string nor a bytes string
    """
    if not isinstance(text, (six.string_types, six.binary_type)):
        raise TypeError("%s can't be encoded" % type(text))

    if not incoming:
        incoming = (sys.stdin.encoding or
                    sys.getdefaultencoding())

    # Avoid case issues in comparisons: codec names are case-insensitive,
    # so 'UTF-8' and 'utf-8' must be treated as the same encoding. Without
    # this, bytes that are already in the target encoding would be decoded
    # and re-encoded for no reason.
    if hasattr(incoming, 'lower'):
        incoming = incoming.lower()
    if hasattr(encoding, 'lower'):
        encoding = encoding.lower()

    if isinstance(text, six.text_type):
        return text.encode(encoding, errors)
    elif text and encoding != incoming:
        # Decode text before encoding it with `encoding`
        text = safe_decode(text, incoming, errors)
        return text.encode(encoding, errors)
    else:
        # Empty bytes, or bytes already in the requested encoding.
        return text
191
192
def string_to_bytes(text, unit_system='IEC', return_int=False):
    """Converts a string into a float representation of bytes.

    The units supported for IEC ::

        Kb(it), Kib(it), Mb(it), Mib(it), Gb(it), Gib(it), Tb(it), Tib(it)
        KB, KiB, MB, MiB, GB, GiB, TB, TiB

    The units supported for SI ::

        kb(it), Mb(it), Gb(it), Tb(it)
        kB, MB, GB, TB

    Note that the SI unit system does not support capital letter 'K'

    :param text: String input for bytes size conversion.
    :param unit_system: Unit system for byte size conversion.
    :param return_int: If True, returns integer representation of text
                       in bytes, rounded up. (default: decimal)
    :returns: Numerical representation of text in bytes.
    :raises ValueError: If text has an invalid value, or if `unit_system`
                        is not a known unit system.

    """
    try:
        base, reg_ex = UNIT_SYSTEM_INFO[unit_system]
    except KeyError:
        msg = _('Invalid unit system: "%s"') % unit_system
        raise ValueError(msg)

    match = reg_ex.match(text)
    if not match:
        msg = _('Invalid string format: %s') % text
        raise ValueError(msg)

    magnitude = float(match.group(1))
    unit_prefix = match.group(2)
    # A 'b' / 'bit' suffix means the magnitude is in bits; convert to bytes.
    if match.group(3) in ['b', 'bit']:
        magnitude /= 8

    if not unit_prefix:
        res = magnitude
    else:
        res = magnitude * pow(base, UNIT_PREFIX_EXPONENT[unit_prefix])

    if return_int:
        # Round up so a fractional byte count is never truncated.
        return int(math.ceil(res))
    return res
237
238
def to_slug(value, incoming=None, errors="strict"):
    """Normalize string.

    Convert to lowercase, remove non-word characters, and convert spaces
    to hyphens.

    Inspired by Django's `slugify` filter.

    :param value: Text to slugify
    :param incoming: Text's current encoding
    :param errors: Errors handling policy. See here for valid
        values http://docs.python.org/2/library/codecs.html
    :returns: slugified unicode representation of `value`
    :raises TypeError: If text is not an instance of str
    """
    value = safe_decode(value, incoming, errors)
    # NOTE(aababilov): no need to use safe_(encode|decode) here:
    # encodings are always "ascii", error handling is always "ignore"
    # and types are always known (first: unicode; second: str)
    value = unicodedata.normalize("NFKD", value).encode(
        "ascii", "ignore").decode("ascii")
    # Drop everything except word characters, whitespace and hyphens, then
    # collapse each run of hyphens/whitespace into a single hyphen.
    value = SLUGIFY_STRIP_RE.sub("", value).strip().lower()
    return SLUGIFY_HYPHENATE_RE.sub("-", value)
262
263
def mask_password(message, secret="***"):
    """Replace password with 'secret' in message.

    :param message: The string which includes security information.
    :param secret: value with which to replace passwords.
    :returns: The unicode value of message with the password fields masked.

    For example:

    >>> mask_password("'adminPass' : 'aaaaa'")
    "'adminPass' : '***'"
    >>> mask_password("'admin_pass' : 'aaaaa'")
    "'admin_pass' : '***'"
    >>> mask_password('"password" : "aaaaa"')
    '"password" : "***"'
    >>> mask_password("'original_password' : 'aaaaa'")
    "'original_password' : '***'"
    >>> mask_password("u'original_password' : u'aaaaa'")
    "u'original_password' : u'***'"
    """
    message = six.text_type(message)

    # NOTE(ldbragst): Check to see if anything in message contains any key
    # specified in _SANITIZE_KEYS, if not then just return the message since
    # we don't have to mask any passwords.
    if not any(key in message for key in _SANITIZE_KEYS):
        return message

    # Keep the text captured on either side of the value (groups 1 and 2)
    # and put `secret` between them.
    replacement = r'\g<1>' + secret + r'\g<2>'
    for pattern in _SANITIZE_PATTERNS:
        message = pattern.sub(replacement, message)
    return message