""" Analyze test results for finding bottlenecks """

import sys
import csv
import time
import bisect
import os.path
import argparse
import collections


import yaml
import texttable


from wally.utils import b2ssize


class SensorsData(object):
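    """All sensor values collected from one host at one moment in time."""
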
    def __init__(self, source_id, hostname, ctime, values):
        self.source_id = source_id
        self.hostname = hostname
        self.ctime = ctime
        self.values = values  # [((dev, sensor), value)]


class SensorInfo(object):
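    """Sensor metadata: native unit name and the coefficient which
    converts a raw value into bytes."""
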
    def __init__(self, name, native_ext, to_bytes_coef):
        self.name = name
        self.native_ext = native_ext
        self.to_bytes_coef = to_bytes_coef

_SINFO = [
    SensorInfo('recv_bytes', 'B', 1),
    SensorInfo('send_bytes', 'B', 1),
    SensorInfo('sectors_written', 'Sect', 512),
    SensorInfo('sectors_read', 'Sect', 512),
]

SINFO_MAP = dict((sinfo.name, sinfo) for sinfo in _SINFO)
to_bytes = dict((sinfo.name, sinfo.to_bytes_coef) for sinfo in _SINFO)


def load_results(fd):
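    """Load sensor results from fd, sniffing the format from the first
    100 bytes: '(' or '{' means one python literal per line, otherwise
    the data is parsed as CSV.

    Returns (results, source_id2hostname), where results is a list of
    (ctime, SensorsData) pairs sorted by time.
    """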
    data = fd.read(100)
    fd.seek(0, os.SEEK_SET)

    # t = time.time()
    if '(' in data or '{' in data:
        res, source_id2hostname = load_results_eval(fd)
    else:
        res, source_id2hostname = load_results_csv(fd)

    # print int(((time.time() - t) * 1000000) / len(res)), len(res)

    return res, source_id2hostname


def load_results_csv(fd):
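    """Parse sensor data stored as CSV.

    The first row seen for each (ip, port) pair is a header:
    ip, port, source_id, hostname, then 'dev.sensor' column names.
    Data rows carry ip, port, ctime and the raw values, which are
    converted to bytes using the per-sensor coefficients.
    """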
    fields = {}
    res = []
    source_id2hostname = {}
    coefs = {}

    # cached for performance
    ii = int
    zz = zip
    SD = SensorsData
    ra = res.append

    for row in csv.reader(fd):
        if len(row) == 0:
            continue
        ip, port = row[:2]
        ip_port = (ip, ii(port))

        if ip_port not in fields:
            sensors = [i.split('.') for i in row[4:]]
            fields[ip_port] = row[2:4] + sensors
            source_id2hostname[row[2]] = row[3]
            coefs[ip_port] = [to_bytes.get(s[1], 1) for s in sensors]
        else:
            fld = fields[ip_port]
            processed_data = []
            a = processed_data.append

            # this cycle is critical for performance
            # don't "refactor" it, unless you are confident
            # in what you are doing
            for dev_sensor, val, coef in zz(fld[2:], row[3:], coefs[ip_port]):
                a((dev_sensor, ii(val) * coef))

            ctime = ii(row[2])
            sd = SD(fld[0], fld[1], ctime, processed_data)
            ra((ctime, sd))

    res.sort(key=lambda x: x[0])
    return res, source_id2hostname


def load_results_eval(fd):
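    """Parse sensor data stored as one python literal per line.

    Each non-empty line evaluates to (_, dict), the dict holding
    'time', 'source_id', 'hostname' and 'dev.sensor' keys; raw values
    are converted to bytes where a coefficient is known.
    """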
    res = []
    source_id2hostname = {}

    for line in fd:
        if line.strip() == "":
            continue

        _, data = eval(line)
        ctime = data.pop('time')
        source_id = data.pop('source_id')
        hostname = data.pop('hostname')

        processed_data = []
        for k, v in data.items():
            dev, sensor = k.split('.')
            processed_data.append(((dev, sensor),
                                   v * to_bytes.get(sensor, 1)))

        sd = SensorsData(source_id, hostname, ctime, processed_data)
        res.append((ctime, sd))
        source_id2hostname[source_id] = hostname

    res.sort(key=lambda x: x[0])
    return res, source_id2hostname


def load_test_timings(fd):
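    """Extract test timings from a raw results yaml stream.

    Returns a mapping: test name -> (start_time, finish_time).
    Only a single 'io' test section is expected.
    """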
    result = {}  # test name -> (start_time, finish_time)
    data = yaml.load(fd.read())
    assert len(data) == 1
    test_type, test_data = data[0]
    assert test_type == 'io'
    for test_names, interval in test_data['__meta__']['timings']:
        assert len(set(test_names)) == 1
        if test_names[0] not in result:
            result[test_names[0]] = interval
    return result


critical_values = dict(
    io_queue=1,
    mem_usage_percent=0.8)


class AggregatedData(object):
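    """Per-sensor sums aggregated by device, node and role.

    all_together maps (role_or_node, device_or_'*') to the summed
    value and is what the report tables are built from.
    """
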
    def __init__(self, sensor_name):
        self.sensor_name = sensor_name

        # (node, device): count
        self.per_device = collections.defaultdict(int)

        # node: count
        self.per_node = collections.defaultdict(int)

        # role: count
        self.per_role = collections.defaultdict(int)

        # (role_or_node, device_or_*): count
        self.all_together = collections.defaultdict(int)

    def __str__(self):
        res = "<AggregatedData({0})>\n".format(self.sensor_name)
        for (role_or_node, device), val in self.all_together.items():
            res += "  {0}:{1} = {2}\n".format(role_or_node, device, val)
        return res


170def total_consumption(sensors_data, roles_map):
    result = {}

    for _, item in sensors_data:
        for (dev, sensor), val in item.values:

            try:
                ad = result[sensor]
            except KeyError:
                ad = result[sensor] = AggregatedData(sensor)

            ad.per_device[(item.hostname, dev)] += val

    for ad in result.values():
        for (hostname, dev), val in ad.per_device.items():
            ad.per_node[hostname] += val

            for role in roles_map[hostname]:
                ad.per_role[role] += val

            ad.all_together[(hostname, dev)] = val

        for role, val in ad.per_role.items():
            ad.all_together[(role, '*')] = val

        for node, val in ad.per_node.items():
            ad.all_together[(node, '*')] = val

    return result


def avg_load(data):
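    """Count, for every (hostname, dev, sensor), how many samples
    exceed the threshold from critical_values.

    Returns (counters, observed_time_span).
    """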
    load = collections.defaultdict(int)

    min_time = 0xFFFFFFFFFFF
    max_time = 0

    for _, item in data:

        min_time = min(min_time, item.ctime)
        max_time = max(max_time, item.ctime)

        for name, max_val in critical_values.items():
            for (dev, sensor), val in item.values:
                if sensor == name and val > max_val:
                    load[(item.hostname, dev, sensor)] += 1
    return load, max_time - min_time


def print_bottlenecks(data_iter, max_bottlenecks=15):
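    """Render the components that exceed critical thresholds most often
    as a two-column text table."""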
    load, duration = avg_load(data_iter)

    # join (hostname, dev, sensor) keys into readable names
    rev_items = ((v, "{0}.{1}.{2}".format(*k)) for (k, v) in load.items())

    res = sorted(rev_items, reverse=True)[:max_bottlenecks]

    max_name_sz = max(len(name) for _, name in res)
    frmt = "{{0:>{0}}} | {{1:>4}}".format(max_name_sz)
    table = [frmt.format("Component", "% times load > 100%")]

    for (v, k) in res:
        table.append(frmt.format(k, int(v * 100.0 / duration + 0.5)))

    return "\n".join(table)


def print_consumption(agg, min_transfer=None):
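    """Render agg.all_together as a two-column table, dropping rows
    below min_transfer; returns None if no rows remain."""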
    rev_items = []
    for (node_or_role, dev), v in agg.all_together.items():
        rev_items.append((int(v), node_or_role + ':' + dev))

    res = sorted(rev_items, reverse=True)

    if min_transfer is not None:
        res = [(v, k)
               for (v, k) in res
               if v >= min_transfer]

    if len(res) == 0:
        return None

    res = [(b2ssize(v) + "B", k) for (v, k) in res]

    max_name_sz = max(len(name) for _, name in res)
    max_val_sz = max(len(val) for val, _ in res)

    frmt = " {{0:>{0}}} | {{1:>{1}}} ".format(max_name_sz, max_val_sz)
    table = [frmt.format("Component", "Usage")]

    for (v, k) in res:
        table.append(frmt.format(k, v))

    return "\n".join(table)


def parse_args(args):
    parser = argparse.ArgumentParser()
    parser.add_argument('-t', '--time_period', nargs=2,
                        type=int, default=None,
                        help="Begin and end time for tests")
    parser.add_argument('-m', '--max-bottleneck', type=int,
                        default=15, help="Max number of bottlenecks to show")
    parser.add_argument('-d', '--debug-ver', action='store_true',
                        help="Full report with original data")
    parser.add_argument('-u', '--user-ver', action='store_true',
                        default=True,
                        help="Avg load report")
    parser.add_argument('results_folder')
    return parser.parse_args(args[1:])


def make_roles_mapping(source_id_mapping, source_id2hostname):
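    """Build a mapping from both source ids and hostnames to role lists.

    Source ids are host:port strings extracted from the ssh urls in
    source_id_mapping (port defaults to 22); sources that only appear
    in source_id2hostname are assumed to be test nodes.
    """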
    result = {}
    for ssh_url, roles in source_id_mapping.items():
        if '@' in ssh_url:
            source_id = ssh_url.split('@')[1]
        else:
            source_id = ssh_url.split('://')[1]

        if source_id.count(':') == 2:
            source_id = source_id.rsplit(":", 1)[0]

        if source_id.endswith(':'):
            source_id += "22"

        if source_id in source_id2hostname:
            result[source_id] = roles
            result[source_id2hostname[source_id]] = roles

    for testnode_src in (set(source_id2hostname) - set(result)):
        result[testnode_src] = ['testnode']
        result[source_id2hostname[testnode_src]] = ['testnode']

    return result


def get_testdata_size(consumption):
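    """Return the largest 'testnode' role total over the byte-based
    sensors - an estimate of the traffic generated by the test itself."""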
    max_data = 0
    for sensor_name, agg in consumption.items():
        if sensor_name in SINFO_MAP:
            max_data = max(max_data, agg.per_role.get('testnode', 0))
    return max_data


def get_data_for_interval(data, interval):
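    """Cut a [begin, end] time slice from a time-sorted list of
    (ctime, SensorsData) pairs using binary search."""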
    begin, end = interval
    times = [ctime for ctime, _ in data]
    b_p = bisect.bisect_left(times, begin)
    e_p = bisect.bisect_right(times, end)
    return data[b_p:e_p]


def main(argv):
    opts = parse_args(argv)

    sensors_data_fname = os.path.join(opts.results_folder,
                                      'sensor_storage.txt')

    roles_file = os.path.join(opts.results_folder,
                              'nodes.yaml')

    raw_results_file = os.path.join(opts.results_folder,
                                    'raw_results.yaml')

    src2roles = yaml.load(open(roles_file))
    timings = load_test_timings(open(raw_results_file))
    with open(sensors_data_fname) as fd:
        data, source_id2hostname = load_results(fd)

    roles_map = make_roles_mapping(src2roles, source_id2hostname)

    # print print_bottlenecks(data, opts.max_bottleneck)

    for name, interval in sorted(timings.items()):
        print
        print
        print "-" * 30 + " " + name + " " + "-" * 30
        print

        data_chunk = get_data_for_interval(data, interval)

        consumption = total_consumption(data_chunk, roles_map)

        # ignore values below 1% of the traffic generated by the test
        testdata_sz = get_testdata_size(consumption) // 100

        fields = ('recv_bytes', 'send_bytes',
                  'sectors_read', 'sectors_written')
        per_consumer_table = {}

        all_consumers = set(consumption.values()[0].all_together)
        all_consumers_sum = []

        for consumer in all_consumers:
            tb = per_consumer_table[consumer] = []
            vl = 0
            # 'field', not 'name' - the outer 'name' holds the test name
            for field in fields:
                val = consumption[field].all_together[consumer]
                if val < testdata_sz:
                    val = 0
                vl += int(val)
                tb.append(b2ssize(int(val)) + "B")
            all_consumers_sum.append((vl, consumer))

        all_consumers_sum.sort(reverse=True)
        tt = texttable.Texttable(max_width=130)
        tt.set_cols_align(["l"] + ["r"] * len(fields))
        tt.header(["Name"] + list(fields))

        for summ, consumer in all_consumers_sum:
            if summ > 0:
                tt.add_row([".".join(consumer)] +
                           [v if v != '0B' else '-'
                            for v in per_consumer_table[consumer]])

        tt.set_deco(texttable.Texttable.VLINES | texttable.Texttable.HEADER)
        print tt.draw()

        # if name in consumption:
        #     table = print_consumption(consumption[name], testdata_sz)
        #     if table is None:
        #         print "Consumption of", name, "is negligible"
        #     else:
        #         ln = max(map(len, table.split('\n')))
        #         print '-' * ln
        #         print name.center(ln)
        #         print '-' * ln
        #         print table
        #         print '-' * ln
        #         print

if __name__ == "__main__":
    exit(main(sys.argv))