blob: d8d4b4c7db02964de41b7bb7a127b382c5663bce [file] [log] [blame]
Ved-vampir5c7b6142015-04-24 19:49:59 +03001""" Analyze test results for finding bottlenecks """
2
3import sys
koder aka kdanilovd5ed4da2015-05-07 23:33:23 +03004import csv
5import time
6import bisect
koder aka kdanilovf86d7af2015-05-06 04:01:54 +03007import os.path
Ved-vampir5c7b6142015-04-24 19:49:59 +03008import argparse
koder aka kdanilovf86d7af2015-05-06 04:01:54 +03009import collections
Ved-vampir5c7b6142015-04-24 19:49:59 +030010
11
koder aka kdanilovf86d7af2015-05-06 04:01:54 +030012import yaml
koder aka kdanilovd5ed4da2015-05-07 23:33:23 +030013import texttable
Ved-vampir5c7b6142015-04-24 19:49:59 +030014
koder aka kdanilov416b87a2015-05-12 00:26:04 +030015try:
16 import pygraphviz as pgv
17except ImportError:
18 pgv = None
Ved-vampirfcea0722015-04-27 14:06:13 +030019
koder aka kdanilov416b87a2015-05-12 00:26:04 +030020from wally.utils import b2ssize, b2ssize_10
Ved-vampirfcea0722015-04-27 14:06:13 +030021
22
class SensorsData(object):
    """A single sensors snapshot taken on one host.

    values is a list of ((dev, sensor_name), value) pairs sampled
    at unix timestamp ctime.
    """

    def __init__(self, source_id, hostname, ctime, values):
        self.source_id = source_id
        self.hostname = hostname
        self.ctime = ctime
        self.values = values
Ved-vampirfcea0722015-04-27 14:06:13 +030029
30
koder aka kdanilovf86d7af2015-05-06 04:01:54 +030031class SensorInfo(object):
koder aka kdanilov416b87a2015-05-12 00:26:04 +030032 def __init__(self, name, print_name, native_ext, to_bytes_coef):
koder aka kdanilovf86d7af2015-05-06 04:01:54 +030033 self.name = name
koder aka kdanilov416b87a2015-05-12 00:26:04 +030034 self.print_name = print_name
koder aka kdanilovf86d7af2015-05-06 04:01:54 +030035 self.native_ext = native_ext
36 self.to_bytes_coef = to_bytes_coef
Ved-vampirfcea0722015-04-27 14:06:13 +030037
koder aka kdanilov416b87a2015-05-12 00:26:04 +030038
# All sensors this tool knows how to render and scale.
_SINFO = [
    SensorInfo('recv_bytes', 'net_recv', 'B', 1),
    SensorInfo('send_bytes', 'net_send', 'B', 1),
    SensorInfo('sectors_written', 'hdd_write', 'Sect', 512),
    SensorInfo('sectors_read', 'hdd_read', 'Sect', 512),
    SensorInfo('reads_completed', 'read_op', 'OP', None),
    SensorInfo('writes_completed', 'write_op', 'OP', None),
]

# sensor name -> SensorInfo
SINFO_MAP = dict((info.name, info) for info in _SINFO)

# sensor name -> coefficient converting raw counter units into bytes;
# operation counters (to_bytes_coef is None) are deliberately left out
to_bytes = dict((info.name, info.to_bytes_coef)
                for info in _SINFO
                if info.to_bytes_coef is not None)
Ved-vampirfcea0722015-04-27 14:06:13 +030052
koder aka kdanilovd5ed4da2015-05-07 23:33:23 +030053
def load_results(fd):
    """Load sensor results from *fd*, auto-detecting the storage format.

    The eval-based format stores python literals, so a '(' or '{' in the
    first bytes selects it; anything else is treated as CSV.

    Returns (results, source_id2hostname) where results is a list of
    (ctime, SensorsData) sorted by ctime.
    """
    head = fd.read(100)
    fd.seek(0, os.SEEK_SET)

    if '(' in head or '{' in head:
        return load_results_eval(fd)
    return load_results_csv(fd)
67
68
def load_results_csv(fd):
    """Parse the CSV sensor storage format.

    Two row kinds share the file, keyed by (ip, port):
      * header row: ip, port, source_id, hostname, "dev.sensor", ...
        (seen the first time an (ip, port) pair appears)
      * data row:   ip, port, ctime, value, value, ...
        (values are in the same order as the header's sensor columns)

    Returns (results, source_id2hostname) where results is a list of
    (ctime, SensorsData) sorted by ctime.
    """

    fields = {}                # (ip, port) -> [source_id, hostname, [dev, sensor], ...]
    res = []
    source_id2nostname = {}    # source_id -> hostname
    coefs = {}                 # (ip, port) -> per-column byte-conversion multipliers

    # cached for performance: local names avoid repeated global/attribute
    # lookups inside the hot loop below
    ii = int
    zz = zip
    SD = SensorsData
    ra = res.append

    for row in csv.reader(fd):
        if len(row) == 0:
            continue
        ip, port = row[:2]
        ip_port = (ip, ii(port))

        if ip_port not in fields:
            # first row for this endpoint is the header row
            sensors = [i.split('.') for i in row[4:]]
            fields[ip_port] = row[2:4] + sensors
            source_id2nostname[row[2]] = row[3]
            # precompute the unit->bytes multiplier per column (1 = keep as-is)
            coefs[ip_port] = [to_bytes.get(s[1], 1) for s in sensors]
        else:
            fld = fields[ip_port]
            processed_data = []
            a = processed_data.append

            # this cycle is critical for performance
            # don't "refactor" it, unless you are confident
            # in what you are doing
            for dev_sensor, val, coef in zz(fld[2:], row[3:], coefs[ip_port]):
                a((dev_sensor, ii(val) * coef))

            ctime = ii(row[2])
            sd = SD(fld[0], fld[1], ctime, processed_data)
            ra((ctime, sd))

    res.sort(key=lambda x: x[0])
    return res, source_id2nostname
110
111
def load_results_eval(fd):
    """Parse the eval-based sensor storage format.

    Each non-empty line is a python literal evaluating to (_, data_dict),
    where data_dict carries 'time', 'source_id', 'hostname' plus
    "dev.sensor" -> raw value entries.

    Returns (results, source_id2hostname) where results is a list of
    (ctime, SensorsData) sorted by ctime.
    """
    res = []
    source_id2nostname = {}

    for line in fd:
        if line.strip() == "":
            continue

        # SECURITY: eval() executes arbitrary code from the results file.
        # Acceptable only because the file is produced by this tool itself;
        # never point this loader at untrusted input (ast.literal_eval
        # would be the safe alternative).
        _, data = eval(line)
        ctime = data.pop('time')
        source_id = data.pop('source_id')
        hostname = data.pop('hostname')

        processed_data = []
        for k, v in data.items():
            dev, sensor = k.split('.')
            # scale raw counters into bytes where a coefficient is known
            processed_data.append(((dev, sensor),
                                   v * to_bytes.get(sensor, 1)))

        sd = SensorsData(source_id, hostname, ctime, processed_data)
        res.append((ctime, sd))
        source_id2nostname[source_id] = hostname

    res.sort(key=lambda x: x[0])
    return res, source_id2nostname
137
138
def load_test_timings(fd, max_diff=1000):
    """Extract per-test run intervals from a raw_results.yaml stream.

    Collects 'run_interval' (start, stop) pairs of every 'io' test,
    groups intervals that start within *max_diff* of each other
    (concurrent runs of the same test on several nodes), then merges
    adjacent groups whose gap is below *max_diff*.

    Returns {test_name: [(start, stop), ...]} with merged intervals.

    NOTE(review): yaml.load without an explicit Loader can construct
    arbitrary python objects; fine for self-produced result files only.
    NOTE(review): the sanity checks below are asserts, so they vanish
    under python -O.
    """
    raw_map = collections.defaultdict(lambda: [])
    data = yaml.load(fd.read())
    for test_type, test_results in data:
        if test_type == 'io':
            for tests_res in test_results:
                for test_res in tests_res['res']:
                    raw_map[test_res['name']].append(test_res['run_interval'])

    result = {}
    for name, intervals in raw_map.items():
        intervals.sort()
        curr_start, curr_stop = intervals[0]
        curr_result = []

        # pass 1: collapse intervals that started (almost) simultaneously
        # into one representative interval per batch
        for (start, stop) in intervals[1:]:
            if abs(curr_start - start) < max_diff:
                # same batch: stops must also be close together
                # if abs(curr_stop - stop) > 2:
                #     print abs(curr_stop - stop)
                assert abs(curr_stop - stop) < max_diff
            else:
                # new batch: it must begin after the previous one ends
                assert start + max_diff >= curr_stop
                assert stop > curr_stop
                curr_result.append((curr_start, curr_stop))
                curr_start, curr_stop = start, stop
        curr_result.append((curr_start, curr_stop))

        # pass 2: merge consecutive batches separated by a small gap
        merged_res = []
        curr_start, curr_stop = curr_result[0]
        for start, stop in curr_result[1:]:
            if abs(curr_stop - start) < max_diff:
                curr_stop = stop
            else:
                merged_res.append((curr_start, curr_stop))
                curr_start, curr_stop = start, stop
        merged_res.append((curr_start, curr_stop))
        result[name] = merged_res

    return result
178
179
# Overload thresholds per sensor: a sample strictly above the limit is
# counted as an overload event by avg_load().
critical_values = {
    'io_queue': 1,
    'mem_usage_percent': 0.8,
}
Ved-vampirfcea0722015-04-27 14:06:13 +0300183
Ved-vampirfcea0722015-04-27 14:06:13 +0300184
class AggregatedData(object):
    """Totals of one sensor counter, grouped at several granularities."""

    def __init__(self, sensor_name):
        self.sensor_name = sensor_name

        # (node, device) -> total
        self.per_device = collections.defaultdict(int)

        # node -> total
        self.per_node = collections.defaultdict(int)

        # role -> total
        self.per_role = collections.defaultdict(int)

        # (role_or_node, device_or_'*') -> total
        self.all_together = collections.defaultdict(int)

    def __str__(self):
        text = "<AggregatedData({0})>\n".format(self.sensor_name)
        for (owner, device), total in self.all_together.items():
            text += "    {0}:{1} = {2}\n".format(owner, device, total)
        return text
206
207
def total_consumption(sensors_data, roles_map):
    """Aggregate sensor samples into per-device/node/role totals.

    sensors_data -- iterable of (ctime, SensorsData)
    roles_map    -- hostname -> list of role names

    Returns {sensor_name: AggregatedData}.
    """
    result = {}

    # pass 1: accumulate every sample into the (host, device) bucket
    for _, snapshot in sensors_data:
        for (dev, sensor), val in snapshot.values:
            try:
                agg = result[sensor]
            except KeyError:
                agg = result[sensor] = AggregatedData(sensor)
            agg.per_device[(snapshot.hostname, dev)] += val

    # pass 2: roll per-device totals up to node and role level
    for agg in result.values():
        for (hostname, dev), val in agg.per_device.items():
            agg.per_node[hostname] += val
            for role in roles_map[hostname]:
                agg.per_role[role] += val
            agg.all_together[(hostname, dev)] = val

        for role, val in agg.per_role.items():
            agg.all_together[(role, '*')] = val
        for node, val in agg.per_node.items():
            agg.all_together[(node, '*')] = val

    return result
237
238
def avg_load(data):
    """Count overload samples per (hostname, device, sensor).

    data -- iterable of (ctime, SensorsData)

    A sample counts as an overload when its sensor appears in
    ``critical_values`` and the value strictly exceeds the threshold.

    Returns (load, span) where load maps (hostname, dev, sensor) to the
    number of overload samples and span is max(ctime) - min(ctime).
    """
    # BUG FIX: this was a plain dict, so the very first `+= 1` below
    # raised KeyError; defaultdict(int) makes missing keys start at 0.
    load = collections.defaultdict(int)

    min_time = 0xFFFFFFFFFFF
    max_time = 0

    for tm, item in data:
        min_time = min(min_time, item.ctime)
        max_time = max(max_time, item.ctime)

        for name, max_val in critical_values.items():
            for (dev, sensor), val in item.values:
                if sensor == name and val > max_val:
                    load[(item.hostname, dev, sensor)] += 1

    return load, max_time - min_time
255
256
def print_bottlenecks(data_iter, max_bottlenecks=15):
    """Render the top overloaded components as a text table.

    data_iter -- iterable of (ctime, SensorsData), forwarded to avg_load()

    Returns the table as a single newline-joined string.

    NOTE(review): the keys of `load` are (hostname, dev, sensor) tuples,
    so len(name) below is the tuple arity (3), not a display width, and
    formatting a tuple with a width spec relies on the python 2 fallback
    to str() -- confirm before porting to python 3.
    NOTE(review): empty `load` makes max() raise, and duration == 0 would
    divide by zero -- presumably never happens on real data; verify.
    """
    load, duration = avg_load(data_iter)
    # invert to (count, key) so sorting puts the busiest entries first
    rev_items = ((v, k) for (k, v) in load.items())

    res = sorted(rev_items, reverse=True)[:max_bottlenecks]

    max_name_sz = max(len(name) for _, name in res)
    frmt = "{{0:>{0}}} | {{1:>4}}".format(max_name_sz)
    table = [frmt.format("Component", "% times load > 100%")]

    for (v, k) in res:
        # overload count -> percentage of the observed time span, rounded
        table.append(frmt.format(k, int(v * 100.0 / duration + 0.5)))

    return "\n".join(table)
271
272
def print_consumption(agg, min_transfer=None):
    """Render one AggregatedData as a two-column text table.

    agg          -- AggregatedData with filled all_together mapping
    min_transfer -- if given, hide consumers below this many bytes

    Returns the table string, or None when nothing is left to show.
    """
    pairs = [(int(total), owner + ':' + dev)
             for (owner, dev), total in agg.all_together.items()]
    pairs.sort(reverse=True)

    if min_transfer is not None:
        pairs = [item for item in pairs if item[0] >= min_transfer]

    if not pairs:
        return None

    # humanize the byte counts for display
    rows = [(b2ssize(total) + "B", label) for (total, label) in pairs]

    name_width = max(len(label) for _, label in rows)
    value_width = max(len(size) for size, _ in rows)

    line_fmt = " {{0:>{0}}} | {{1:>{1}}} ".format(name_width, value_width)
    lines = [line_fmt.format("Component", "Usage")]
    for size, label in rows:
        lines.append(line_fmt.format(label, size))

    return "\n".join(lines)
Ved-vampir5c7b6142015-04-24 19:49:59 +0300300
301
def parse_args(args):
    """Parse command line options.

    args -- full argv; args[0] (the program name) is skipped.

    Returns the populated argparse namespace.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-t', '--time_period', nargs=2,
                        type=int, default=None,
                        help="Begin and end time for tests")
    # NOTE: the misspelled option name '--max-bottlenek' is kept
    # unchanged for backward compatibility with existing callers.
    parser.add_argument('-m', '--max-bottlenek', type=int,
                        default=15, help="Max number of bottlenecks to show")
    # BUG FIX: argparse %-interpolates help strings when printing, so the
    # bare '%' that used to be here crashed `-h`; it must be escaped as '%%'.
    parser.add_argument('-x', '--max-diff', type=int,
                        default=10,
                        help="Minimum load to report, in 0.1%% units "
                             "of the test nodes' total load")
    parser.add_argument('-d', '--debug-ver', action='store_true',
                        help="Full report with original data")
    parser.add_argument('-u', '--user-ver', action='store_true',
                        default=True, help="Avg load report")
    parser.add_argument('-s', '--select-loads', nargs='*', default=[])
    parser.add_argument('results_folder')
    return parser.parse_args(args[1:])
319
320
def make_roles_mapping(source_id_mapping, source_id2hostname):
    """Build a lookup from source_id AND hostname to the node's roles.

    source_id_mapping  -- ssh url -> list of roles
    source_id2hostname -- "host:port" source id -> hostname

    Sensor sources that have no explicit role mapping are assumed to be
    test nodes.
    """
    roles_map = {}

    for ssh_url, roles in source_id_mapping.items():
        # strip the user/scheme prefix, keeping the "host:port" part
        sep = '@' if '@' in ssh_url else '://'
        sid = ssh_url.split(sep)[1]

        # "proto:host:port" -> drop the trailing port copy
        if sid.count(':') == 2:
            sid = sid.rsplit(":", 1)[0]

        # a bare trailing colon means the default ssh port
        if sid.endswith(':'):
            sid += "22"

        if sid in source_id2hostname:
            roles_map[sid] = roles
            roles_map[source_id2hostname[sid]] = roles

    # everything that reported sensors but has no role is a test node
    for sid in set(source_id2hostname) - set(roles_map):
        roles_map[sid] = ['testnode']
        roles_map[source_id2hostname[sid]] = ['testnode']

    return roles_map
344
345
def get_testdata_size(consumption):
    """Largest byte-valued sensor total accumulated by the test nodes.

    consumption -- {sensor_name: AggregatedData}, as built by
    total_consumption().  Only sensors with a byte coefficient count.
    """
    best = 0
    for name, sens in SINFO_MAP.items():
        if sens.to_bytes_coef is None:
            continue  # operation counters are handled by get_testop_cout
        agg = consumption.get(name)
        if agg is None:
            continue
        best = max(best, agg.per_role.get('testnode', 0))
    return best
354
355
def get_testop_cout(consumption):
    """Largest operation-counter total accumulated by the test nodes.

    consumption -- {sensor_name: AggregatedData}, as built by
    total_consumption().  Only sensors without a byte coefficient count.
    """
    top = 0
    for name, sens in SINFO_MAP.items():
        if sens.to_bytes_coef is not None:
            continue  # byte-valued sensors are handled by get_testdata_size
        agg = consumption.get(name)
        if agg is None:
            continue
        top = max(top, agg.per_role.get('testnode', 0))
    return top
364
365
def get_data_for_intervals(data, intervals):
    """Select the items of *data* whose timestamps fall into any interval.

    data      -- list of (ctime, payload) pairs sorted by ctime
    intervals -- iterable of inclusive (begin, end) timestamp ranges

    Returns the matching (ctime, payload) pairs in order.
    """
    # perf fix: the timestamp list used to be rebuilt inside the loop
    # for every interval; extract it once.
    times = [ctime for ctime, _ in data]
    res = []
    for begin, end in intervals:
        b_p = bisect.bisect_left(times, begin)
        e_p = bisect.bisect_right(times, end)
        res.extend(data[b_p:e_p])
    return res
374
375
class Host(object):
    """Mutable record describing one host's device inventory."""

    def __init__(self, name=None):
        # human-readable host name (may remain None)
        self.name = name
        # block devices discovered on the host: device name -> data
        self.hdd_devs = {}
        # network devices; left unset (None) until filled in
        self.net_devs = None
381
382
383# def plot_consumption(per_consumer_table, fields):
384# hosts = {}
385# storage_sensors = ('sectors_written', 'sectors_read')
386
387# for (hostname, dev), consumption in per_consumer_table.items():
388# if dev != '*':
389# continue
390
391# if hostname not in hosts:
392# hosts[hostname] = Host(hostname)
393
394# cons_map = map(zip(fields, consumption))
395
396# for sn in storage_sensors:
397# vl = cons_map.get(sn, 0)
398# if vl > 0:
399# pass
koder aka kdanilovd5ed4da2015-05-07 23:33:23 +0300400
401
def main(argv):
    """Entry point: print a per-test resource consumption table.

    Reads sensor_storage.txt, nodes.yaml and raw_results.yaml from the
    results folder given on the command line, aggregates sensor counters
    over each test's run intervals and prints one texttable per test.
    """
    opts = parse_args(argv)

    sensors_data_fname = os.path.join(opts.results_folder,
                                      'sensor_storage.txt')

    roles_file = os.path.join(opts.results_folder,
                              'nodes.yaml')

    raw_results_file = os.path.join(opts.results_folder,
                                    'raw_results.yaml')

    # NOTE(review): yaml.load without a Loader can build arbitrary
    # objects; acceptable only for files produced by the tool itself.
    src2roles = yaml.load(open(roles_file))
    timings = load_test_timings(open(raw_results_file))
    with open(sensors_data_fname) as fd:
        data, source_id2hostname = load_results(fd)

    roles_map = make_roles_mapping(src2roles, source_id2hostname)
    # --max-diff is given in 0.1% units; convert to a fraction
    max_diff = float(opts.max_diff) / 1000

    # print print_bottlenecks(data, opts.max_bottlenek)
    # print print_bottlenecks(data, opts.max_bottlenek)

    for name, intervals in sorted(timings.items()):
        # -s/--select-loads limits the report to the named tests
        if opts.select_loads != []:
            if name not in opts.select_loads:
                continue

        print
        print
        print "-" * 30 + " " + name + " " + "-" * 30
        print

        # only samples that fall inside this test's run intervals
        data_chunk = get_data_for_intervals(data, intervals)

        consumption = total_consumption(data_chunk, roles_map)

        # noise thresholds: values below max_diff of the test nodes'
        # own traffic / operation count are blanked out in the table
        testdata_sz = get_testdata_size(consumption) * max_diff
        testop_count = get_testop_cout(consumption) * max_diff

        fields = ('recv_bytes', 'send_bytes',
                  'sectors_read', 'sectors_written',
                  'reads_completed', 'writes_completed')
        per_consumer_table = {}

        # all (node_or_role, device) consumers seen by any sensor
        all_consumers = set(consumption.values()[0].all_together)
        all_consumers_sum = []

        for consumer in all_consumers:
            tb = per_consumer_table[consumer] = []
            vl = 0
            # NOTE(review): this inner loop reuses `name`, shadowing the
            # test name from the outer loop; harmless today because the
            # outer `name` is not read after this point, but fragile.
            for name in fields:
                val = consumption[name].all_together[consumer]
                if SINFO_MAP[name].to_bytes_coef is None:
                    # operation counter: render as a plain SI number
                    if val < testop_count:
                        val = 0
                    tb.append(b2ssize_10(int(val)))
                else:
                    # byte counter: render with a binary size suffix
                    if val < testdata_sz:
                        val = 0
                    tb.append(b2ssize(int(val)) + "B")
                vl += int(val)
            all_consumers_sum.append((vl, consumer))

        # busiest consumers first
        all_consumers_sum.sort(reverse=True)
        # plot_consumption(per_consumer_table, fields)
        # continue

        tt = texttable.Texttable(max_width=130)
        tt.set_cols_align(["l"] + ["r"] * len(fields))

        # table header uses the short sensor print names where known
        header = ["Name"]
        for fld in fields:
            if fld in SINFO_MAP:
                header.append(SINFO_MAP[fld].print_name)
            else:
                header.append(fld)
        tt.header(header)

        # rows: skip all-zero consumers, show '-' for blanked-out cells
        for summ, consumer in all_consumers_sum:
            if summ > 0:
                tt.add_row([":".join(consumer)] +
                           [v if v not in ('0B', '0') else '-'
                            for v in per_consumer_table[consumer]])

        tt.set_deco(texttable.Texttable.VLINES | texttable.Texttable.HEADER)
        print tt.draw()
489
Ved-vampir5c7b6142015-04-24 19:49:59 +0300490
if __name__ == "__main__":
    # sys.exit instead of the bare exit() builtin: exit() is installed by
    # the site module and is absent under `python -S` or in frozen builds.
    sys.exit(main(sys.argv))