Ved-vampir | 5c7b614 | 2015-04-24 19:49:59 +0300 | [diff] [blame] | 1 | """ Analyze test results for finding bottlenecks """ |
| 2 | |
| 3 | import sys |
koder aka kdanilov | d5ed4da | 2015-05-07 23:33:23 +0300 | [diff] [blame^] | 4 | import csv |
| 5 | import time |
| 6 | import bisect |
koder aka kdanilov | f86d7af | 2015-05-06 04:01:54 +0300 | [diff] [blame] | 7 | import os.path |
Ved-vampir | 5c7b614 | 2015-04-24 19:49:59 +0300 | [diff] [blame] | 8 | import argparse |
koder aka kdanilov | f86d7af | 2015-05-06 04:01:54 +0300 | [diff] [blame] | 9 | import collections |
Ved-vampir | 5c7b614 | 2015-04-24 19:49:59 +0300 | [diff] [blame] | 10 | |
| 11 | |
koder aka kdanilov | f86d7af | 2015-05-06 04:01:54 +0300 | [diff] [blame] | 12 | import yaml |
koder aka kdanilov | d5ed4da | 2015-05-07 23:33:23 +0300 | [diff] [blame^] | 13 | import texttable |
Ved-vampir | 5c7b614 | 2015-04-24 19:49:59 +0300 | [diff] [blame] | 14 | |
Ved-vampir | fcea072 | 2015-04-27 14:06:13 +0300 | [diff] [blame] | 15 | |
koder aka kdanilov | f86d7af | 2015-05-06 04:01:54 +0300 | [diff] [blame] | 16 | from wally.utils import b2ssize |
Ved-vampir | fcea072 | 2015-04-27 14:06:13 +0300 | [diff] [blame] | 17 | |
| 18 | |
class SensorsData(object):
    """Container for the sensor values reported by one source at one ctime."""

    def __init__(self, source_id, hostname, ctime, values):
        # values is a list of ((dev, sensor), value) pairs
        self.values = values
        self.ctime = ctime
        self.hostname = hostname
        self.source_id = source_id
Ved-vampir | fcea072 | 2015-04-27 14:06:13 +0300 | [diff] [blame] | 25 | |
| 26 | |
class SensorInfo(object):
    """Description of a sensor: its name, native unit and bytes-per-unit factor."""

    def __init__(self, name, native_ext, to_bytes_coef):
        # to_bytes_coef is the multiplier applied to a raw reading
        # to express it in bytes
        self.to_bytes_coef = to_bytes_coef
        self.native_ext = native_ext
        self.name = name
Ved-vampir | fcea072 | 2015-04-27 14:06:13 +0300 | [diff] [blame] | 32 | |
# Sensors whose raw readings get converted to bytes.
_SINFO = [
    SensorInfo(name='recv_bytes', native_ext='B', to_bytes_coef=1),
    SensorInfo(name='send_bytes', native_ext='B', to_bytes_coef=1),
    SensorInfo(name='sectors_written', native_ext='Sect', to_bytes_coef=512),
    SensorInfo(name='sectors_read', native_ext='Sect', to_bytes_coef=512),
]

# sensor name -> SensorInfo
SINFO_MAP = {sinfo.name: sinfo for sinfo in _SINFO}

# sensor name -> multiplier turning a raw reading into bytes
to_bytes = {sinfo.name: sinfo.to_bytes_coef for sinfo in _SINFO}
Ved-vampir | fcea072 | 2015-04-27 14:06:13 +0300 | [diff] [blame] | 42 | |
koder aka kdanilov | d5ed4da | 2015-05-07 23:33:23 +0300 | [diff] [blame^] | 43 | |
def load_results(fd):
    """Load sensor data from ``fd``, choosing the parser by sniffing its head.

    The first 100 characters are inspected and the stream is rewound.  If
    they contain '(' or '{' the file is handed to ``load_results_eval``,
    otherwise to ``load_results_csv``.

    Returns whatever the selected loader returns: a
    ``(records, source_id -> hostname)`` pair.
    """
    head = fd.read(100)
    fd.seek(0, os.SEEK_SET)

    looks_like_literals = '(' in head or '{' in head
    loader = load_results_eval if looks_like_literals else load_results_csv

    records, source_id2hostname = loader(fd)
    return records, source_id2hostname
| 57 | |
| 58 | |
def load_results_csv(fd):
    """Parse sensor data stored as CSV rows.

    Every non-empty row starts with ``ip, port``.  The first row seen for a
    given (ip, port) pair is treated as a header: column 2 is the source id,
    column 3 the hostname and the remaining columns are ``dev.sensor`` names.
    Each later row for the same pair is a sample: column 2 is the integer
    ctime and the remaining columns are integer readings, which are
    multiplied by the ``to_bytes`` coefficient of the matching sensor
    (1 when the sensor has no coefficient).

    Returns ``(res, source_id2nostname)`` where ``res`` is a list of
    ``(ctime, SensorsData)`` tuples sorted by ctime and
    ``source_id2nostname`` maps source id to hostname.
    """

    fields = {}  # (ip, port) -> [source_id, hostname, [dev, sensor], ...]
    res = []
    source_id2nostname = {}
    coefs = {}  # (ip, port) -> per-column to-bytes multipliers

    # cached for performance
    ii = int
    zz = zip
    SD = SensorsData
    ra = res.append

    for row in csv.reader(fd):
        if len(row) == 0:
            continue
        ip, port = row[:2]
        ip_port = (ip, ii(port))

        if ip_port not in fields:
            # header row: remember the column layout for this source
            sensors = [i.split('.') for i in row[4:]]
            fields[ip_port] = row[2:4] + sensors
            source_id2nostname[row[2]] = row[3]
            coefs[ip_port] = [to_bytes.get(s[1], 1) for s in sensors]
        else:
            fld = fields[ip_port]
            processed_data = []
            a = processed_data.append

            # this cycle is critical for performance
            # don't "refactor" it, unless you are confident
            # in what you are doing
            for dev_sensor, val, coef in zz(fld[2:], row[3:], coefs[ip_port]):
                a((dev_sensor, ii(val) * coef))

            ctime = ii(row[2])
            sd = SD(fld[0], fld[1], ctime, processed_data)
            ra((ctime, sd))

    res.sort(key=lambda x: x[0])
    return res, source_id2nostname
| 100 | |
| 101 | |
def load_results_eval(fd):
    """Parse sensor data stored as one Python literal per line.

    Each non-blank line evaluates to a pair whose second element is a dict
    holding 'time', 'source_id', 'hostname' and ``'dev.sensor': value``
    entries.  Values are multiplied by the ``to_bytes`` coefficient of the
    sensor (1 when there is none).

    Returns ``(records, source_id -> hostname)`` with ``records`` being a
    list of ``(ctime, SensorsData)`` tuples sorted by ctime.
    """
    records = []
    hostnames = {}

    for line in fd:
        if not line.strip():
            continue

        # NOTE(review): eval() executes whatever the file contains; only
        # feed this function sensor files from a trusted source.
        _, payload = eval(line)
        ctime = payload.pop('time')
        source_id = payload.pop('source_id')
        hostname = payload.pop('hostname')

        # everything left in the payload is a "dev.sensor" reading
        values = []
        for full_name, raw in payload.items():
            dev, sensor = full_name.split('.')
            scaled = raw * to_bytes.get(sensor, 1)
            values.append(((dev, sensor), scaled))

        records.append(
            (ctime, SensorsData(source_id, hostname, ctime, values)))
        hostnames[source_id] = hostname

    records.sort(key=lambda record: record[0])
    return records, hostnames
| 127 | |
| 128 | |
def load_test_timings(fd):
    """Read per-test time intervals from a raw-results YAML stream.

    The document must be a one-element sequence holding a
    ``(test_type, test_data)`` pair with ``test_type == 'io'``.
    ``test_data['__meta__']['timings']`` is iterated as
    ``(test_names, interval)`` pairs, where all names in ``test_names``
    must be equal.

    Returns a dict mapping each test name to the interval of its first
    occurrence; later occurrences of the same name are ignored.

    Raises AssertionError when the document does not have the shape
    described above.
    """
    # The Loader is passed explicitly: PyYAML >= 6 refuses to call
    # yaml.load() without one, and yaml.Loader is what older versions
    # used implicitly.
    # NOTE(review): yaml.Loader can construct arbitrary Python objects;
    # only load result files from a trusted source.
    data = yaml.load(fd.read(), Loader=yaml.Loader)
    assert len(data) == 1
    test_type, test_data = data[0]
    assert test_type == 'io'

    result = {}  # test name -> interval
    for test_names, interval in test_data['__meta__']['timings']:
        assert len(set(test_names)) == 1
        result.setdefault(test_names[0], interval)
    return result
| 140 | |
| 141 | |
# sensor name -> threshold; avg_load counts readings strictly above it
critical_values = {
    'io_queue': 1,
    'mem_usage_percent': 0.8,
}
Ved-vampir | fcea072 | 2015-04-27 14:06:13 +0300 | [diff] [blame] | 145 | |
Ved-vampir | fcea072 | 2015-04-27 14:06:13 +0300 | [diff] [blame] | 146 | |
class AggregatedData(object):
    """Totals of a single sensor, grouped by device, node and role."""

    def __init__(self, sensor_name):
        self.sensor_name = sensor_name

        # every counter starts at zero on first access
        self.per_device = collections.defaultdict(int)    # (node, device)
        self.per_node = collections.defaultdict(int)      # node
        self.per_role = collections.defaultdict(int)      # role
        # (role_or_node, device_or_*)
        self.all_together = collections.defaultdict(int)

    def __str__(self):
        lines = ["<AggregatedData({0})>\n".format(self.sensor_name)]
        for key, val in self.all_together.items():
            role_or_node, device = key
            lines.append(" {0}:{1} = {2}\n".format(role_or_node, device, val))
        return "".join(lines)
| 168 | |
| 169 | |
def total_consumption(sensors_data, roles_map):
    """Sum sensor readings per device, node and role.

    ``sensors_data`` is an iterable of ``(ctime, item)`` pairs where
    ``item`` exposes ``hostname`` and ``values`` (a list of
    ``((dev, sensor), value)`` pairs).  ``roles_map`` maps a hostname to
    the roles of that host.

    Returns a dict ``sensor name -> AggregatedData``.
    """
    totals = {}

    # pass 1: accumulate raw readings per (host, device)
    for _, reading in sensors_data:
        for (dev, sensor), val in reading.values:
            if sensor not in totals:
                totals[sensor] = AggregatedData(sensor)
            totals[sensor].per_device[(reading.hostname, dev)] += val

    # pass 2: roll the per-device totals up to nodes and roles
    for agg in totals.values():
        for (host, dev), val in agg.per_device.items():
            agg.per_node[host] += val

            for role in roles_map[host]:
                agg.per_role[role] += val

            agg.all_together[(host, dev)] = val

        for role, val in agg.per_role.items():
            agg.all_together[(role, '*')] = val

        for node, val in agg.per_node.items():
            agg.all_together[(node, '*')] = val

    return totals
| 199 | |
| 200 | |
def avg_load(data, limits=None):
    """Count how often each sensor exceeded its critical threshold.

    ``data`` is an iterable of ``(time, item)`` pairs where ``item``
    exposes ``ctime``, ``hostname`` and ``values`` (a list of
    ``((dev, sensor), value)`` pairs).  ``limits`` maps a sensor name to
    its threshold and defaults to the module-level ``critical_values``.

    Returns ``(load, duration)``: ``load`` maps
    ``(hostname, dev, sensor)`` to the number of readings strictly above
    the threshold, and ``duration`` is the span between the smallest and
    largest ``ctime`` seen (0 when ``data`` is empty).
    """
    if limits is None:
        limits = critical_values

    load = {}
    min_time = None
    max_time = None

    for _, item in data:
        ctime = item.ctime
        if min_time is None or ctime < min_time:
            min_time = ctime
        if max_time is None or ctime > max_time:
            max_time = ctime

        for (dev, sensor), val in item.values:
            if sensor in limits and val > limits[sensor]:
                key = (item.hostname, dev, sensor)
                # plain dict: start missing counters at zero
                load[key] = load.get(key, 0) + 1

    if min_time is None:
        # no samples at all - there is no time span to report
        return load, 0
    return load, max_time - min_time
| 217 | |
| 218 | |
def print_bottlenecks(data_iter, max_bottlenecks=15):
    """Render a text table of the most frequently overloaded components.

    ``data_iter`` is passed to ``avg_load``; the ``max_bottlenecks``
    entries with the highest overload counts are listed, highest first.
    Each ``(hostname, dev, sensor)`` key is shown as a dot-joined name and
    its count is shown as a percentage of the measured duration.

    Returns the table as a single newline-joined string; with no
    overloaded components only the header line is returned.
    """
    load, duration = avg_load(data_iter)

    ranked = sorted(((count, key) for key, count in load.items()),
                    reverse=True)[:max_bottlenecks]

    # Keys are tuples: turn them into text before measuring or formatting,
    # otherwise the width is the tuple length and the width spec fails.
    rows = [(".".join(str(part) for part in key), count)
            for count, key in ranked]

    max_name_sz = max([len(name) for name, _ in rows] or [0])
    frmt = "{{0:>{0}}} | {{1:>4}}".format(max_name_sz)
    table = [frmt.format("Component", "% times load > 100%")]

    for name, count in rows:
        if duration > 0:
            percent = int(count * 100.0 / duration + 0.5)
        else:
            # all samples share one timestamp: the ratio is undefined
            percent = 0
        table.append(frmt.format(name, percent))

    return "\n".join(table)
| 233 | |
| 234 | |
def print_consumption(agg, min_transfer=None):
    """Render the ``all_together`` totals of ``agg`` as a text table.

    Entries are ordered from largest to smallest.  When ``min_transfer``
    is given, entries below it are dropped.  Returns the table as a
    newline-joined string, or None when no entry remains.
    """
    rows = sorted(
        ((int(amount), node_or_role + ':' + dev)
         for (node_or_role, dev), amount in agg.all_together.items()),
        reverse=True)

    if min_transfer is not None:
        rows = [row for row in rows if row[0] >= min_transfer]

    if not rows:
        return None

    # switch from numbers to human readable sizes
    rows = [(b2ssize(amount) + "B", label) for amount, label in rows]

    name_width = max(len(label) for _, label in rows)
    value_width = max(len(text) for text, _ in rows)

    frmt = " {{0:>{0}}} | {{1:>{1}}} ".format(name_width, value_width)
    lines = [frmt.format("Component", "Usage")]
    lines.extend(frmt.format(label, text) for text, label in rows)

    return "\n".join(lines)
Ved-vampir | 5c7b614 | 2015-04-24 19:49:59 +0300 | [diff] [blame] | 262 | |
| 263 | |
def parse_args(args):
    """Parse the command line; ``args[0]`` (the program name) is skipped.

    Returns the argparse namespace with ``time_period``,
    ``max_bottlenek``, ``debug_ver``, ``user_ver`` and ``results_folder``.
    """
    option_specs = (
        (('-t', '--time_period'),
         dict(nargs=2, type=int, default=None,
              help="Begin and end time for tests")),
        (('-m', '--max-bottlenek'),
         dict(type=int, default=15, help="Max bottlenek to show")),
        (('-d', '--debug-ver'),
         dict(action='store_true',
              help="Full report with original data")),
        (('-u', '--user-ver'),
         dict(action='store_true', default=True,
              help="Avg load report")),
    )

    parser = argparse.ArgumentParser()
    for flags, settings in option_specs:
        parser.add_argument(*flags, **settings)
    parser.add_argument('results_folder')

    cli_args = args[1:]
    return parser.parse_args(cli_args)
| 278 | |
| 279 | |
def make_roles_mapping(source_id_mapping, source_id2hostname):
    """Map both source ids and hostnames to their node roles.

    ``source_id_mapping`` maps an ssh url to a list of roles;
    ``source_id2hostname`` maps a source id to a hostname.  Urls whose
    derived source id is unknown to ``source_id2hostname`` are ignored.
    Every source id left without roles is given the role 'testnode'.
    """
    roles_by_name = {}

    for ssh_url, roles in source_id_mapping.items():
        # the source id is what follows the credentials, or the scheme
        separator = '@' if '@' in ssh_url else '://'
        source_id = ssh_url.split(separator)[1]

        # two colons: drop the trailing part
        # (presumably host:port:key_file - TODO confirm)
        if source_id.count(':') == 2:
            source_id = source_id.rsplit(":", 1)[0]

        # an empty port is completed with 22
        if source_id.endswith(':'):
            source_id += "22"

        if source_id not in source_id2hostname:
            continue

        roles_by_name[source_id] = roles
        roles_by_name[source_id2hostname[source_id]] = roles

    unmapped = set(source_id2hostname).difference(roles_by_name)
    for testnode_src in unmapped:
        roles_by_name[testnode_src] = ['testnode']
        roles_by_name[source_id2hostname[testnode_src]] = ['testnode']

    return roles_by_name
| 303 | |
| 304 | |
def get_testdata_size(consumption):
    """Return the largest 'testnode' role total among the byte sensors.

    Only sensors listed in ``SINFO_MAP`` are considered; the result is 0
    when none of them has a 'testnode' total above zero.
    """
    testnode_totals = [
        agg.per_role.get('testnode', 0)
        for sensor_name, agg in consumption.items()
        if sensor_name in SINFO_MAP
    ]
    return max([0] + testnode_totals)
| 311 | |
| 312 | |
def get_data_for_interval(data, interval):
    """Return the slice of ``data`` whose times lie within ``interval``.

    ``data`` is a list of ``(ctime, item)`` pairs sorted by ctime and
    ``interval`` is a ``(begin, end)`` pair; both bounds are inclusive.
    """
    lower, upper = interval
    timestamps = [stamp for stamp, _ in data]
    first = bisect.bisect_left(timestamps, lower)
    past_last = bisect.bisect_right(timestamps, upper)
    return data[first:past_last]
| 319 | |
| 320 | |
def main(argv):
    """Print a per-test table of data consumption for a results folder.

    Reads 'nodes.yaml', 'raw_results.yaml' and 'sensor_storage.txt' from
    the folder named on the command line.  For every test interval it
    prints one row per node, role or device showing how many bytes were
    received, sent, read and written; values below 1% of the test data
    size are shown as '-' and rows without any remaining value are
    omitted.

    Returns 0.
    """
    opts = parse_args(argv)

    sensors_data_fname = os.path.join(opts.results_folder,
                                      'sensor_storage.txt')
    roles_file = os.path.join(opts.results_folder, 'nodes.yaml')
    raw_results_file = os.path.join(opts.results_folder,
                                    'raw_results.yaml')

    # The Loader is passed explicitly because PyYAML >= 6 requires it.
    # NOTE(review): yaml.Loader can construct arbitrary Python objects;
    # only point this tool at result folders from a trusted source.
    with open(roles_file) as fd:
        src2roles = yaml.load(fd, Loader=yaml.Loader)

    with open(raw_results_file) as fd:
        timings = load_test_timings(fd)

    with open(sensors_data_fname) as fd:
        data, source_id2hostname = load_results(fd)

    roles_map = make_roles_mapping(src2roles, source_id2hostname)

    fields = ('recv_bytes', 'send_bytes',
              'sectors_read', 'sectors_written')

    for name, interval in sorted(timings.items()):
        # single-argument print() calls behave the same on Python 2 and 3
        print("")
        print("")
        print("-" * 30 + " " + name + " " + "-" * 30)
        print("")

        data_chunk = get_data_for_interval(data, interval)
        consumption = total_consumption(data_chunk, roles_map)

        # anything below 1% of the test data size is treated as noise
        testdata_sz = get_testdata_size(consumption) // 100

        # a sensor may be absent from this interval altogether
        aggregates = [consumption.get(field) for field in fields]

        # Collect consumers from every displayed sensor: network and disk
        # sensors report different devices, so one sensor is not enough.
        all_consumers = set()
        for agg in aggregates:
            if agg is not None:
                all_consumers.update(agg.all_together)

        per_consumer_table = {}
        all_consumers_sum = []

        for consumer in all_consumers:
            cells = per_consumer_table[consumer] = []
            total = 0
            for agg in aggregates:
                if agg is None:
                    val = 0
                else:
                    val = agg.all_together.get(consumer, 0)
                if val < testdata_sz:
                    val = 0
                total += int(val)
                cells.append(b2ssize(int(val)) + "B")
            all_consumers_sum.append((total, consumer))

        all_consumers_sum.sort(reverse=True)

        tt = texttable.Texttable(max_width=130)
        tt.set_cols_align(["l"] + ["r"] * len(fields))
        tt.header(["Name"] + list(fields))

        for summ, consumer in all_consumers_sum:
            if summ > 0:
                tt.add_row([".".join(consumer)] +
                           [v if v != '0B' else '-'
                            for v in per_consumer_table[consumer]])

        tt.set_deco(texttable.Texttable.VLINES | texttable.Texttable.HEADER)
        print(tt.draw())

    return 0
Ved-vampir | 5c7b614 | 2015-04-24 19:49:59 +0300 | [diff] [blame] | 399 | |
if __name__ == "__main__":
    # sys.exit rather than the bare exit() helper: the latter is injected
    # by the site module for interactive use and may be unavailable.
    sys.exit(main(sys.argv))