postprocessing added (plot and dev)
diff --git a/scripts/postprocessing/io_py_result_processor.py b/scripts/postprocessing/io_py_result_processor.py
new file mode 100644
index 0000000..befe4eb
--- /dev/null
+++ b/scripts/postprocessing/io_py_result_processor.py
@@ -0,0 +1,207 @@
+import sys
+import math
+import itertools
+
+from colorama import Fore, Style
+
+
+def med_dev(vals):
+    """Return the arithmetic mean and standard deviation of ``vals``.
+
+    Despite the ``med`` prefix the first value is the mean, not the
+    median.  The deviation is the population standard deviation: the
+    squared distances to the mean are divided by ``len(vals)``.
+
+    :param vals: non-empty sequence of numbers; it is traversed twice and
+        passed to ``len()``, so a one-shot iterator will not work.
+    :returns: ``(mean, deviation)``, both truncated to ``int``.
+    :raises ZeroDivisionError: if ``vals`` is empty.
+    """
+    # Force float arithmetic: on Python 2 an all-integer input would
+    # otherwise use floor division for both the mean and the variance.
+    count = float(len(vals))
+    mean = sum(vals) / count
+    variance = sum((mean - val) ** 2 for val in vals) / count
+    return int(mean), int(variance ** 0.5)
+
+
+def round_deviation(med_dev):
+    """Truncate a ``(value, deviation)`` pair to the deviation's precision.
+
+    Both numbers are cut down to a multiple of a power of ten chosen so
+    that the deviation keeps its two leading digits.  A deviation below
+    1E-7 means the pair is returned untouched.
+
+    :param med_dev: ``(value, deviation)`` pair.
+    :returns: the truncated pair; each element is converted back to the
+        type of the corresponding input element.
+    """
+    value, deviation = med_dev
+
+    if deviation < 1E-7:
+        return med_dev
+
+    magnitude = math.floor(math.log10(deviation))
+    step = 10.0 ** (magnitude - 1)
+
+    def truncate(number):
+        # int() drops everything below one ``step``
+        return int(number / step) * step
+
+    value_type = type(med_dev[0])
+    deviation_type = type(med_dev[1])
+    return value_type(truncate(value)), deviation_type(truncate(deviation))
+
+
+def groupby_globally(data, key_func):
+    """Group every item of ``data`` by ``key_func(item)``.
+
+    Items sharing a key end up in one list wherever they appear in the
+    input, so ``data`` does not have to be sorted.  Inside a group the
+    items keep their input order.
+
+    :param data: iterable of items.
+    :param key_func: callable returning a hashable key of any shape
+        (earlier versions only accepted three-element keys).
+    :returns: dict mapping each key to the list of its items.
+    """
+    grouped = {}
+    for item in data:
+        grouped.setdefault(key_func(item), []).append(item)
+    return grouped
+
+
+class Data(object):
+    """Named holder for raw measurement series and their summaries."""
+
+    def __init__(self, name):
+        # label of this data set; diff_table() returns it as a header
+        self.name = name
+        # series: key -> raw values
+        # processed_series: key -> summary, filled by process_inplace()
+        self.series, self.processed_series = {}, {}
+
+
+def process_inplace(data):
+    """Summarise every raw series of ``data``.
+
+    Each entry of ``data.series`` is reduced with ``med_dev`` and then
+    ``round_deviation``; the results replace ``data.processed_series``.
+    Nothing is returned.
+    """
+    data.processed_series = dict(
+        (key, round_deviation(med_dev(samples)))
+        for key, samples in data.series.items())
+
+
+def diff_table(*datas):
+    """Compare processed series of several data sets with the first one.
+
+    For every key of the first (baseline) data set a row is built: the
+    baseline ``(value, deviation)`` pair followed, for each other data
+    set, by a ``(diff_max, diff_min)`` pair of integer percentages.  The
+    percentages relate the distance between the two deviation bands to
+    the upper edge of the baseline band (value + deviation).
+
+    :param datas: objects with ``name`` and ``processed_series``.
+    :returns: ``(names, table)`` - the data set names in argument order
+        and a dict mapping each baseline key to its row.
+    :raises IndexError: when called without arguments.
+    :raises KeyError: when another data set lacks a baseline key.
+    :raises ZeroDivisionError: when value + deviation of a baseline
+        entry is zero.
+    """
+    reference, others = datas[0], datas[1:]
+    table = {}
+
+    for key, base in reference.processed_series.items():
+        upper = base[0] + base[1]
+        lower = base[0] - base[1]
+        row = [base]
+
+        for other in others:
+            value, spread = other.processed_series[key]
+            shifts = (
+                int(float(value - spread - upper) / upper * 100),
+                int(float(value + spread - lower) / upper * 100),
+            )
+            row.append((max(shifts), min(shifts)))
+
+        table[key] = row
+
+    names = [item.name for item in datas]
+    return names, table
+
+
+def print_table(headers, table):
+    """Print a comparison table, as produced by ``diff_table``, to stdout.
+
+    The first column holds the row key, the second the baseline
+    ``value ~ deviation`` pair and every further column one
+    ``max ~ min`` percentage pair.  Percentages below -10 are printed in
+    red and percentages above 10 in green (colorama escape codes).
+
+    :param headers: list of column titles, one per data set.
+    :param table: dict mapping a key tuple to
+        ``[baseline_pair, diff_pair, ...]``.
+    """
+    # Format specs for the leading key fields.  With four fields this
+    # reproduces the historic "{0:>4} {1} {2:>9} {3}" layout; shorter or
+    # longer keys no longer raise IndexError.
+    label_specs = (">4", "", ">9")
+    item_frmt = "{0}{1:>4}{2} ~ {3}{4:>4}{5}"
+
+    # Every cell is a (visible_width, text) pair: colour escape codes add
+    # characters that must not count towards the column width.
+    lines = [[(len(title), title) for title in [""] + list(headers)]]
+
+    for key, vals in sorted(table.items()):
+        parts = [str(part) for part in key]
+        specs = label_specs + ("",) * len(parts)
+        label = " ".join(
+            format(part, spec) for part, spec in zip(parts, specs))
+        baseline = "{0:>4} ~ {1:>3}".format(*vals[0])
+
+        line = [(len(label), label), (len(baseline), baseline)]
+
+        for diff in vals[1:]:
+            coloured = []
+            for number in diff:
+                if number < -10:
+                    coloured.extend([Fore.RED, number, Style.RESET_ALL])
+                elif number > 10:
+                    coloured.extend([Fore.GREEN, number, Style.RESET_ALL])
+                else:
+                    coloured.extend(["", number, ""])
+
+            # width of the same cell rendered without colour codes
+            visible = len(item_frmt.format(
+                "", coloured[1], "", "", coloured[4], ""))
+            line.append((visible, item_frmt.format(*coloured)))
+
+        lines.append(line)
+
+    widths = [max(line[idx][0] for line in lines)
+              for idx in range(len(lines[0]))]
+
+    sep = '-' * (4 + sum(widths) + 3 * (len(widths) - 1))
+
+    print(sep)
+    for idx, line in enumerate(lines):
+        cells = [" " * (width - visible) + txt
+                 for (visible, txt), width in zip(line, widths)]
+        print("| " + " | ".join(cells) + " |")
+        if idx == 0:
+            # rule between the header row and the data rows
+            print(sep)
+    print(sep)
+
+
+def key_func(x):
+    """Build the grouping key of one result record.
+
+    :param x: record with a ``'__meta__'`` mapping.
+    :returns: ``(blocksize, cache_type, name)`` where ``cache_type`` is
+        ``'d'`` when the meta mapping has a ``'direct'`` entry and
+        ``'s'`` otherwise.
+    """
+    meta = x['__meta__']
+    blocksize = meta['blocksize']
+    cache_tp = 'd' if 'direct' in meta else 's'
+    return blocksize, cache_tp, meta['name']
+
+
+# Row layout of the plain (non-diff) report: key fields first, then
+# "value ~ deviation" columns for IOPS, bandwidth and latency.
+template = (
+    "{bs:>4} {action:>12} {cache_tp:>3} {conc:>4}"
+    " | {iops[0]:>6} ~ {iops[1]:>5} | {bw[0]:>7} ~ {bw[1]:>6}"
+    " | {lat[0]:>6} ~ {lat[1]:>5} |"
+)
+
+# Column titles; formatting ``template`` with them gives the header row.
+headers = {
+    "bs": "BS",
+    "action": "operation",
+    "cache_tp": "S/D",
+    "conc": "CONC",
+    "iops": ("IOPS", "dev"),
+    "bw": ("BW kBps", "dev"),
+    "lat": ("LAT ms", "dev"),
+}
+
+
+def load_io_py_file(fname):
+    """Yield every ``{...}`` block found in the text file ``fname``.
+
+    A block starts at a line beginning with ``{`` and is complete as soon
+    as the accumulated text holds as many ``}`` as ``{`` characters;
+    anything after the last ``}`` is cut off.  Lines outside a block are
+    ignored, a new line starting with ``{`` restarts a pending block, and
+    a block still unbalanced at end of file is dropped silently.  Braces
+    are counted textually, so braces inside string values count too.
+
+    :param fname: path of the file to read.
+    :returns: generator over the evaluated blocks.
+    """
+    with open(fname) as fc:
+        block = None
+        for line in fc:
+            if line.startswith("{"):
+                block = line
+            elif block is not None:
+                block += line
+            else:
+                continue
+
+            if block.count('}') == block.count('{'):
+                # SECURITY: eval() runs arbitrary Python expressions, so
+                # only feed this function files from a trusted source.
+                # TODO(review): switch to ast.literal_eval if the blocks
+                # are guaranteed to be plain literals.
+                yield eval(block[:block.rfind('}') + 1])
+                block = None
+
+
+def main(argv):
+    """Print a table comparing the IOPS of several io.py result files.
+
+    :param argv: program name followed by one ``HEADER=FILE`` argument
+        per result file; the first file is the baseline the others are
+        compared with.
+    :returns: 0 on success, 1 when the arguments are unusable.
+    """
+    CONC_POS = 3
+    specs = argv[1:]
+
+    if not specs or any("=" not in spec for spec in specs):
+        prog = argv[0] if argv else "io_py_result_processor.py"
+        sys.stderr.write(
+            "Usage: {0} HEADER=FILE [HEADER=FILE ...]\n".format(prog))
+        return 1
+
+    items = []
+    for hdr_fname in specs:
+        hdr, fname = hdr_fname.split("=", 1)
+        data = list(load_io_py_file(fname))
+        item = Data(hdr)
+        for key, vals in groupby_globally(data, key_func).items():
+            # NOTE(review): key_func builds three-element keys, so there
+            # is nothing at CONC_POS and indexing it raised IndexError.
+            # Keys without that field are taken as concurrency 1 -
+            # confirm this is the intended scaling.
+            conc = key[CONC_POS] if len(key) > CONC_POS else 1
+            item.series[key] = [val['iops'] * conc for val in vals]
+        process_inplace(item)
+        items.append(item)
+
+    print_table(*diff_table(*items))
+    return 0
+
+ # print template.format(**headers)
+
+ # for (bs, cache_tp, act, conc), curr_data in sorted(grouped.items()):
+ # iops = med_dev([i['iops'] * int(conc) for i in curr_data])
+ # bw_mean = med_dev([i['bw_mean'] * int(conc) for i in curr_data])
+ # lat = med_dev([i['lat'] / 1000 for i in curr_data])
+
+ # iops = round_deviation(iops)
+ # bw_mean = round_deviation(bw_mean)
+ # lat = round_deviation(lat)
+
+ # params = dict(
+ # bs=bs,
+ # action=act,
+ # cache_tp=cache_tp,
+ # iops=iops,
+ # bw=bw_mean,
+ # lat=lat,
+ # conc=conc
+ # )
+
+ # print template.format(**params)
+
+
+if __name__ == "__main__":
+    # sys.exit() is always available; the bare exit() helper is installed
+    # by the site module and is missing e.g. under "python -S".
+    sys.exit(main(sys.argv))
+
+ # vals = [(123, 23), (125678, 5678), (123.546756, 23.77),
+ # (123.546756, 102.77), (0.1234, 0.0224),
+ # (0.001234, 0.000224), (0.001234, 0.0000224)]
+ # for val in :
+ # print val, "=>", round_deviation(val)
diff --git a/scripts/postprocessing/stat.py b/scripts/postprocessing/stat.py
new file mode 100644
index 0000000..893c0fc
--- /dev/null
+++ b/scripts/postprocessing/stat.py
@@ -0,0 +1,199 @@
+import sys
+import time
+
+from copy import deepcopy
+
+import numpy
+import scipy.optimize as scp
+import matplotlib.pyplot as plt
+
+import io_py_result_processor as io_test
+
+# Index of each field inside a series key tuple; get_key() uses it to
+# separate the grouping field from the rest of the key.
+# NOTE(review): deviation_generation() reads key_pos['concurence'],
+# which is not defined here - confirm the intended position.
+key_pos = {'blocksize': 0, 'direct_io': 1, 'name': 2}
+# Values plot_generation() iterates over; it saves at most one image per
+# (action, type) combination.
+actions = ['randwrite', 'randread', 'read', 'write']
+types = ['s', 'd']
+# Colour names handed to gen_app_plot(), one per plotted series.
+colors = ['red', 'green', 'blue', 'cyan',
+ 'magenta', 'black', 'yellow', 'burlywood']
+
+def get_key(x, no):
+    """Split the series key ``x`` around the field called ``no``.
+
+    :param x: key tuple laid out as described by ``key_pos``.
+    :param no: name of a ``key_pos`` field.
+    :returns: ``(rest, value)`` where ``value`` is ``x[key_pos[no]]`` and
+        ``rest`` is the tuple of the remaining fields in ascending
+        position order.
+    :raises KeyError: if ``no`` is not a ``key_pos`` field.
+    """
+    group_pos = key_pos[no]
+    # Sort the positions explicitly: dict iteration order is arbitrary on
+    # Python 2, which made the layout of ``rest`` unpredictable.
+    rest = tuple(x[pos] for pos in sorted(key_pos.values())
+                 if pos != group_pos)
+    return rest, x[group_pos]
+
+
+def generate_groups(data, group_id):
+    """Regroup processed series by the key field ``group_id``.
+
+    :param data: dict of processed series (key tuple -> value).
+    :param group_id: name of the ``key_pos`` field to group by.
+    :returns: dict mapping the key without that field to a dict
+        ``{field value: series value}``.
+    """
+    result = {}
+
+    for series_key, series_val in data.items():
+        rest_key, axis_val = get_key(series_key, group_id)
+        if rest_key not in result:
+            result[rest_key] = {}
+        result[rest_key][axis_val] = series_val
+
+    return result
+
+
+def gen_dots(val):
+    """Turn one group of series into plot coordinates.
+
+    :param val: dict mapping an X label to a sequence whose first element
+        is the measured value.  The label without its last character must
+        parse as an integer.
+        NOTE(review): the dropped character looks like a unit suffix
+        ('4k'); units are not converted, so '4k' and '4m' both become 4 -
+        confirm that mixed units never occur.
+    :returns: ``(ox, oy)`` lists ordered by ascending X, where ``oy``
+        holds ``1.0 / value`` (or 0 when the value is 0).
+    :raises ValueError: if a label has no integer prefix.
+    """
+    ox = []
+    oy = []
+    # Sort numerically: plain string sorting puts '128k' before '4k' and
+    # made line plots zigzag.  The label itself breaks ties.
+    for label in sorted(val, key=lambda lbl: (int(lbl[:-1]), lbl)):
+        ox.append(int(label[:-1]))
+        value = val[label][0]
+        oy.append(1.0 / value if value != 0 else 0)
+    return ox, oy
+
+
+def gen_line_numpy(x, y):
+    """Fit a straight line to the dots with ``numpy.linalg.lstsq``.
+
+    The fitted coefficients are printed to stdout.
+
+    :param x: numpy array of X values.
+    :param y: Y values, one per X value.
+    :returns: ``(x, fitted_y)`` - the input X array and the line
+        evaluated at those points.
+    """
+    design = numpy.vstack([x, numpy.ones(len(x))]).T
+    coef = numpy.linalg.lstsq(design, y)[0]
+    print(coef)
+    slope, intercept = coef[0], coef[1]
+    return x, slope * x + intercept
+
+
+def gen_line_scipy(x, y):
+    """Fit a straight line to the dots by minimising the relative error.
+
+    ``scipy.optimize.leastsq`` is run on the residual
+    ``1 - y / (a * x + b)`` starting from ``a = 1, b = 0``.  The fitted
+    coefficients are printed to stdout.
+
+    :param x: numpy array of X values.
+    :param y: numpy array of Y values.
+    :returns: ``(xx, yy)`` - 50 evenly spaced points between
+        ``x.min()`` and ``x.max()`` and the line evaluated there.
+    :raises ValueError: if the optimiser reports no solution.
+    """
+    def line(coef, arg):
+        return coef[0] * arg + coef[1]
+
+    def relative_error(coef, arg, expected):
+        return 1.0 - expected / line(coef, arg)
+
+    initial = (1.0, 0.0)
+    fitted, ier = scp.leastsq(relative_error, initial, args=(x, y),
+                              diag=(1. / x.mean(), 1. / y.mean()))
+    # leastsq signals success with ier 1, 2, 3 or 4; the former
+    # range(1, 4) check rejected the valid value 4.
+    if ier not in (1, 2, 3, 4):
+        raise ValueError("No line for this dots")
+    xx = numpy.linspace(x.min(), x.max(), 50)
+    print(fitted)
+    return xx, line(fitted, xx)
+
+
+def gen_app_plot(key, val, plot, color):
+    """Draw one series: the measured dots plus a fitted line if possible.
+
+    The dots, the series name and (through gen_line_scipy) the fitted
+    coefficients are printed to stdout as a side effect.
+
+    :param key: series key; its parts joined with ``_`` form the label.
+    :param val: group dict accepted by ``gen_dots``.
+    :param plot: object with a matplotlib-style ``plot`` method.
+    :param color: colour used for both the line and the dots.
+    :returns: False when the series has fewer than two dots (nothing is
+        drawn), True otherwise.
+    """
+    dots_x, dots_y = gen_dots(val)
+    label = "_".join(map(str, key))
+
+    if len(dots_x) < 2:
+        # a single dot cannot be approximated - skip the series
+        return False
+
+    xs = numpy.array(dots_x)
+    ys = numpy.array(dots_y)
+    print("%s %s" % (xs, ys))
+
+    try:
+        print(label)
+        line_x, line_y = gen_line_scipy(xs, ys)
+        plot.plot(line_x, line_y, color=color)
+    except ValueError:
+        # No approximation available.  That is fine: the dots below are
+        # still drawn, so the series stays visible without its line.
+        pass
+
+    # plot.loglog() takes the same arguments if log axes are wanted
+    plot.plot(xs, ys, '^', label=label, markersize=7, color=color)
+    return True
+
+
+def save_plot(key, val):
+    """Draw one series on the current pyplot figure as a plain line.
+
+    :param key: series key; its parts joined with ``_`` form the label.
+    :param val: group dict accepted by ``gen_dots``.
+    """
+    xs, ys = gen_dots(val)
+    plt.plot(xs, ys, label="_".join(map(str, key)))
+
+
+def plot_generation(fname, group_by):
+    """Save PNG plots of the IOPS found in ``fname``.
+
+    The series are summarised, regrouped by the ``group_by`` key field
+    and drawn with gen_app_plot().  For every (action, cache type)
+    combination that produced at least one drawing an image named
+    ``<group_by>__<action>_<type>.png`` is saved.  The regrouped data is
+    printed to stdout.
+
+    :param fname: io.py result file.
+    :param group_by: name of the ``key_pos`` field used as the X axis.
+    """
+    data = list(io_test.load_io_py_file(fname))
+    item = io_test.Data("hdr")
+    for key, vals in io_test.groupby_globally(data, io_test.key_func).items():
+        item.series[key] = [val['iops'] for val in vals]
+    io_test.process_inplace(item)
+
+    pr_data = generate_groups(item.processed_series, group_by)
+    print(pr_data)
+
+    plot = plt.subplot(111)
+
+    for action in actions:
+        for tp in types:
+            color = 0
+            hasPlot = False
+            for key, val in pr_data.items():
+                if action in key and tp in key:
+                    # Wrap around the palette instead of raising
+                    # IndexError once there are more series than colours.
+                    ok = gen_app_plot(key, val, plot,
+                                      colors[color % len(colors)])
+                    hasPlot = hasPlot or ok
+                    color += 1
+                    # use it for just connect dots
+                    # save_plot(key, val)
+            if not hasPlot:
+                continue
+
+            # Shrink current axis by 10% to make room for the legend
+            box = plot.get_position()
+            plot.set_position([box.x0, box.y0 + box.height * 0.1,
+                               box.width, box.height * 0.9])
+
+            # Put a legend to the bottom
+            plot.legend(loc='lower center', bbox_to_anchor=(0.5, -0.25),
+                        fancybox=True, shadow=True, ncol=4,
+                        fontsize='xx-small')
+            plt.title("Plot for %s on %s" % (group_by, action))
+            plt.ylabel("time")
+            plt.xlabel(group_by)
+            plt.grid()
+            # use it if want scale plot somehow
+            # plt.axis([0.0, 5000.0, 0.0, 64.0])
+            name = "%s__%s_%s.png" % (group_by, action, tp)
+            plt.savefig(name, format='png', dpi=100)
+            # start the next image from an empty figure
+            plt.clf()
+            plot = plt.subplot(111)
+
+
+def deviation_on_deviation(groups_list, data):
+    """Summarise ``data`` as a whole and chunk by chunk.
+
+    For every group size the data is cut into consecutive chunks of that
+    size (an incomplete trailing chunk is ignored), the truncated mean of
+    each chunk is taken, and those means are summarised in turn.
+
+    :param groups_list: chunk sizes, each between 1 and ``len(data)``.
+    :param data: list of numbers.
+    :returns: list of ``(mean, deviation)`` pairs: first the summary of
+        all data, then one entry per group size.
+    :raises ValueError: if a group size is outside ``1..len(data)``.
+        A size <= 0 used to loop forever and an oversized one ended in
+        a ZeroDivisionError.
+    """
+    for group in groups_list:
+        if not 0 < group <= len(data):
+            raise ValueError(
+                "group size {0} is outside 1..{1}".format(group, len(data)))
+
+    total_dev = io_test.round_deviation(io_test.med_dev(data))
+    grouped_dev = [total_dev]
+    for group in groups_list:
+        local_dev = [
+            io_test.round_deviation(
+                io_test.med_dev(data[beg:beg + group]))[0]
+            for beg in range(0, len(data) - group + 1, group)]
+        grouped_dev.append(
+            io_test.round_deviation(io_test.med_dev(local_dev)))
+    return grouped_dev
+
+
+
+def deviation_generation(fname, groups_list):
+    """Print the deviation summary of every series found in ``fname``.
+
+    :param fname: io.py result file.
+    :param groups_list: chunk sizes (anything ``int()`` accepts) passed
+        on to deviation_on_deviation().
+    """
+    # NOTE(review): key_pos has no 'concurence' entry, so the former
+    # key_pos['concurence'] lookup always raised KeyError.  Without that
+    # field the IOPS are used unscaled - confirm the intended scaling.
+    conc_pos = key_pos.get('concurence')
+    int_list = [int(i) for i in groups_list]
+    data = list(io_test.load_io_py_file(fname))
+    item = io_test.Data("hdr")
+    for key, vals in io_test.groupby_globally(data, io_test.key_func).items():
+        conc = 1 if conc_pos is None else key[conc_pos]
+        item.series[key] = [val['iops'] * conc for val in vals]
+        print(deviation_on_deviation(int_list, item.series[key]))
+
+
+def main(argv):
+    """Dispatch the ``plot`` and ``dev`` sub-commands.
+
+    :param argv: ``[prog, "plot", FILE, GROUP_BY]`` or
+        ``[prog, "dev", FILE, GROUP_SIZE, ...]``.
+    :returns: 0 on success, 1 when the arguments match neither form
+        (a usage message is written to stderr).
+    """
+    if len(argv) >= 4 and argv[1] == "plot":
+        plot_generation(argv[2], argv[3])
+        return 0
+
+    if len(argv) >= 3 and argv[1] == "dev":
+        deviation_generation(argv[2], argv[3:])
+        return 0
+
+    prog = argv[0] if argv else "stat.py"
+    sys.stderr.write(
+        "Usage: {0} plot FILE GROUP_BY\n"
+        "       {0} dev FILE [GROUP_SIZE ...]\n".format(prog))
+    return 1
+
+
+if __name__ == "__main__":
+    # sys.exit() is always available; the bare exit() helper is installed
+    # by the site module and is missing e.g. under "python -S".
+    sys.exit(main(sys.argv))
+
+
+
+