add postprocessing, table difference generation and scipy approximation
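
The patch drops the disk_perf_test_tool package prefix and moves io_results_loader.py to the
repository root, so the scripts are meant to be run from there. Below is a minimal usage sketch
of the reworked entry points; the module and function names come from this patch, while the
results file name is hypothetical and stands in for the raw output of the io tests:

    import matplotlib.pyplot as plt

    from io_results_loader import load_data
    from assumptions_check import linearity_plot, linearity_table

    # "raw_results.txt" is a placeholder for a real results file
    data = list(load_data(open("raw_results.txt").read()))
    linearity_table(data, ["rwd", "rws", "rrd"], [4096, 4096 * 1024])
    linearity_plot(data, ["rwd", "rws", "rrd"])
    plt.show()
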
diff --git a/assumptions_check.py b/assumptions_check.py
index e17586e..05f9d0e 100644
--- a/assumptions_check.py
+++ b/assumptions_check.py
@@ -1,13 +1,15 @@
import sys
import numpy as np
+import texttable as TT
import matplotlib.pyplot as plt
from numpy.polynomial.chebyshev import chebfit, chebval
-from disk_perf_test_tool.tests.io_results_loader import load_data, filter_data
+from io_results_loader import load_data, filter_data
+from statistic import approximate_line, difference, round_deviation
-def linearity_plot(plt, data, types):
+def linearity_plot(data, types, vals=None):
fields = 'blocksize_b', 'iops_mediana', 'iops_stddev'
names = {}
@@ -18,11 +20,20 @@
name = "{0} {1} {2}".format(*sq)
names["".join(word[0] for word in sq)] = name
+ colors = ['red', 'green', 'blue', 'cyan',
+ 'magenta', 'black', 'yellow', 'burlywood']
+ markers = ['*', '^', 'x', 'o', '+', '.']
+ color = 0
+ marker = 0
+
for tp in types:
filtered_data = filter_data('linearity_test_' + tp, fields)
x = []
y = []
e = []
+        # points used to fit the approximation line
+ ax = []
+ ay = []
for sz, med, dev in sorted(filtered_data(data)):
iotime_ms = 1000. // med
@@ -31,11 +42,76 @@
x.append(sz / 1024)
y.append(iotime_ms)
e.append(iotime_max - iotime_ms)
+ if vals is None or sz in vals:
+ ax.append(sz / 1024)
+ ay.append(iotime_ms)
- plt.errorbar(x, y, e, linestyle='None', marker=names[tp])
+ plt.errorbar(x, y, e, linestyle='None', label=names[tp],
+ color=colors[color], ecolor="black",
+ marker=markers[marker])
+ ynew = approximate_line(ax, ay, ax, True)
+ plt.plot(ax, ynew, color=colors[color])
+ color += 1
+ marker += 1
plt.legend(loc=2)
+def linearity_table(data, types, vals):
+    """ print a texttable report with absolute and relative differences
+    between the measured values and their linear approximation
+    vals - blocksizes used to fit the approximation line"""
+ fields = 'blocksize_b', 'iops_mediana'
+ for tp in types:
+ filtered_data = filter_data('linearity_test_' + tp, fields)
+ # all values
+ x = []
+ y = []
+        # points used to fit the approximation line
+ ax = []
+ ay = []
+
+ for sz, med in sorted(filtered_data(data)):
+ iotime_ms = 1000. // med
+ x.append(sz / 1024.0)
+ y.append(iotime_ms)
+ if sz in vals:
+ ax.append(sz / 1024.0)
+ ay.append(iotime_ms)
+
+
+ ynew = approximate_line(ax, ay, x, True)
+
+ dif, _, _ = difference(y, ynew)
+ table_data = []
+ for i, d in zip(x, dif):
+        row = [i, round(d[0], 3), None if d[1] is None else round(d[1] * 100, 3)]
+ table_data.append(row)
+
+ tab = TT.Texttable()
+ tab.set_deco(tab.HEADER | tab.VLINES | tab.BORDER | tab.HLINES)
+
+        header = ["BlockSize, kB", "Absolute difference (ms)",
+                  "Relative difference (%)"]
+        tab.header(header)
+
+ for row in table_data:
+ tab.add_row(row)
+
+ print tp
+ print tab.draw()
+
+ # uncomment to get table in pretty pictures :)
+ # colLabels = ("BlockSize, kB", "Absolute difference (ms)", "Relative difference (%)")
+ # fig = plt.figure()
+ # ax = fig.add_subplot(111)
+ # ax.axis('off')
+ # #do the table
+ # the_table = ax.table(cellText=table_data,
+ # colLabels=colLabels,
+ # loc='center')
+ # plt.savefig(tp+".png")
+
+
def th_plot(data, tt):
fields = 'concurence', 'iops_mediana', 'lat_mediana'
conc_4k = filter_data('concurrence_test_' + tt, fields, blocksize='4k')
@@ -84,8 +160,10 @@
def main(argv):
data = list(load_data(open(argv[1]).read()))
- # linearity_plot(data)
- th_plot(data, 'rws')
+ linearity_table(data, ["rwd", "rws", "rrd"], [4096, 4096*1024])
+ # linearity_plot(data, ["rwd", "rws", "rrd"])#, [4096, 4096*1024])
+ # linearity_plot(data, ["rws", "rwd"])
+ # th_plot(data, 'rws')
# th_plot(data, 'rrs')
plt.show()
diff --git a/tests/io_results_loader.py b/io_results_loader.py
similarity index 93%
rename from tests/io_results_loader.py
rename to io_results_loader.py
index 2ccd9de..9c49e06 100644
--- a/tests/io_results_loader.py
+++ b/io_results_loader.py
@@ -2,8 +2,8 @@
import json
-from disk_perf_test_tool.utils import ssize_to_b
-from disk_perf_test_tool.scripts.postprocessing import data_stat
+from utils import ssize_to_b
+import statistic as data_stat
def parse_output(out_err):
diff --git a/statistic.py b/statistic.py
index a662901..7ecdaa0 100644
--- a/statistic.py
+++ b/statistic.py
@@ -1,6 +1,8 @@
import math
import itertools
+from numpy import array, linalg
from numpy.polynomial.chebyshev import chebfit, chebval
+from scipy.optimize import leastsq
def med_dev(vals):
@@ -39,12 +41,69 @@
def approximate_line(x, y, xnew, relative_dist=False):
- """returns ynew - y values of linear approximation"""
+    """ x, y - test data; xnew - points where the approximation is evaluated
+        if relative_dist the residual is 1 - y/ynew, otherwise y - ynew
+        returns ynew - y values of the linear approximation"""
+    # leastsq and the line function need numpy arrays, so convert the input
+ ox = array(x)
+ oy = array(y)
+    # initial guess: the exact line through the first two points
+    def get_init(x, y):
+        """ starting point for leastsq """
+ A = [[x[i], 1.0] for i in range(0, 2)]
+ b = [y[i] for i in range(0, 2)]
+ return tuple(linalg.solve(A, b))
+ # set approximation function
+ funcLine = lambda tpl, x: tpl[0] * x + tpl[1]
+ # choose distance mode
+ if relative_dist:
+ ErrorFunc = lambda tpl, x, y: 1.0 - y/funcLine(tpl, x)
+ else:
+ ErrorFunc = lambda tpl, x, y: y - funcLine(tpl, x)
+ # choose initial value
+ tplInitial = get_init(ox, oy)
+ # find line
+ tplFinal, success = leastsq(ErrorFunc, tplInitial[:], args=(ox, oy))
+    # leastsq reports success with an integer code from 1 to 4
+    if success not in range(1, 5):
+        raise ValueError("Can't approximate the data with a line")
+    # evaluate the fitted line at the requested points
+ return funcLine(tplFinal, array(xnew))
def difference(y, ynew):
"""returns average and maximum relative and
- absolute differences between y and ynew"""
+ absolute differences between y and ynew
+    the relative difference is None where y == 0
+    return value - tuple:
+        [(abs diff, rel diff) * len(y)],
+        (abs average, abs max),
+        (rel average, rel max)"""
+ da_sum = 0.0
+ dr_sum = 0.0
+ da_max = 0.0
+ dr_max = 0.0
+ dlist = []
+ for y1, y2 in zip(y, ynew):
+ # absolute
+ da = y1 - y2
+ da_sum += abs(da)
+ if abs(da) > da_max:
+            da_max = abs(da)
+ # relative
+ if y1 != 0:
+ dr = abs(da / y1)
+ dr_sum += dr
+ if dr > dr_max:
+ dr_max = dr
+ else:
+ dr = None
+ # add to list
+ dlist.append((da, dr))
+ da_sum /= len(y)
+ dr_sum /= len(y)
+ return dlist, (da_sum, da_max), (dr_sum, dr_max)
+
def calculate_distribution_properties(data):
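
For reference, a minimal standalone sketch of how the new leastsq-based fit and the extended
difference report compose; the numbers below are made up for illustration, not taken from a
real run:

    from statistic import approximate_line, difference

    sizes = [4, 8, 16, 32, 64]           # blocksizes, kB
    iotime = [1.1, 2.0, 4.2, 7.9, 16.3]  # measured iotime, ms

    # fit on a subset of the points, evaluate the line at every point
    fitted = approximate_line(sizes[:2] + sizes[-1:],
                              iotime[:2] + iotime[-1:],
                              sizes, True)

    per_point, (abs_avg, abs_max), (rel_avg, rel_max) = difference(iotime, fitted)
    for sz, (da, dr) in zip(sizes, per_point):
        print sz, round(da, 3), None if dr is None else round(dr * 100, 3)
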