add postprocessing, table difference generation and scipy approximation
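
The patch drops the disk_perf_test_tool package prefix and moves io_results_loader.py to the
repository root, so the scripts are meant to be run from there. Below is a minimal usage sketch
of the reworked entry points; the module and function names come from this patch, while the
results file name is hypothetical and stands in for the raw output of the io tests:

    import matplotlib.pyplot as plt

    from io_results_loader import load_data
    from assumptions_check import linearity_plot, linearity_table

    # "raw_results.txt" is a placeholder for a real results file
    data = list(load_data(open("raw_results.txt").read()))
    linearity_table(data, ["rwd", "rws", "rrd"], [4096, 4096 * 1024])
    linearity_plot(data, ["rwd", "rws", "rrd"])
    plt.show()
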
diff --git a/assumptions_check.py b/assumptions_check.py
index e17586e..05f9d0e 100644
--- a/assumptions_check.py
+++ b/assumptions_check.py
@@ -1,13 +1,15 @@
import sys
import numpy as np
+import texttable as TT
import matplotlib.pyplot as plt
from numpy.polynomial.chebyshev import chebfit, chebval
-from disk_perf_test_tool.tests.io_results_loader import load_data, filter_data
+from io_results_loader import load_data, filter_data
+from statistic import approximate_line, difference, round_deviation
-def linearity_plot(plt, data, types):
+def linearity_plot(data, types, vals=None):
fields = 'blocksize_b', 'iops_mediana', 'iops_stddev'
names = {}
@@ -18,11 +20,20 @@
name = "{0} {1} {2}".format(*sq)
names["".join(word[0] for word in sq)] = name
+ colors = ['red', 'green', 'blue', 'cyan',
+ 'magenta', 'black', 'yellow', 'burlywood']
+ markers = ['*', '^', 'x', 'o', '+', '.']
+ color = 0
+ marker = 0
+
for tp in types:
filtered_data = filter_data('linearity_test_' + tp, fields)
x = []
y = []
e = []
+        # points used to fit the approximation line
+ ax = []
+ ay = []
for sz, med, dev in sorted(filtered_data(data)):
iotime_ms = 1000. // med
@@ -31,11 +42,76 @@
x.append(sz / 1024)
y.append(iotime_ms)
e.append(iotime_max - iotime_ms)
+ if vals is None or sz in vals:
+ ax.append(sz / 1024)
+ ay.append(iotime_ms)
- plt.errorbar(x, y, e, linestyle='None', marker=names[tp])
+ plt.errorbar(x, y, e, linestyle='None', label=names[tp],
+ color=colors[color], ecolor="black",
+ marker=markers[marker])
+ ynew = approximate_line(ax, ay, ax, True)
+ plt.plot(ax, ynew, color=colors[color])
+ color += 1
+ marker += 1
plt.legend(loc=2)
+def linearity_table(data, types, vals):
+    """ print a texttable report with absolute and relative differences
+    between the measured values and their linear approximation
+    vals - blocksizes used to fit the approximation line"""
+ fields = 'blocksize_b', 'iops_mediana'
+ for tp in types:
+ filtered_data = filter_data('linearity_test_' + tp, fields)
+ # all values
+ x = []
+ y = []
+        # points used to fit the approximation line
+ ax = []
+ ay = []
+
+ for sz, med in sorted(filtered_data(data)):
+ iotime_ms = 1000. // med
+ x.append(sz / 1024.0)
+ y.append(iotime_ms)
+ if sz in vals:
+ ax.append(sz / 1024.0)
+ ay.append(iotime_ms)
+
+
+ ynew = approximate_line(ax, ay, x, True)
+
+ dif, _, _ = difference(y, ynew)
+ table_data = []
+ for i, d in zip(x, dif):
+        row = [i, round(d[0], 3), None if d[1] is None else round(d[1] * 100, 3)]
+ table_data.append(row)
+
+ tab = TT.Texttable()
+ tab.set_deco(tab.HEADER | tab.VLINES | tab.BORDER | tab.HLINES)
+
+        header = ["BlockSize, kB", "Absolute difference (ms)",
+                  "Relative difference (%)"]
+        tab.header(header)
+
+ for row in table_data:
+ tab.add_row(row)
+
+ print tp
+ print tab.draw()
+
+ # uncomment to get table in pretty pictures :)
+ # colLabels = ("BlockSize, kB", "Absolute difference (ms)", "Relative difference (%)")
+ # fig = plt.figure()
+ # ax = fig.add_subplot(111)
+ # ax.axis('off')
+ # #do the table
+ # the_table = ax.table(cellText=table_data,
+ # colLabels=colLabels,
+ # loc='center')
+ # plt.savefig(tp+".png")
+
+
def th_plot(data, tt):
fields = 'concurence', 'iops_mediana', 'lat_mediana'
conc_4k = filter_data('concurrence_test_' + tt, fields, blocksize='4k')
@@ -84,8 +160,10 @@
def main(argv):
data = list(load_data(open(argv[1]).read()))
- # linearity_plot(data)
- th_plot(data, 'rws')
+ linearity_table(data, ["rwd", "rws", "rrd"], [4096, 4096*1024])
+ # linearity_plot(data, ["rwd", "rws", "rrd"])#, [4096, 4096*1024])
+ # linearity_plot(data, ["rws", "rwd"])
+ # th_plot(data, 'rws')
# th_plot(data, 'rrs')
plt.show()
diff --git a/tests/io_results_loader.py b/io_results_loader.py
similarity index 93%
rename from tests/io_results_loader.py
rename to io_results_loader.py
index 2ccd9de..9c49e06 100644
--- a/tests/io_results_loader.py
+++ b/io_results_loader.py
@@ -2,8 +2,8 @@
import json
-from disk_perf_test_tool.utils import ssize_to_b
-from disk_perf_test_tool.scripts.postprocessing import data_stat
+from utils import ssize_to_b
+import statistic as data_stat
def parse_output(out_err):
diff --git a/statistic.py b/statistic.py
index a662901..7ecdaa0 100644
--- a/statistic.py
+++ b/statistic.py
@@ -1,6 +1,8 @@
import math
import itertools
+from numpy import array, linalg
from numpy.polynomial.chebyshev import chebfit, chebval
+from scipy.optimize import leastsq
def med_dev(vals):
@@ -39,12 +41,69 @@
def approximate_line(x, y, xnew, relative_dist=False):
- """returns ynew - y values of linear approximation"""
+    """ x, y - test data; xnew - points where the approximation is evaluated
+        if relative_dist the residual is 1 - y/ynew, otherwise y - ynew
+        returns ynew - y values of the linear approximation"""
+    # leastsq and the line function need numpy arrays, so convert the input
+ ox = array(x)
+ oy = array(y)
+    # initial guess: the exact line through the first two points
+    def get_init(x, y):
+        """ starting point for leastsq """
+ A = [[x[i], 1.0] for i in range(0, 2)]
+ b = [y[i] for i in range(0, 2)]
+ return tuple(linalg.solve(A, b))
+ # set approximation function
+ funcLine = lambda tpl, x: tpl[0] * x + tpl[1]
+ # choose distance mode
+ if relative_dist:
+ ErrorFunc = lambda tpl, x, y: 1.0 - y/funcLine(tpl, x)
+ else:
+ ErrorFunc = lambda tpl, x, y: y - funcLine(tpl, x)
+ # choose initial value
+ tplInitial = get_init(ox, oy)
+ # find line
+ tplFinal, success = leastsq(ErrorFunc, tplInitial[:], args=(ox, oy))
+    # leastsq reports success with an integer code from 1 to 4
+    if success not in range(1, 5):
+        raise ValueError("Can't approximate the data with a line")
+    # evaluate the fitted line at the requested points
+ return funcLine(tplFinal, array(xnew))
def difference(y, ynew):
"""returns average and maximum relative and
- absolute differences between y and ynew"""
+ absolute differences between y and ynew
+    the relative difference is None where y == 0
+    return value - tuple:
+        [(abs diff, rel diff) * len(y)],
+        (abs average, abs max),
+        (rel average, rel max)"""
+ da_sum = 0.0
+ dr_sum = 0.0
+ da_max = 0.0
+ dr_max = 0.0
+ dlist = []
+ for y1, y2 in zip(y, ynew):
+ # absolute
+ da = y1 - y2
+ da_sum += abs(da)
+ if abs(da) > da_max:
+            da_max = abs(da)
+ # relative
+ if y1 != 0:
+ dr = abs(da / y1)
+ dr_sum += dr
+ if dr > dr_max:
+ dr_max = dr
+ else:
+ dr = None
+ # add to list
+ dlist.append((da, dr))
+ da_sum /= len(y)
+ dr_sum /= len(y)
+ return dlist, (da_sum, da_max), (dr_sum, dr_max)
+
def calculate_distribution_properties(data):
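
For reference, a minimal standalone sketch of how the new leastsq-based fit and the extended
difference report compose; the numbers below are made up for illustration, not taken from a
real run:

    from statistic import approximate_line, difference

    sizes = [4, 8, 16, 32, 64]           # blocksizes, kB
    iotime = [1.1, 2.0, 4.2, 7.9, 16.3]  # measured iotime, ms

    # fit on a subset of the points, evaluate the line at every point
    fitted = approximate_line(sizes[:2] + sizes[-1:],
                              iotime[:2] + iotime[-1:],
                              sizes, True)

    per_point, (abs_avg, abs_max), (rel_avg, rel_max) = difference(iotime, fitted)
    for sz, (da, dr) in zip(sizes, per_point):
        print sz, round(da, 3), None if dr is None else round(dr * 100, 3)
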