add postprocessing, table difference generation and scipy approximation

commit: 031664451570f2e32eb70c58b1d592435e275fff [log] [tgz]
author: Ved-vampir <akiselyova@mirantis.com> Fri Apr 10 17:28:23 2015 +0300
committer: Ved-vampir <akiselyova@mirantis.com> Fri Apr 10 17:29:22 2015 +0300
tree: 3e1c376e05cf4cc8807b323dab4fd6c38ab00e56
parent: 59d80f70ce706cc27f5a0eeda505346698ec3f43 [diff] [blame]
diff --git a/statistic.py b/statistic.py
index a662901..7ecdaa0 100644
--- a/statistic.py
+++ b/statistic.py

@@ -1,6 +1,8 @@
 import math
 import itertools
+from numpy import array, linalg
 from numpy.polynomial.chebyshev import chebfit, chebval
+from scipy.optimize import leastsq
 
 
 def med_dev(vals):
@@ -39,12 +41,69 @@
 
 
 def approximate_line(x, y, xnew, relative_dist=False):
-    """returns ynew - y values of linear approximation"""
+    """ x, y - test data, xnew - dots, where we want find approximation
+        if not relative_dist distance = y - newy
+        returns ynew - y values of linear approximation"""
+    # convert to numpy.array (don't work without it)
+    ox = array(x)
+    oy = array(y)
+    # define function for initial value
+    def get_init(x, y):
+        """ create initial value for better work of leastsq """
+        A = [[x[i], 1.0] for i in range(0, 2)]
+        b = [y[i] for i in range(0, 2)]
+        return tuple(linalg.solve(A, b))
+    # set approximation function
+    funcLine = lambda tpl, x: tpl[0] * x + tpl[1]
+    # choose distance mode
+    if relative_dist:
+        ErrorFunc = lambda tpl, x, y: 1.0 - y/funcLine(tpl, x)
+    else:
+        ErrorFunc = lambda tpl, x, y: y - funcLine(tpl, x)
+    # choose initial value
+    tplInitial = get_init(ox, oy)
+    # find line
+    tplFinal, success = leastsq(ErrorFunc, tplInitial[:], args=(ox, oy))
+    # if error
+    if success not in range(1, 5):
+        raise ValueError("No line for this dots")
+    # return new dots
+    return funcLine(tplFinal, array(xnew))
 
 
 def difference(y, ynew):
     """returns average and maximum relative and
-       absolute differences between y and ynew"""
+       absolute differences between y and ynew
+       result may contain None values for y = 0
+       return value - tuple:
+       [(abs dif, rel dif) * len(y)],
+       (abs average, abs max),
+       (rel average, rel max)"""
+    da_sum = 0.0
+    dr_sum = 0.0
+    da_max = 0.0
+    dr_max = 0.0
+    dlist = []
+    for y1, y2 in zip(y, ynew):
+        # absolute
+        da = y1 - y2
+        da_sum += abs(da)
+        if abs(da) > da_max:
+            da_max = da
+        # relative
+        if y1 != 0:
+            dr = abs(da / y1)
+            dr_sum += dr
+            if dr > dr_max:
+                dr_max = dr
+        else:
+            dr = None
+        # add to list
+        dlist.append((da, dr))
+    da_sum /= len(y)
+    dr_sum /= len(y)
+    return dlist, (da_sum, da_max), (dr_sum, dr_max)
+
 
 
 def calculate_distribution_properties(data):
commit	031664451570f2e32eb70c58b1d592435e275fff	[log] [tgz]
author	Ved-vampir <akiselyova@mirantis.com>	Fri Apr 10 17:28:23 2015 +0300
committer	Ved-vampir <akiselyova@mirantis.com>	Fri Apr 10 17:29:22 2015 +0300
tree	3e1c376e05cf4cc8807b323dab4fd6c38ab00e56
parent	59d80f70ce706cc27f5a0eeda505346698ec3f43 [diff] [blame]