add postprocessing, table difference generation and scipy approximation
diff --git a/statistic.py b/statistic.py
index a662901..7ecdaa0 100644
--- a/statistic.py
+++ b/statistic.py
@@ -1,6 +1,8 @@
import math
import itertools
+from numpy import array, linalg
from numpy.polynomial.chebyshev import chebfit, chebval
+from scipy.optimize import leastsq
def med_dev(vals):
@@ -39,12 +41,69 @@
def approximate_line(x, y, xnew, relative_dist=False):
- """returns ynew - y values of linear approximation"""
+ """ x, y - test data, xnew - dots, where we want find approximation
+ if not relative_dist distance = y - newy
+ returns ynew - y values of linear approximation"""
+ # convert to numpy.array (don't work without it)
+ ox = array(x)
+ oy = array(y)
+ # define function for initial value
+ def get_init(x, y):
+ """ create initial value for better work of leastsq """
+ A = [[x[i], 1.0] for i in range(0, 2)]
+ b = [y[i] for i in range(0, 2)]
+ return tuple(linalg.solve(A, b))
+ # set approximation function
+ funcLine = lambda tpl, x: tpl[0] * x + tpl[1]
+ # choose distance mode
+ if relative_dist:
+ ErrorFunc = lambda tpl, x, y: 1.0 - y/funcLine(tpl, x)
+ else:
+ ErrorFunc = lambda tpl, x, y: y - funcLine(tpl, x)
+ # choose initial value
+ tplInitial = get_init(ox, oy)
+ # find line
+ tplFinal, success = leastsq(ErrorFunc, tplInitial[:], args=(ox, oy))
+ # if error
+ if success not in range(1, 5):
+ raise ValueError("No line for this dots")
+ # return new dots
+ return funcLine(tplFinal, array(xnew))
def difference(y, ynew):
"""returns average and maximum relative and
- absolute differences between y and ynew"""
+ absolute differences between y and ynew
+ result may contain None values for y = 0
+ return value - tuple:
+ [(abs dif, rel dif) * len(y)],
+ (abs average, abs max),
+ (rel average, rel max)"""
+ da_sum = 0.0
+ dr_sum = 0.0
+ da_max = 0.0
+ dr_max = 0.0
+ dlist = []
+ for y1, y2 in zip(y, ynew):
+ # absolute
+ da = y1 - y2
+ da_sum += abs(da)
+ if abs(da) > da_max:
+ da_max = da
+ # relative
+ if y1 != 0:
+ dr = abs(da / y1)
+ dr_sum += dr
+ if dr > dr_max:
+ dr_max = dr
+ else:
+ dr = None
+ # add to list
+ dlist.append((da, dr))
+ da_sum /= len(y)
+ dr_sum /= len(y)
+ return dlist, (da_sum, da_max), (dr_sum, dr_max)
+
def calculate_distribution_properties(data):