blob: e2021e1115570d68c930abb6f282294b38dc48d0 [file] [log] [blame]
koder aka kdanilov6c491062015-04-09 22:33:13 +03001import math
2import itertools
koder aka kdanilovcff7b2e2015-04-18 20:48:15 +03003
4try:
koder aka kdanilovf86d7af2015-05-06 04:01:54 +03005 from scipy import stats
koder aka kdanilovcff7b2e2015-04-18 20:48:15 +03006 from numpy import array, linalg
7 from scipy.optimize import leastsq
8 from numpy.polynomial.chebyshev import chebfit, chebval
koder aka kdanilovf86d7af2015-05-06 04:01:54 +03009 no_numpy = False
koder aka kdanilovcff7b2e2015-04-18 20:48:15 +030010except ImportError:
11 no_numpy = True
koder aka kdanilov6c491062015-04-09 22:33:13 +030012
13
def average(data):
    """Return the arithmetic mean of ``data``.

    ``data`` must support ``len()`` and iteration. The division is always a
    true (floating point) division, so integer input is not truncated when
    the module runs under Python 2.

    Raises ZeroDivisionError when ``data`` is empty.
    """
    # float() on the divisor forces true division on Python 2 as well
    return sum(data) / float(len(data))
16
17
def med_dev(vals):
    """Return ``(mean, deviation)`` for the sequence ``vals``.

    Despite the name, the first item is the arithmetic mean, not the median.
    The deviation is the sample standard deviation (divisor ``len - 1``).

    A single-element sequence yields ``(vals[0], 0.0)``.
    Raises ZeroDivisionError when ``vals`` is empty.
    """
    count = len(vals)
    if count == 1:
        # one sample has no spread; also avoids dividing by (count - 1) == 0
        return vals[0], 0.0

    # float() forces true division, so integer samples are not truncated
    # on Python 2 (which would also skew the deviation computed below)
    mean = sum(vals) / float(count)
    squared_sum = sum((mean - item) ** 2.0 for item in vals)
    dev = (squared_sum / (count - 1)) ** 0.5
    return mean, dev
25
26
def round_3_digit(val):
    """Truncate ``val`` to roughly three significant digits.

    The value is passed to ``round_deviation`` together with an artificial
    deviation of one tenth of its magnitude; only the rounded value is
    returned.
    """
    # abs() keeps the artificial deviation positive: a negative deviation
    # would hit round_deviation's "too small" shortcut and leave negative
    # values unrounded
    return round_deviation((val, abs(val) / 10.0))[0]
29
30
def round_deviation(med_dev):
    """Truncate a ``(value, deviation)`` pair to the deviation's precision.

    The deviation keeps its two leading decimal digits and the value is
    truncated to the same decimal position. Both results are converted back
    to the types of the corresponding input items and returned as a
    two-element list.

    When the deviation is below 1E-7 the argument is returned unchanged.
    """
    value, spread = med_dev

    if spread < 1E-7:
        # nothing meaningful to round against
        return med_dev

    # decimal position of the second significant digit of the deviation
    exponent = math.floor(math.log10(spread)) - 1
    step = 10.0 ** exponent

    # int() truncates towards zero, so this drops digits rather than rounds
    spread = int(spread / step) * step
    value = int(value / step) * step

    value_type = type(med_dev[0])
    spread_type = type(med_dev[1])
    return [value_type(value), spread_type(spread)]
koder aka kdanilov6c491062015-04-09 22:33:13 +030042
43
def groupby_globally(data, key_func):
    """Group every item of ``data`` by ``key_func(item)``.

    Unlike ``itertools.groupby``, items sharing a key are collected together
    even when they are not adjacent in ``data``, so no pre-sorting is needed.

    Returns a dict mapping each key to the list of its items, in input order.
    Any hashable key is accepted; list keys are converted to tuples so they
    can be used as dict keys.
    """
    grouped = {}

    for item in data:
        key = key_func(item)
        if isinstance(key, list):
            # lists are unhashable; keep them usable as dictionary keys
            key = tuple(key)
        grouped.setdefault(key, []).append(item)

    return grouped
53
54
def approximate_curve(x, y, xnew, curved_coef):
    """Evaluate a Chebyshev fit of the points (x, y) at ``xnew``.

    ``curved_coef`` is handed to ``chebfit`` as its third argument.
    Returns None when the numpy/scipy imports were not available.
    """
    if not no_numpy:
        coefficients = chebfit(x, y, curved_coef)
        return chebval(xnew, coefficients)

    return None
61
62
def approximate_line(x, y, xnew, relative_dist=False):
    """Fit a straight line to the points (x, y) and evaluate it at ``xnew``.

    x, y - measured data, xnew - points where the approximation is wanted.
    When ``relative_dist`` is false the minimized residual is ``y - line(x)``,
    otherwise it is ``1 - y / line(x)``.

    Returns the line values at ``xnew``, or None when the numpy/scipy
    imports were not available.
    Raises ValueError when ``leastsq`` does not report a solution.
    """
    if no_numpy:
        return None

    # leastsq needs numpy arrays for the element-wise arithmetic below
    xs = array(x)
    ys = array(y)

    def line(coefs, arg):
        return coefs[0] * arg + coefs[1]

    # pick the residual function for the requested distance mode
    if relative_dist:
        def residuals(coefs, px, py):
            return 1.0 - py / line(coefs, px)
    else:
        def residuals(coefs, px, py):
            return py - line(coefs, px)

    # initial guess: the line passing exactly through the first two points
    first_two = [[xs[0], 1.0], [xs[1], 1.0]]
    start = tuple(linalg.solve(first_two, ys[:2]))

    fitted, status = leastsq(residuals, start, args=(xs, ys))

    # leastsq reports a found solution with a status flag of 1, 2, 3 or 4
    if status not in (1, 2, 3, 4):
        raise ValueError("No line for this dots")

    return line(fitted, array(xnew))
koder aka kdanilov6c491062015-04-09 22:33:13 +0300102
103
def difference(y, ynew):
    """Return absolute and relative differences between ``y`` and ``ynew``.

    Items are paired positionally with ``zip``. For each pair the absolute
    difference is the signed value ``y - ynew`` and the relative difference
    is ``abs((y - ynew) / y)``.

    Return value - tuple:
        [(abs dif, rel dif) * len(y)],
        (abs average, abs max),
        (rel average, rel max)

    Raises ZeroDivisionError when the magnitude of a reference value in
    ``y`` is not above 1E-6, or when the inputs are empty.
    """
    abs_dlist = []
    rel_dlist = []

    for y1, y2 in zip(y, ynew):
        abs_diff = y1 - y2
        abs_dlist.append(abs_diff)

        # compare the magnitude: negative reference values are valid,
        # only values close to zero make the relative difference meaningless
        if abs(y1) > 1E-6:
            rel_dlist.append(abs(abs_diff / y1))
        else:
            raise ZeroDivisionError("{0!r} is too small".format(y1))

    da_avg = sum(abs_dlist) / len(abs_dlist)
    dr_avg = sum(rel_dlist) / len(rel_dlist)

    # list() keeps the documented list result on Python 3 as well
    return (list(zip(abs_dlist, rel_dlist)),
            (da_avg, max(abs_dlist)),
            (dr_avg, max(rel_dlist)))
koder aka kdanilov6c491062015-04-09 22:33:13 +0300131
132
def calculate_distribution_properties(data):
    """Placeholder for distribution statistics (chi, etc).

    Not implemented: ``data`` is ignored and None is returned.
    """
    return None
135
136
def minimal_measurement_count(data, max_diff, req_probability):
    """Placeholder for estimating the required number of measurements.

    Intended to return how many measurements are needed so that the results
    (average and deviation) have an error below ``max_diff`` in at least
    ``req_probability`` percent of cases.

    Not implemented: all arguments are ignored and None is returned.
    """
    return None
koder aka kdanilovf86d7af2015-05-06 04:01:54 +0300142
143
class StatProps(object):
    """Container for summary statistics of one data series.

    Every attribute starts as None; ``data_property`` in this module fills
    them in and leaves them all None for empty input.
    """

    def __init__(self):
        self.average = None     # arithmetic mean
        self.mediana = None     # median
        self.perc_95 = None     # 95th percentile sample
        self.perc_5 = None      # 5th percentile sample
        self.deviation = None   # sample standard deviation
        self.confidence = None  # confidence interval half-width
        self.min = None
        self.max = None
        self.raw = None         # sorted copy of the source data

    def rounded_average_conf(self):
        """Return the average and confidence truncated by round_deviation."""
        return round_deviation((self.average, self.confidence))

    def rounded_average_dev(self):
        """Return the average and deviation truncated by round_deviation."""
        return round_deviation((self.average, self.deviation))

    def __str__(self):
        def shorten(value):
            # unpopulated fields stay None instead of failing in the
            # arithmetic done by round_3_digit
            if value is None:
                return None
            return round_3_digit(value)

        return "StatProps({0} ~ {1})".format(shorten(self.average),
                                             shorten(self.deviation))

    def __repr__(self):
        return str(self)
168
koder aka kdanilovf86d7af2015-05-06 04:01:54 +0300169
def data_property(data, confidence=0.95):
    """Compute summary statistics of ``data`` and return them as StatProps.

    data - sequence of numbers (must support ``len()``).
    confidence - confidence level used for the confidence interval.

    For empty input a StatProps with every field left as None is returned.
    The confidence half-width is computed from the Student t distribution
    when scipy is available and there are at least three samples; otherwise
    the deviation is used in its place.
    """
    res = StatProps()
    if len(data) == 0:
        return res

    data = sorted(data)
    res.average, res.deviation = med_dev(data)
    res.min = data[0]
    res.max = data[-1]

    ln = len(data)
    # floor division: a list index must be an integer on Python 3 as well
    mid = ln // 2
    if ln % 2 == 0:
        # 2.0 avoids truncating the median of integer samples on Python 2
        res.mediana = (data[mid] + data[mid - 1]) / 2.0
    else:
        res.mediana = data[mid]

    res.perc_95 = data[int((ln - 1) * 0.95)]
    res.perc_5 = data[int((ln - 1) * 0.05)]

    if not no_numpy and ln >= 3:
        # two-sided interval: standard error times the t quantile
        quantile = stats.t.ppf((1 + confidence) / 2.0, ln - 1)
        res.confidence = stats.sem(data) * quantile
    else:
        res.confidence = res.deviation

    # data is already a sorted copy; store another copy so later changes to
    # the local list cannot leak into the result
    res.raw = data[:]
    return res