koder aka kdanilov | 6c49106 | 2015-04-09 22:33:13 +0300 | [diff] [blame] | 1 | import math |
| 2 | import itertools |
koder aka kdanilov | cff7b2e | 2015-04-18 20:48:15 +0300 | [diff] [blame] | 3 | |
| 4 | try: |
koder aka kdanilov | f86d7af | 2015-05-06 04:01:54 +0300 | [diff] [blame] | 5 | from scipy import stats |
koder aka kdanilov | cff7b2e | 2015-04-18 20:48:15 +0300 | [diff] [blame] | 6 | from numpy import array, linalg |
| 7 | from scipy.optimize import leastsq |
| 8 | from numpy.polynomial.chebyshev import chebfit, chebval |
koder aka kdanilov | f86d7af | 2015-05-06 04:01:54 +0300 | [diff] [blame] | 9 | no_numpy = False |
koder aka kdanilov | cff7b2e | 2015-04-18 20:48:15 +0300 | [diff] [blame] | 10 | except ImportError: |
| 11 | no_numpy = True |
koder aka kdanilov | 6c49106 | 2015-04-09 22:33:13 +0300 | [diff] [blame] | 12 | |
| 13 | |
koder aka kdanilov | bb6d6cd | 2015-06-20 02:55:07 +0300 | [diff] [blame] | 14 | def average(data): |
| 15 | return sum(data) / len(data) |
| 16 | |
| 17 | |
koder aka kdanilov | 6c49106 | 2015-04-09 22:33:13 +0300 | [diff] [blame] | 18 | def med_dev(vals): |
koder aka kdanilov | b6be5c5 | 2016-10-01 01:29:35 +0300 | [diff] [blame^] | 19 | if len(vals) == 1: |
| 20 | return vals[0], 0.0 |
| 21 | |
koder aka kdanilov | 6c49106 | 2015-04-09 22:33:13 +0300 | [diff] [blame] | 22 | med = sum(vals) / len(vals) |
koder aka kdanilov | b6be5c5 | 2016-10-01 01:29:35 +0300 | [diff] [blame^] | 23 | dev = ((sum(abs(med - i) ** 2.0 for i in vals) / (len(vals) - 1)) ** 0.5) |
koder aka kdanilov | 6c49106 | 2015-04-09 22:33:13 +0300 | [diff] [blame] | 24 | return med, dev |
| 25 | |
| 26 | |
koder aka kdanilov | e87ae65 | 2015-04-20 02:14:35 +0300 | [diff] [blame] | 27 | def round_3_digit(val): |
| 28 | return round_deviation((val, val / 10.0))[0] |
| 29 | |
| 30 | |
koder aka kdanilov | 6c49106 | 2015-04-09 22:33:13 +0300 | [diff] [blame] | 31 | def round_deviation(med_dev): |
| 32 | med, dev = med_dev |
| 33 | |
| 34 | if dev < 1E-7: |
| 35 | return med_dev |
| 36 | |
| 37 | dev_div = 10.0 ** (math.floor(math.log10(dev)) - 1) |
| 38 | dev = int(dev / dev_div) * dev_div |
| 39 | med = int(med / dev_div) * dev_div |
koder aka kdanilov | 7e0f7cf | 2015-05-01 17:24:35 +0300 | [diff] [blame] | 40 | return [type(med_dev[0])(med), |
| 41 | type(med_dev[1])(dev)] |
koder aka kdanilov | 6c49106 | 2015-04-09 22:33:13 +0300 | [diff] [blame] | 42 | |
| 43 | |
| 44 | def groupby_globally(data, key_func): |
| 45 | grouped = {} |
| 46 | grouped_iter = itertools.groupby(data, key_func) |
| 47 | |
| 48 | for (bs, cache_tp, act, conc), curr_data_it in grouped_iter: |
| 49 | key = (bs, cache_tp, act, conc) |
| 50 | grouped.setdefault(key, []).extend(curr_data_it) |
| 51 | |
| 52 | return grouped |
| 53 | |
| 54 | |
| 55 | def approximate_curve(x, y, xnew, curved_coef): |
| 56 | """returns ynew - y values of some curve approximation""" |
koder aka kdanilov | cff7b2e | 2015-04-18 20:48:15 +0300 | [diff] [blame] | 57 | if no_numpy: |
| 58 | return None |
| 59 | |
koder aka kdanilov | 6c49106 | 2015-04-09 22:33:13 +0300 | [diff] [blame] | 60 | return chebval(xnew, chebfit(x, y, curved_coef)) |
| 61 | |
| 62 | |
| 63 | def approximate_line(x, y, xnew, relative_dist=False): |
Ved-vampir | 0316644 | 2015-04-10 17:28:23 +0300 | [diff] [blame] | 64 | """ x, y - test data, xnew - dots, where we want find approximation |
| 65 | if not relative_dist distance = y - newy |
| 66 | returns ynew - y values of linear approximation""" |
koder aka kdanilov | 66839a9 | 2015-04-11 13:22:31 +0300 | [diff] [blame] | 67 | |
koder aka kdanilov | cff7b2e | 2015-04-18 20:48:15 +0300 | [diff] [blame] | 68 | if no_numpy: |
| 69 | return None |
| 70 | |
Ved-vampir | 0316644 | 2015-04-10 17:28:23 +0300 | [diff] [blame] | 71 | # convert to numpy.array (don't work without it) |
| 72 | ox = array(x) |
| 73 | oy = array(y) |
koder aka kdanilov | 66839a9 | 2015-04-11 13:22:31 +0300 | [diff] [blame] | 74 | |
Ved-vampir | 0316644 | 2015-04-10 17:28:23 +0300 | [diff] [blame] | 75 | # set approximation function |
koder aka kdanilov | 66839a9 | 2015-04-11 13:22:31 +0300 | [diff] [blame] | 76 | def func_line(tpl, x): |
| 77 | return tpl[0] * x + tpl[1] |
| 78 | |
| 79 | def error_func_rel(tpl, x, y): |
| 80 | return 1.0 - y / func_line(tpl, x) |
| 81 | |
| 82 | def error_func_abs(tpl, x, y): |
| 83 | return y - func_line(tpl, x) |
| 84 | |
Ved-vampir | 0316644 | 2015-04-10 17:28:23 +0300 | [diff] [blame] | 85 | # choose distance mode |
koder aka kdanilov | 66839a9 | 2015-04-11 13:22:31 +0300 | [diff] [blame] | 86 | error_func = error_func_rel if relative_dist else error_func_abs |
| 87 | |
| 88 | tpl_initial = tuple(linalg.solve([[ox[0], 1.0], [ox[1], 1.0]], |
| 89 | oy[:2])) |
| 90 | |
Ved-vampir | 0316644 | 2015-04-10 17:28:23 +0300 | [diff] [blame] | 91 | # find line |
koder aka kdanilov | 66839a9 | 2015-04-11 13:22:31 +0300 | [diff] [blame] | 92 | tpl_final, success = leastsq(error_func, |
| 93 | tpl_initial[:], |
| 94 | args=(ox, oy)) |
| 95 | |
Ved-vampir | 0316644 | 2015-04-10 17:28:23 +0300 | [diff] [blame] | 96 | # if error |
| 97 | if success not in range(1, 5): |
| 98 | raise ValueError("No line for this dots") |
koder aka kdanilov | 66839a9 | 2015-04-11 13:22:31 +0300 | [diff] [blame] | 99 | |
Ved-vampir | 0316644 | 2015-04-10 17:28:23 +0300 | [diff] [blame] | 100 | # return new dots |
koder aka kdanilov | 66839a9 | 2015-04-11 13:22:31 +0300 | [diff] [blame] | 101 | return func_line(tpl_final, array(xnew)) |
koder aka kdanilov | 6c49106 | 2015-04-09 22:33:13 +0300 | [diff] [blame] | 102 | |
| 103 | |
| 104 | def difference(y, ynew): |
| 105 | """returns average and maximum relative and |
Ved-vampir | 0316644 | 2015-04-10 17:28:23 +0300 | [diff] [blame] | 106 | absolute differences between y and ynew |
| 107 | result may contain None values for y = 0 |
| 108 | return value - tuple: |
| 109 | [(abs dif, rel dif) * len(y)], |
| 110 | (abs average, abs max), |
| 111 | (rel average, rel max)""" |
koder aka kdanilov | 66839a9 | 2015-04-11 13:22:31 +0300 | [diff] [blame] | 112 | |
| 113 | abs_dlist = [] |
| 114 | rel_dlist = [] |
| 115 | |
Ved-vampir | 0316644 | 2015-04-10 17:28:23 +0300 | [diff] [blame] | 116 | for y1, y2 in zip(y, ynew): |
| 117 | # absolute |
koder aka kdanilov | 66839a9 | 2015-04-11 13:22:31 +0300 | [diff] [blame] | 118 | abs_dlist.append(y1 - y2) |
Ved-vampir | 0316644 | 2015-04-10 17:28:23 +0300 | [diff] [blame] | 119 | |
koder aka kdanilov | 66839a9 | 2015-04-11 13:22:31 +0300 | [diff] [blame] | 120 | if y1 > 1E-6: |
| 121 | rel_dlist.append(abs(abs_dlist[-1] / y1)) |
| 122 | else: |
| 123 | raise ZeroDivisionError("{0!r} is too small".format(y1)) |
| 124 | |
| 125 | da_avg = sum(abs_dlist) / len(abs_dlist) |
| 126 | dr_avg = sum(rel_dlist) / len(rel_dlist) |
| 127 | |
| 128 | return (zip(abs_dlist, rel_dlist), |
| 129 | (da_avg, max(abs_dlist)), (dr_avg, max(rel_dlist)) |
| 130 | ) |
koder aka kdanilov | 6c49106 | 2015-04-09 22:33:13 +0300 | [diff] [blame] | 131 | |
| 132 | |
| 133 | def calculate_distribution_properties(data): |
| 134 | """chi, etc""" |
| 135 | |
| 136 | |
koder aka kdanilov | 3405201 | 2015-08-27 18:32:11 +0300 | [diff] [blame] | 137 | def minimal_measurement_count(data, max_diff, req_probability): |
koder aka kdanilov | 6c49106 | 2015-04-09 22:33:13 +0300 | [diff] [blame] | 138 | """ |
| 139 | should returns amount of measurements to get results (avg and deviation) |
| 140 | with error less, that max_diff in at least req_probability% cases |
| 141 | """ |
koder aka kdanilov | f86d7af | 2015-05-06 04:01:54 +0300 | [diff] [blame] | 142 | |
| 143 | |
| 144 | class StatProps(object): |
| 145 | def __init__(self): |
| 146 | self.average = None |
| 147 | self.mediana = None |
| 148 | self.perc_95 = None |
| 149 | self.perc_5 = None |
| 150 | self.deviation = None |
| 151 | self.confidence = None |
| 152 | self.min = None |
| 153 | self.max = None |
koder aka kdanilov | 4af1c1d | 2015-05-18 15:48:58 +0300 | [diff] [blame] | 154 | self.raw = None |
koder aka kdanilov | f86d7af | 2015-05-06 04:01:54 +0300 | [diff] [blame] | 155 | |
| 156 | def rounded_average_conf(self): |
| 157 | return round_deviation((self.average, self.confidence)) |
| 158 | |
koder aka kdanilov | 416b87a | 2015-05-12 00:26:04 +0300 | [diff] [blame] | 159 | def rounded_average_dev(self): |
| 160 | return round_deviation((self.average, self.deviation)) |
| 161 | |
| 162 | def __str__(self): |
| 163 | return "StatProps({0} ~ {1})".format(round_3_digit(self.average), |
| 164 | round_3_digit(self.deviation)) |
| 165 | |
| 166 | def __repr__(self): |
| 167 | return str(self) |
| 168 | |
koder aka kdanilov | f86d7af | 2015-05-06 04:01:54 +0300 | [diff] [blame] | 169 | |
| 170 | def data_property(data, confidence=0.95): |
| 171 | res = StatProps() |
| 172 | if len(data) == 0: |
| 173 | return res |
| 174 | |
| 175 | data = sorted(data) |
| 176 | res.average, res.deviation = med_dev(data) |
| 177 | res.max = data[-1] |
| 178 | res.min = data[0] |
| 179 | |
| 180 | ln = len(data) |
| 181 | if ln % 2 == 0: |
| 182 | res.mediana = (data[ln / 2] + data[ln / 2 - 1]) / 2 |
| 183 | else: |
| 184 | res.mediana = data[ln / 2] |
| 185 | |
| 186 | res.perc_95 = data[int((ln - 1) * 0.95)] |
| 187 | res.perc_5 = data[int((ln - 1) * 0.05)] |
| 188 | |
| 189 | if not no_numpy and ln >= 3: |
| 190 | res.confidence = stats.sem(data) * \ |
| 191 | stats.t.ppf((1 + confidence) / 2, ln - 1) |
| 192 | else: |
| 193 | res.confidence = res.deviation |
| 194 | |
koder aka kdanilov | 4af1c1d | 2015-05-18 15:48:58 +0300 | [diff] [blame] | 195 | res.raw = data[:] |
koder aka kdanilov | f86d7af | 2015-05-06 04:01:54 +0300 | [diff] [blame] | 196 | return res |