| koder aka kdanilov | 6c49106 | 2015-04-09 22:33:13 +0300 | [diff] [blame] | 1 | import math | 
|  | 2 | import itertools | 
| koder aka kdanilov | cff7b2e | 2015-04-18 20:48:15 +0300 | [diff] [blame] | 3 |  | 
|  | 4 | try: | 
| koder aka kdanilov | f86d7af | 2015-05-06 04:01:54 +0300 | [diff] [blame] | 5 | from scipy import stats | 
| koder aka kdanilov | cff7b2e | 2015-04-18 20:48:15 +0300 | [diff] [blame] | 6 | from numpy import array, linalg | 
|  | 7 | from scipy.optimize import leastsq | 
|  | 8 | from numpy.polynomial.chebyshev import chebfit, chebval | 
| koder aka kdanilov | f86d7af | 2015-05-06 04:01:54 +0300 | [diff] [blame] | 9 | no_numpy = False | 
| koder aka kdanilov | cff7b2e | 2015-04-18 20:48:15 +0300 | [diff] [blame] | 10 | except ImportError: | 
|  | 11 | no_numpy = True | 
| koder aka kdanilov | 6c49106 | 2015-04-09 22:33:13 +0300 | [diff] [blame] | 12 |  | 
|  | 13 |  | 
def med_dev(vals):
    """Return (average, deviation) of a sample.

    vals - non-empty sized collection of numbers (it is traversed twice
           and len() is taken, so a one-shot iterator will not work)

    The first item of the result is the arithmetic mean (despite the
    'med' name), the second one is the population standard deviation,
    i.e. the square root of the mean squared distance to the average.

    Raises ZeroDivisionError for an empty collection.
    """
    # float() forces true division, so integer samples are not silently
    # truncated by Python 2 integer division
    count = float(len(vals))
    med = sum(vals) / count
    dev = (sum(abs(med - val) ** 2.0 for val in vals) / count) ** 0.5
    return med, dev
|  | 18 |  | 
|  | 19 |  | 
def round_3_digit(val):
    """Truncate val to three significant digits.

    Implemented through round_deviation() with a fake deviation of
    val / 10, which places the rounding step two decimal orders below
    the leading digit of val. Values for which val / 10 is below 1E-7
    (including zero and negatives) come back unchanged.
    """
    rounded_val, _ = round_deviation((val, val / 10.0))
    return rounded_val
|  | 22 |  | 
|  | 23 |  | 
def round_deviation(med_dev):
    """Cut both value and deviation down to the precision of the deviation.

    med_dev - pair (value, deviation)

    The deviation keeps its two leading significant digits, the value
    is truncated (not rounded to nearest) to the same decimal step.
    Returns a two-item list [value, deviation], each item cast back to
    the type of the matching input item. When the deviation is below
    1E-7 the input pair is returned untouched.
    """
    value, spread = med_dev

    # nothing sensible to align to - hand the pair back as is
    if spread < 1E-7:
        return med_dev

    # decimal step one order below the leading digit of the deviation
    exponent = math.floor(math.log10(spread)) - 1
    step = 10.0 ** exponent

    cut_spread = int(spread / step) * step
    cut_value = int(value / step) * step

    value_cls = type(med_dev[0])
    spread_cls = type(med_dev[1])
    return [value_cls(cut_value), spread_cls(cut_spread)]
| koder aka kdanilov | 6c49106 | 2015-04-09 22:33:13 +0300 | [diff] [blame] | 35 |  | 
|  | 36 |  | 
def groupby_globally(data, key_func):
    """Group all items of data by key_func(item), ignoring their order.

    itertools.groupby only merges runs of neighbouring items; here the
    runs that share a key are concatenated, so the input does not have
    to be sorted by key beforehand.

    data     - iterable of items
    key_func - callable producing the grouping key for an item; any
               hashable key is accepted, an unhashable sequence (e.g.
               a list) is turned into a tuple

    Returns dict {key: [items with that key, in input order]}.
    """
    grouped = {}

    for key, curr_data_it in itertools.groupby(data, key_func):
        try:
            hash(key)
        except TypeError:
            # dict keys must be hashable
            key = tuple(key)
        grouped.setdefault(key, []).extend(curr_data_it)

    return grouped
|  | 46 |  | 
|  | 47 |  | 
def approximate_curve(x, y, xnew, curved_coef):
    """Fit a Chebyshev series to the points (x, y) and evaluate it at xnew.

    curved_coef is handed to chebfit as the degree of the series.
    Returns the fitted y values for xnew, or None when numpy/scipy
    could not be imported.
    """
    if not no_numpy:
        cheb_coefs = chebfit(x, y, curved_coef)
        return chebval(xnew, cheb_coefs)

    return None
|  | 54 |  | 
|  | 55 |  | 
def approximate_line(x, y, xnew, relative_dist=False):
    """Least-squares fit of a straight line, evaluated at xnew.

    x, y          - measured points (at least two are needed)
    xnew          - x values where the fitted line should be evaluated
    relative_dist - when true, minimise the relative error
                    1 - y / line(x); otherwise the absolute error
                    y - line(x)

    Returns numpy array with line values at xnew, or None when
    numpy/scipy could not be imported.
    Raises ValueError if leastsq reports that no solution was found.
    """
    if no_numpy:
        return None

    # leastsq needs numpy arrays to do vectorised arithmetic
    xs = array(x)
    ys = array(y)

    def line(coefs, arg):
        return coefs[0] * arg + coefs[1]

    if relative_dist:
        def residuals(coefs, arg, val):
            return 1.0 - val / line(coefs, arg)
    else:
        def residuals(coefs, arg, val):
            return val - line(coefs, arg)

    # starting guess: the exact line through the first two points
    first_two = [[xs[0], 1.0], [xs[1], 1.0]]
    start_coefs = tuple(linalg.solve(first_two, ys[:2]))

    fitted_coefs, status = leastsq(residuals, start_coefs, args=(xs, ys))

    # leastsq signals success with status codes 1..4
    if status not in (1, 2, 3, 4):
        raise ValueError("No line for this dots")

    return line(fitted_coefs, array(xnew))
| koder aka kdanilov | 6c49106 | 2015-04-09 22:33:13 +0300 | [diff] [blame] | 95 |  | 
|  | 96 |  | 
def difference(y, ynew):
    """Compare reference values y with approximated values ynew.

    For every pair the signed absolute difference (y - ynew) and the
    relative difference |y - ynew| / |y| are calculated.

    return value - tuple:
        [(abs dif, rel dif) * len(y)],
        (abs average, abs max),
        (rel average, rel max)

    'abs' statistics are taken over the signed differences.

    Raises ZeroDivisionError if the magnitude of some y value does not
    exceed 1E-6 (relative difference is meaningless there) or if the
    input is empty.
    """

    abs_dlist = []
    rel_dlist = []

    for y1, y2 in zip(y, ynew):
        # compare the magnitude, so negative reference values are accepted
        if abs(y1) > 1E-6:
            delta = y1 - y2
            abs_dlist.append(delta)
            # float() - no integer truncation under python 2
            rel_dlist.append(abs(delta / float(y1)))
        else:
            raise ZeroDivisionError("{0!r} is too small".format(y1))

    count = float(len(abs_dlist))
    da_avg = sum(abs_dlist) / count
    dr_avg = sum(rel_dlist) / count

    # list() - keep the documented list result under python 3 as well
    return (list(zip(abs_dlist, rel_dlist)),
            (da_avg, max(abs_dlist)),
            (dr_avg, max(rel_dlist)))
| koder aka kdanilov | 6c49106 | 2015-04-09 22:33:13 +0300 | [diff] [blame] | 124 |  | 
|  | 125 |  | 
def calculate_distribution_properties(data):
    """Placeholder for distribution checks (chi, etc).

    Not implemented: data is ignored and None is returned.
    """
|  | 128 |  | 
|  | 129 |  | 
def minimal_measurement_amount(data, max_diff, req_probability):
    """
    Placeholder, not implemented: all arguments are ignored and None
    is returned.

    Should return the amount of measurements needed to get results
    (avg and deviation) with an error less than max_diff in at least
    req_probability% of cases.
    """
| koder aka kdanilov | f86d7af | 2015-05-06 04:01:54 +0300 | [diff] [blame] | 135 |  | 
|  | 136 |  | 
class StatProps(object):
    """Plain container for summary statistics of one sample.

    Every field starts as None and is meant to be filled in by the code
    which calculates the statistics; an instance built for an empty
    sample stays completely unfilled.
    """

    def __init__(self):
        self.average = None
        self.mediana = None     # median of the sample
        self.perc_95 = None     # 95th percentile
        self.perc_5 = None      # 5th percentile
        self.deviation = None
        self.confidence = None
        self.min = None
        self.max = None
        self.raw = None         # the measurements themselves

    def rounded_average_conf(self):
        """Return average and confidence cut to the confidence precision."""
        return round_deviation((self.average, self.confidence))

    def rounded_average_dev(self):
        """Return average and deviation cut to the deviation precision."""
        return round_deviation((self.average, self.deviation))

    def __str__(self):
        # An unfilled instance has no numbers yet; round_3_digit() would
        # fail on None, so such fields are printed as is.
        shown = [val if val is None else round_3_digit(val)
                 for val in (self.average, self.deviation)]
        return "StatProps({0} ~ {1})".format(*shown)

    def __repr__(self):
        return str(self)
|  | 161 |  | 
| koder aka kdanilov | f86d7af | 2015-05-06 04:01:54 +0300 | [diff] [blame] | 162 |  | 
def data_property(data, confidence=0.95):
    """Calculate summary statistics of a sample and return them as StatProps.

    data       - sized collection of numbers
    confidence - confidence level used for the confidence interval

    For empty data an unfilled StatProps (all fields None) is returned.
    Otherwise average, deviation, min, max, median, 5th and 95th
    percentiles are filled in, and raw holds a sorted copy of the data.

    The confidence field is the half-width of the two-sided Student-t
    interval for the mean (standard error times the t quantile) when
    scipy is available and there are at least 3 measurements; otherwise
    it falls back to the deviation.
    """
    res = StatProps()
    if len(data) == 0:
        return res

    data = sorted(data)
    res.average, res.deviation = med_dev(data)
    res.max = data[-1]
    res.min = data[0]

    ln = len(data)
    # floor division: a list index must be an int on python 3 as well
    mid = ln // 2
    if ln % 2 == 0:
        # 2.0 - do not truncate the median of integer samples
        res.mediana = (data[mid] + data[mid - 1]) / 2.0
    else:
        res.mediana = data[mid]

    # nearest-rank percentiles: take the item at the truncated position
    res.perc_95 = data[int((ln - 1) * 0.95)]
    res.perc_5 = data[int((ln - 1) * 0.05)]

    if not no_numpy and ln >= 3:
        res.confidence = stats.sem(data) * \
            stats.t.ppf((1 + confidence) / 2.0, ln - 1)
    else:
        res.confidence = res.deviation

    res.raw = data[:]
    return res