blob: 22637884cb421d1fce05b789db343d473a25d7b1 [file] [log] [blame]
koder aka kdanilov6c491062015-04-09 22:33:13 +03001import math
2import itertools
koder aka kdanilov7f59d562016-12-26 01:34:23 +02003import statistics
4from typing import Union, List, TypeVar, Callable, Iterable, Tuple, Any, cast, Dict
koder aka kdanilovcff7b2e2015-04-18 20:48:15 +03005
koder aka kdanilov7f59d562016-12-26 01:34:23 +02006import numpy
7from scipy import stats, optimize
8from numpy import linalg
9from numpy.polynomial.chebyshev import chebfit, chebval
koder aka kdanilov6c491062015-04-09 22:33:13 +030010
11
koder aka kdanilov7f59d562016-12-26 01:34:23 +020012from .result_classes import IStorable
koder aka kdanilovbb6d6cd2015-06-20 02:55:07 +030013
14
# Any plain real scalar: int or float.
Number = Union[int, float]
# Type variable constrained to int or float, for helpers that return the same
# numeric type they were given.
TNumber = TypeVar('TNumber', int, float)


# NOTE(review): presumably a tolerance for float comparisons; it is not used
# in the visible part of this module -- confirm against callers.
DOUBLE_DELTA = 1e-8


# Short aliases for the stdlib estimators.
average = statistics.mean
# NOTE(review): `dev` is bound to the sample *variance* (statistics.variance),
# not to the standard deviation -- confirm this is what callers expect.
dev = statistics.variance
koder aka kdanilov6c491062015-04-09 22:33:13 +030024
25
class StatProps(IStorable):
    """Statistic properties for a time series.

    Plain container: the constructor stores ``data`` and initialises every
    statistic field to ``None``; the fields are expected to be filled in by
    whatever code computes the statistics.
    """

    yaml_tag = 'stat'

    def __init__(self, data: List[Number]) -> None:
        self.average = None  # type: float
        self.deviation = None  # type: float
        self.confidence = None  # type: float
        self.confidence_level = None  # type: float

        self.perc_99 = None  # type: float
        self.perc_95 = None  # type: float
        self.perc_90 = None  # type: float
        self.perc_50 = None  # type: float

        self.min = None  # type: Number
        self.max = None  # type: Number

        # bin_center: bin_count
        self.histo = None  # type: Tuple[List[int], List[float]]
        self.data = data

        self.normtest = None  # type: Any

    def __str__(self) -> str:
        """Return a multi-line, human-readable dump of all stored statistics."""
        # The placeholders must use the real attribute names (perc_50,
        # normtest, ...): str.format resolves them with getattr, so a
        # misspelt name raises AttributeError.
        res = ["StatProps(num_el={}):".format(len(self.data)),
               " distr = {0.average} ~ {0.deviation}".format(self),
               " confidence({0.confidence_level}) = {0.confidence}".format(self),
               " perc50={0.perc_50}".format(self),
               " perc90={0.perc_90}".format(self),
               " perc95={0.perc_95}".format(self),
               " perc99={0.perc_99}".format(self),
               " range {0.min} {0.max}".format(self),
               " normtest = {0.normtest}".format(self)]
        return "\n".join(res)

    def __repr__(self) -> str:
        """Same text as ``str(self)``."""
        return str(self)

    def raw(self) -> Dict[str, Any]:
        """Return a shallow copy of the instance attribute dictionary."""
        return self.__dict__.copy()

    @classmethod
    def fromraw(cls, data: Dict[str, Any]) -> 'StatProps':
        """Rebuild an instance from a dict such as the one returned by ``raw``.

        ``__init__`` is bypassed; the attributes are taken verbatim from
        ``data``.
        """
        res = cls.__new__(cls)
        res.__dict__.update(data)
        return res
74
75
def greater_digit_pos(val: Number) -> int:
    """Return the decimal position of the most significant digit of ``val``.

    Positions are counted from the units digit, which is position 1: values
    in [1, 10) give 1, values in [10, 100) give 2, values in [0.1, 1) give 0,
    and so on.  ``val`` must be positive, since ``math.log10`` rejects zero
    and negative numbers with ``ValueError``.
    """
    exponent = math.floor(math.log10(val))
    return int(exponent) + 1
78
79
def round_digits(val: TNumber, num_digits: int = 3) -> TNumber:
    """Truncate ``val`` to ``num_digits`` significant decimal digits.

    Digits beyond the requested precision are dropped (truncated toward
    zero), not rounded to nearest.  The result has the same type as ``val``.

    Zero is returned unchanged, and negative values are scaled by the
    magnitude of their absolute value; both cases would otherwise fail
    because the logarithm is undefined for them.
    """
    if val == 0:
        # log10(0) is undefined and zero has no digits to drop.
        return val

    # Weight of the last digit that is kept.
    scale = 10 ** (greater_digit_pos(abs(val)) - num_digits)
    return type(val)(int(val / scale) * scale)
83
84
def calc_stat_props(data: List[Number], confidence: float = 0.95) -> StatProps:
    """Calculate statistical properties of an array of numbers.

    Fills a ``StatProps`` with the mean, the value of ``dev`` (bound to
    ``statistics.variance`` in this module), min/max, the 50/90/95/99th
    percentiles, a histogram with automatically chosen bins and the result
    of scipy's normality test.

    The half-width of the two-sided Student-t confidence interval of the
    mean is stored in ``confidence`` when at least three samples are given
    (``None`` otherwise); the requested level is stored in
    ``confidence_level``.

    Raises:
        ValueError: if ``data`` is empty.

    NOTE(review): ``statistics.variance`` needs at least two samples and
    scipy's normality test may reject very small samples, so short inputs
    can still raise from those calls -- confirm the minimum size callers
    pass.
    """
    # len() rather than truthiness, so sequence-like inputs whose truth
    # value is ambiguous (e.g. numpy arrays) are still handled.
    if len(data) == 0:
        raise ValueError("Input array is empty")

    res = StatProps(data)
    # Record the level the interval was computed for; without it the
    # result reports "confidence(None)".
    res.confidence_level = confidence

    data = sorted(data)
    res.average = average(data)
    res.deviation = dev(data)
    res.min = data[0]
    res.max = data[-1]

    res.perc_50, res.perc_90, res.perc_95, res.perc_99 = \
        numpy.percentile(data, [50, 90, 95, 99])

    if len(data) >= 3:
        # Standard error of the mean times the two-sided t quantile.
        res.confidence = stats.sem(data) * \
            stats.t.ppf((1 + confidence) / 2, len(data) - 1)
    else:
        res.confidence = None

    res.histo = numpy.histogram(data, 'auto')
    res.normtest = stats.mstats.normaltest(data)
    return res
113
114
def groupby_globally(data: Iterable, key_func: Callable) -> Dict[Any, List[Any]]:
    """Group all items of ``data`` by ``key_func(item)``.

    Unlike ``itertools.groupby``, items with equal keys are merged even when
    they are not adjacent in ``data``.  Keys may be any hashable value.

    Returns a dict mapping each key to the list of its items.  Keys appear
    in order of first occurrence and items keep their input order.
    """
    grouped = {}  # type: Dict[Any, List[Any]]

    for item in data:
        grouped.setdefault(key_func(item), []).append(item)

    return grouped
124
125
def approximate_curve(x: List[Number], y: List[float], xnew: List[Number], curved_coef: int) -> List[float]:
    """Return ynew - y values of a Chebyshev approximation evaluated at xnew.

    A Chebyshev series of degree ``curved_coef`` is least-squares fitted to
    the points (x, y) and then evaluated at every point of ``xnew``.
    """
    coefficients = chebfit(x, y, curved_coef)
    ynew = chebval(xnew, coefficients)
    return cast(List[float], ynew)
koder aka kdanilov6c491062015-04-09 22:33:13 +0300129
130
def approximate_line(x: List[Number], y: List[float], xnew: List[Number], relative_dist: bool = False) -> List[float]:
    """
    Fit a straight line to the test data (x, y) and evaluate it at xnew.

    The residual that is minimised is ``y - line(x)`` by default, or
    ``1 - y / line(x)`` when ``relative_dist`` is true.  The starting guess
    is the line through the first two points, so at least two points with
    different x are required.

    Returns ynew - y values of the linear approximation at xnew.
    Raises ValueError if the least-squares solver reports no solution.
    """
    xs = numpy.array(x)
    ys = numpy.array(y)

    def line(coef, arg):
        # coef is (slope, intercept)
        return coef[0] * arg + coef[1]

    # residual function for the selected distance mode
    if relative_dist:
        def residual(coef, arg, expected):
            return 1.0 - expected / line(coef, arg)
    else:
        def residual(coef, arg, expected):
            return expected - line(coef, arg)

    # starting point: exact line through the first two samples
    first_two = [[xs[0], 1.0], [xs[1], 1.0]]
    start = tuple(linalg.solve(first_two, ys[:2]))

    fitted, status = optimize.leastsq(residual, start, args=(xs, ys))

    # leastsq reports a found solution with status codes 1..4
    if status not in (1, 2, 3, 4):
        raise ValueError("No line for this dots")

    return line(fitted, numpy.array(xnew))
koder aka kdanilov6c491062015-04-09 22:33:13 +0300165
166
# TODO: revise the commented-out `difference` helper below before re-enabling it
# def difference(y, ynew):
#     """returns average and maximum relative and
#        absolute differences between y and ynew
#        result may contain None values for y = 0
#        return value - tuple:
#        [(abs dif, rel dif) * len(y)],
#        (abs average, abs max),
#        (rel average, rel max)"""
#
#     abs_dlist = []
#     rel_dlist = []
#
#     for y1, y2 in zip(y, ynew):
#         # absolute
#         abs_dlist.append(y1 - y2)
#
#         if y1 > 1E-6:
#             rel_dlist.append(abs(abs_dlist[-1] / y1))
#         else:
#             raise ZeroDivisionError("{0!r} is too small".format(y1))
#
#     da_avg = sum(abs_dlist) / len(abs_dlist)
#     dr_avg = sum(rel_dlist) / len(rel_dlist)
#
#     return (zip(abs_dlist, rel_dlist),
#             (da_avg, max(abs_dlist)), (dr_avg, max(rel_dlist))
#             )
194# )