Blame - wally/statistic.py - mcp/cvp-wally

blob: 0729283d35f0b0e771d4f2ddece08ff062ef4a82 [file] [log] [blame]

koder aka kdanilov	6c49106	2015-04-09 22:33:13 +0300	[diff] [blame]	1	import math
				2	import itertools
koder aka kdanilov	cff7b2e	2015-04-18 20:48:15 +0300	[diff] [blame^]	3
				4	try:
				5	from numpy import array, linalg
				6	from scipy.optimize import leastsq
				7	from numpy.polynomial.chebyshev import chebfit, chebval
				8	except ImportError:
				9	no_numpy = True
koder aka kdanilov	6c49106	2015-04-09 22:33:13 +0300	[diff] [blame]	10
				11
				12	def med_dev(vals):
				13	med = sum(vals) / len(vals)
				14	dev = ((sum(abs(med - i) 2.0 for i in vals) / len(vals)) 0.5)
				15	return med, dev
				16
				17
				18	def round_deviation(med_dev):
				19	med, dev = med_dev
				20
				21	if dev < 1E-7:
				22	return med_dev
				23
				24	dev_div = 10.0 ** (math.floor(math.log10(dev)) - 1)
				25	dev = int(dev / dev_div) * dev_div
				26	med = int(med / dev_div) * dev_div
				27	return (type(med_dev[0])(med),
				28	type(med_dev[1])(dev))
				29
				30
				31	def groupby_globally(data, key_func):
				32	grouped = {}
				33	grouped_iter = itertools.groupby(data, key_func)
				34
				35	for (bs, cache_tp, act, conc), curr_data_it in grouped_iter:
				36	key = (bs, cache_tp, act, conc)
				37	grouped.setdefault(key, []).extend(curr_data_it)
				38
				39	return grouped
				40
				41
				42	def approximate_curve(x, y, xnew, curved_coef):
				43	"""returns ynew - y values of some curve approximation"""
koder aka kdanilov	cff7b2e	2015-04-18 20:48:15 +0300	[diff] [blame^]	44	if no_numpy:
				45	return None
				46
koder aka kdanilov	6c49106	2015-04-09 22:33:13 +0300	[diff] [blame]	47	return chebval(xnew, chebfit(x, y, curved_coef))
				48
				49
				50	def approximate_line(x, y, xnew, relative_dist=False):
Ved-vampir	0316644	2015-04-10 17:28:23 +0300	[diff] [blame]	51	""" x, y - test data, xnew - dots, where we want find approximation
				52	if not relative_dist distance = y - newy
				53	returns ynew - y values of linear approximation"""
koder aka kdanilov	66839a9	2015-04-11 13:22:31 +0300	[diff] [blame]	54
koder aka kdanilov	cff7b2e	2015-04-18 20:48:15 +0300	[diff] [blame^]	55	if no_numpy:
				56	return None
				57
Ved-vampir	0316644	2015-04-10 17:28:23 +0300	[diff] [blame]	58	# convert to numpy.array (don't work without it)
				59	ox = array(x)
				60	oy = array(y)
koder aka kdanilov	66839a9	2015-04-11 13:22:31 +0300	[diff] [blame]	61
Ved-vampir	0316644	2015-04-10 17:28:23 +0300	[diff] [blame]	62	# set approximation function
koder aka kdanilov	66839a9	2015-04-11 13:22:31 +0300	[diff] [blame]	63	def func_line(tpl, x):
				64	return tpl[0] * x + tpl[1]
				65
				66	def error_func_rel(tpl, x, y):
				67	return 1.0 - y / func_line(tpl, x)
				68
				69	def error_func_abs(tpl, x, y):
				70	return y - func_line(tpl, x)
				71
Ved-vampir	0316644	2015-04-10 17:28:23 +0300	[diff] [blame]	72	# choose distance mode
koder aka kdanilov	66839a9	2015-04-11 13:22:31 +0300	[diff] [blame]	73	error_func = error_func_rel if relative_dist else error_func_abs
				74
				75	tpl_initial = tuple(linalg.solve([[ox[0], 1.0], [ox[1], 1.0]],
				76	oy[:2]))
				77
Ved-vampir	0316644	2015-04-10 17:28:23 +0300	[diff] [blame]	78	# find line
koder aka kdanilov	66839a9	2015-04-11 13:22:31 +0300	[diff] [blame]	79	tpl_final, success = leastsq(error_func,
				80	tpl_initial[:],
				81	args=(ox, oy))
				82
Ved-vampir	0316644	2015-04-10 17:28:23 +0300	[diff] [blame]	83	# if error
				84	if success not in range(1, 5):
				85	raise ValueError("No line for this dots")
koder aka kdanilov	66839a9	2015-04-11 13:22:31 +0300	[diff] [blame]	86
Ved-vampir	0316644	2015-04-10 17:28:23 +0300	[diff] [blame]	87	# return new dots
koder aka kdanilov	66839a9	2015-04-11 13:22:31 +0300	[diff] [blame]	88	return func_line(tpl_final, array(xnew))
koder aka kdanilov	6c49106	2015-04-09 22:33:13 +0300	[diff] [blame]	89
				90
				91	def difference(y, ynew):
				92	"""returns average and maximum relative and
Ved-vampir	0316644	2015-04-10 17:28:23 +0300	[diff] [blame]	93	absolute differences between y and ynew
				94	result may contain None values for y = 0
				95	return value - tuple:
				96	[(abs dif, rel dif) * len(y)],
				97	(abs average, abs max),
				98	(rel average, rel max)"""
koder aka kdanilov	66839a9	2015-04-11 13:22:31 +0300	[diff] [blame]	99
				100	abs_dlist = []
				101	rel_dlist = []
				102
Ved-vampir	0316644	2015-04-10 17:28:23 +0300	[diff] [blame]	103	for y1, y2 in zip(y, ynew):
				104	# absolute
koder aka kdanilov	66839a9	2015-04-11 13:22:31 +0300	[diff] [blame]	105	abs_dlist.append(y1 - y2)
Ved-vampir	0316644	2015-04-10 17:28:23 +0300	[diff] [blame]	106
koder aka kdanilov	66839a9	2015-04-11 13:22:31 +0300	[diff] [blame]	107	if y1 > 1E-6:
				108	rel_dlist.append(abs(abs_dlist[-1] / y1))
				109	else:
				110	raise ZeroDivisionError("{0!r} is too small".format(y1))
				111
				112	da_avg = sum(abs_dlist) / len(abs_dlist)
				113	dr_avg = sum(rel_dlist) / len(rel_dlist)
				114
				115	return (zip(abs_dlist, rel_dlist),
				116	(da_avg, max(abs_dlist)), (dr_avg, max(rel_dlist))
				117	)
koder aka kdanilov	6c49106	2015-04-09 22:33:13 +0300	[diff] [blame]	118
				119
				120	def calculate_distribution_properties(data):
				121	"""chi, etc"""
				122
				123
				124	def minimal_measurement_amount(data, max_diff, req_probability):
				125	"""
				126	should returns amount of measurements to get results (avg and deviation)
				127	with error less, that max_diff in at least req_probability% cases
				128	"""