blob: d9c9688911b64a33ed18ef229d66ef684ebe922f [file] [log] [blame]
koder aka kdanilova732a602017-02-01 20:29:56 +02001import numpy
2from wally.statistic import rebin_histogram
kdanylov aka kodercdfcdaf2017-04-29 10:03:39 +03003from wally.result_classes import DataSource, TimeSeries
4from wally.data_selectors import interpolate_ts_on_seconds_border, c_interpolate_ts_on_seconds_border
koder aka kdanilova732a602017-02-01 20:29:56 +02005
6
def array_eq(x: numpy.ndarray, y: numpy.ndarray, max_diff: float = 1E-3) -> bool:
    """Return True if *x* and *y* are element-wise equal within *max_diff*.

    Arrays must be broadcast-compatible; the check is on the maximum
    absolute element-wise difference.

    Note: the original annotated the parameters as ``numpy.array``, which is
    a factory function, not a type — ``numpy.ndarray`` is the correct hint.
    """
    return numpy.abs(x - y).max() <= max_diff
9
10
def test_rebin_histo():
    """Verify that rebin_histogram keeps total mass and yields expected bins."""
    src_histo = numpy.ones(100, dtype=int)
    src_edges = numpy.arange(100)

    # Full-range rebin: 100 unit-count bins collapse into 10 bins of 10 each.
    histo10, edges10 = rebin_histogram(src_histo, src_edges, 10)

    assert histo10.shape == (10,)
    assert edges10.shape == (10,)
    assert histo10.dtype.name.startswith('int')
    assert edges10.dtype.name.startswith('float')

    assert array_eq(edges10, 9.9 * numpy.arange(10))
    assert histo10.sum() == src_histo.sum()
    assert [10] * 10 == list(histo10)

    # Rebin a sub-range; counts outside [20, 50) fold into the edge bins.
    histo3, edges3 = rebin_histogram(src_histo, src_edges, 3,
                                     left_tail_idx=20, right_tail_idx=50)

    assert histo3.shape == (3,)
    assert edges3.shape == (3,)
    assert array_eq(edges3, numpy.array([20, 30, 40]))
    assert histo3.sum() == src_histo.sum()
    assert [30, 10, 60] == list(histo3)
kdanylov aka kodercdfcdaf2017-04-29 10:03:39 +030035
36
# Fixed identifiers used to construct the DataSource / TimeSeries fixtures
# in test_interpolate below.
SUITE_ID = "suite1"
JOB_ID = "job1"
NODE_ID = "node1"
SENSOR = "sensor"
DEV = "dev"
METRIC = "metric"
TAG = "csv"
DATA_UNITS = "x"   # units of the measured values (arbitrary for the test)
TIME_UNITS = "ms"  # source timestamps are generated in milliseconds
46
47
def test_interpolate():
    """Interpolate a jittered ms-resolution series onto second borders and
    check that the interpolation preserves total mass, both globally and
    (within bounds) over random sub-ranges.
    """
    ds = DataSource(node_id=NODE_ID, sensor=SENSOR, dev=DEV, metric=METRIC)
    samples = 200
    ms_coef = 1000
    s_offset = 377 * ms_coef
    # NOTE(review): ms_offset already includes s_offset, and both are added
    # again when building source_times, so the base offset is applied twice.
    # Harmless for the test (it only shifts the time axis) — confirm intent.
    ms_offset = 300 + s_offset

    for i in range(16):
        # Roughly one sample per second, with random sub-second jitter.
        source_times = numpy.random.randint(100, size=samples, dtype='uint64') + \
            ms_coef * numpy.arange(samples, dtype='uint64') + s_offset + ms_offset
        source_values = numpy.random.randint(30, 60, size=samples, dtype='uint64')

        ts = TimeSeries("test", raw=None, data=source_values, times=source_times, units=DATA_UNITS,
                        source=ds, time_units=TIME_UNITS)

        ts2 = c_interpolate_ts_on_seconds_border(ts, nc=True)

        assert ts.time_units == 'ms'
        assert ts2.time_units == 's'
        assert ts2.times.dtype == ts.times.dtype
        assert ts2.data.dtype == ts.data.dtype

        # Interpolation must redistribute data, never create or drop it.
        assert ts.data.sum() == ts2.data.sum()

        borders = 5
        block_size = samples // 10
        for begin_idx in numpy.random.randint(borders, samples - borders, size=20):
            begin_idx = int(begin_idx)
            end_idx = min(begin_idx + block_size, ts.times.size - 1)

            first_cell_begin_time = ts.times[begin_idx - 1]
            last_cell_end_time = ts.times[end_idx]
            ts_sum = ts.data[begin_idx:end_idx].sum()

            # Map the ms-time range onto the interpolated seconds axis.
            ts2_begin_idx = numpy.searchsorted(ts2.times, first_cell_begin_time // ms_coef)
            ts2_end_idx = numpy.searchsorted(ts2.times, last_cell_end_time // ms_coef) + 1

            # BUG FIX: the bounds must be taken from the interpolated series
            # ts2.data — the indices above were found in ts2.times, so using
            # ts.data here compared sums over unrelated cells.
            ts2_max = ts2.data[ts2_begin_idx: ts2_end_idx].sum()
            ts2_min = ts2.data[ts2_begin_idx + 1: ts2_end_idx - 1].sum()

            assert ts2_min <= ts_sum <= ts2_max, "NOT {} <= {} <= {}".format(ts2_min, ts_sum, ts2_max)