blob: 9cebffdd2d49f4e1fc3fee7d5aea26242c02b6a9 [file] [log] [blame]
koder aka kdanilova732a602017-02-01 20:29:56 +02001import numpy
kdanylov aka koder736e5c12017-05-07 17:27:14 +03002import pytest
3
4
koder aka kdanilova732a602017-02-01 20:29:56 +02005from wally.statistic import rebin_histogram
kdanylov aka kodercdfcdaf2017-04-29 10:03:39 +03006from wally.result_classes import DataSource, TimeSeries
kdanylov aka koder45183182017-04-30 23:55:40 +03007from wally.data_selectors import c_interpolate_ts_on_seconds_border
8from wally.utils import unit_conversion_coef
koder aka kdanilova732a602017-02-01 20:29:56 +02009
10
def array_eq(x: numpy.ndarray, y: numpy.ndarray, max_diff: float = 1E-3) -> bool:
    """Return True when x and y agree element-wise within max_diff.

    The comparison uses the largest absolute element-wise difference of
    ``x - y``; the usual numpy broadcasting rules apply to the subtraction.

    Args:
        x: first array (any array-like accepted by ``numpy.asarray``).
        y: second array (any array-like accepted by ``numpy.asarray``).
        max_diff: largest absolute difference that still counts as equal.

    Returns:
        A builtin ``bool``. When the difference has no elements (e.g. both
        inputs are empty) the arrays are considered equal.
    """
    diff = numpy.abs(numpy.asarray(x) - numpy.asarray(y))
    # .max() raises ValueError on a zero-size array; with no elements
    # there is nothing that could differ.
    if diff.size == 0:
        return True
    # Convert numpy.bool_ into the builtin bool promised by the signature.
    return bool(diff.max() <= max_diff)
13
14
def test_conversion_coef():
    """Check unit_conversion_coef against known ratios and against its inverse.

    The table mixes 1000-based prefixes (m, u, K, G) with 1024-based ones
    (Ki, Mi). For every pair the forward coefficient must match the expected
    value and the reverse coefficient must be its reciprocal, both within a
    relative tolerance of 1E-5.
    """
    # (source unit, target unit, expected coefficient)
    units = [
        ('x', 'mx', 1000),
        ('Gx', 'Kx', 1000 ** 2),
        ('Gx', 'x', 1000 ** 3),
        ('x', 'Kix', 1.0 / 1024),
        ('x', 'Mix', 1.0 / 1024 ** 2),
        ('mx', 'Mix', 0.001 / 1024 ** 2),
        ('Mix', 'Kix', 1024),
        ('Kix', 'ux', 1024 * 1000 ** 2),
    ]

    for unit1, unit2, coef in units:
        cc = float(unit_conversion_coef(unit1, unit2))
        assert abs(cc / coef - 1) < 1E-5, "{} => {} == {}".format(unit1, unit2, cc)

        # The reverse conversion must undo the forward one. The message names
        # the units in the direction that was actually converted (unit2 => unit1).
        rcc = float(unit_conversion_coef(unit2, unit1))
        assert abs(rcc * cc - 1) < 1E-5, "{} => {} == {}".format(unit2, unit1, rcc)
32
33
def test_rebin_histo():
    """Exercise rebin_histogram on a flat histogram of 100 single-count bins."""
    # One sample per bin, edges 0..99.
    flat_histo = numpy.ones((100,), dtype=int)
    flat_edges = numpy.arange(100)

    # Case 1: squeeze the whole range into 10 bins.
    histo10, edges10 = rebin_histogram(flat_histo, flat_edges, 10)

    assert edges10.shape == (10,)
    assert histo10.shape == (10,)
    assert edges10.dtype.name.startswith('float')
    assert histo10.dtype.name.startswith('int')

    assert array_eq(edges10, numpy.arange(10) * 9.9)
    # Rebinning must not lose or invent samples.
    assert histo10.sum() == flat_histo.sum()
    assert list(histo10) == [10] * 10

    # Case 2: three bins with explicit tail indexes.
    # NOTE(review): the expected [30, 10, 60] suggests samples outside the
    # 20..50 index range are folded into the outer bins - confirm against
    # rebin_histogram.
    histo3, edges3 = rebin_histogram(flat_histo, flat_edges, 3,
                                     left_tail_idx=20,
                                     right_tail_idx=50)

    assert edges3.shape == (3,)
    assert histo3.shape == (3,)
    assert array_eq(edges3, numpy.array([20, 30, 40]))
    assert histo3.sum() == flat_histo.sum()
    assert list(histo3) == [30, 10, 60]
kdanylov aka kodercdfcdaf2017-04-29 10:03:39 +030058
59
# Shared fixture values for the interpolation tests below.
# SUITE_ID, JOB_ID and TAG are not referenced in the visible part of this module.
SUITE_ID = "suite1"
JOB_ID = "job1"
# NODE_ID, SENSOR, DEV and METRIC are the keyword arguments passed to DataSource.
NODE_ID = "node1"
SENSOR = "sensor"
DEV = "dev"
METRIC = "metric"
TAG = "csv"
# Passed to TimeSeries as `units` and `time_units` respectively.
DATA_UNITS = "x"
TIME_UNITS = "ms"
69
70
def test_interpolate():
    """Check c_interpolate_ts_on_seconds_border on random millisecond series.

    Sixteen random series are built (one sample per second plus up to 99 ms
    of jitter). For each, the result must be expressed in seconds with
    unchanged dtypes, and the sum over random 20-sample windows must lie
    between a narrower and a wider window sum.
    """
    ds = DataSource(node_id=NODE_ID, sensor=SENSOR, dev=DEV, metric=METRIC)
    samples = 200
    ms_coef = 1000
    s_offset = 377 * ms_coef
    ms_offset = 300 + s_offset
    borders = 10
    block_size = 20

    for _ in range(16):
        jitter = numpy.random.randint(100, size=samples, dtype='uint64')
        ticks = ms_coef * numpy.arange(samples, dtype='uint64')
        source_times = jitter + ticks + s_offset + ms_offset
        source_values = numpy.random.randint(30, 60, size=samples, dtype='uint64')

        ts = TimeSeries("test", raw=None, data=source_values, times=source_times,
                        units=DATA_UNITS, source=ds, time_units=TIME_UNITS)

        ts2 = c_interpolate_ts_on_seconds_border(ts, nc=True)

        assert ts.time_units == 'ms'
        assert ts2.time_units == 's'
        assert ts2.times.dtype == ts.times.dtype
        assert ts2.data.dtype == ts.data.dtype

        window_starts = numpy.random.randint(borders, samples - borders, size=20)
        for begin_idx in map(int, window_starts):
            # Clamp the window so it never runs past the last sample.
            end_idx = min(begin_idx + block_size, ts.times.size - 1)

            ts_sum = ts.data[begin_idx:end_idx].sum()
            first_cell_begin_time = ts.times[begin_idx - 1]
            last_cell_end_time = ts.times[end_idx]

            # Locate the same window on the seconds grid of the result.
            lo = numpy.searchsorted(ts2.times, first_cell_begin_time // ms_coef)
            hi = numpy.searchsorted(ts2.times, last_cell_end_time // ms_coef) + 1

            # NOTE(review): lo/hi are positions in ts2.times, yet the sums
            # below read ts.data rather than ts2.data - looks like a typo
            # that keeps the interpolated values unchecked; confirm and
            # switch to ts2.data if so.
            ts2_max = ts.data[lo: hi].sum()
            ts2_min = ts.data[lo + 1: hi - 1].sum()

            assert ts2_min <= ts_sum <= ts2_max, "NOT {} <= {} <= {}".format(ts2_min, ts_sum, ts2_max)
kdanylov aka koder45183182017-04-30 23:55:40 +0300109
110
def test_interpolate2():
    """Interpolate a 5-sample series with a constant 347 ms phase shift.

    Samples are exactly one second apart, and the test expects the
    interpolated data to equal the source data.
    """
    ds = DataSource(node_id=NODE_ID, sensor=SENSOR, dev=DEV, metric=METRIC)
    samples = 5
    ms_coef = 1000

    # 1347, 2347, ... ms: evenly spaced, never on a second border.
    source_times = numpy.arange(samples, dtype='uint64') * ms_coef + ms_coef + 347
    source_values = numpy.random.randint(10, 1000, size=samples, dtype='uint64')

    ts = TimeSeries("test", raw=None, data=source_values, times=source_times,
                    units=DATA_UNITS, source=ds, time_units=TIME_UNITS)
    ts2 = c_interpolate_ts_on_seconds_border(ts, nc=True)

    # The source keeps its units; the result is expressed in seconds.
    assert ts.time_units == 'ms'
    assert ts2.time_units == 's'
    # dtypes survive the interpolation.
    assert ts2.times.dtype == ts.times.dtype
    assert ts2.data.dtype == ts.data.dtype

    values_match = ts2.data == ts.data
    assert values_match.all()
129
130
def test_interpolate_qd():
    """Check c_interpolate_ts_on_seconds_border with tp='qd' on random series.

    Every value of the result must equal the source sample found by
    searching the source times for the result time shifted back by half a
    step.
    """
    ds = DataSource(node_id=NODE_ID, sensor=SENSOR, dev=DEV, metric=METRIC)
    samples = 200
    ms_coef = 1000
    s_offset = 377 * ms_coef
    ms_offset = 300 + s_offset

    for _ in range(16):
        jitter = numpy.random.randint(100, size=samples, dtype='uint64')
        ticks = ms_coef * numpy.arange(samples, dtype='uint64')
        source_times = jitter + ticks + s_offset + ms_offset
        source_values = numpy.random.randint(30, 60, size=samples, dtype='uint64')

        ts = TimeSeries("test", raw=None, data=source_values, times=source_times,
                        units=DATA_UNITS, source=ds, time_units=TIME_UNITS)

        ts2 = c_interpolate_ts_on_seconds_border(ts, nc=True, tp='qd')

        assert ts.time_units == 'ms'
        assert ts2.time_units == 's'
        assert ts2.times.dtype == ts.times.dtype
        assert ts2.data.dtype == ts.data.dtype
        assert ts2.data.size == ts2.times.size

        # Factor that brings result times back to the source time units.
        coef = unit_conversion_coef(ts2.time_units, ts.time_units)
        assert isinstance(coef, int)

        # Half of the result step, expressed in source time units.
        half_step = (ts2.times[1] - ts2.times[0]) * coef // 2

        picked = numpy.searchsorted(ts.times, ts2.times * coef - half_step)
        assert (ts2.data == ts.data[picked]).all()
kdanylov aka koder736e5c12017-05-07 17:27:14 +0300161
162
def test_interpolate_fio():
    """Check tp='fio' interpolation of a series with a hole in the middle.

    The source has one sample per second for seconds 0..14 except 5..9.
    The result is expected to cover every second of the range, with zeros
    where the source had no samples and the source values elsewhere.
    """
    ds = DataSource(node_id=NODE_ID, sensor=SENSOR, dev=DEV, metric=METRIC)
    ms_coef = 1000
    s_offset = 377 * ms_coef
    gap_start = 5
    gap_size = 5
    full_size = 15
    gap_end = gap_start + gap_size

    # Seconds 0..full_size-1 with the [gap_start, gap_end) stretch removed.
    times = [sec for sec in range(full_size) if not gap_start <= sec < gap_end]
    src_times = numpy.array(times, dtype='uint64') * ms_coef + s_offset
    src_values = numpy.random.randint(10, 100, size=len(src_times), dtype='uint64')

    ts = TimeSeries("test", raw=None, data=src_values, times=src_times,
                    units=DATA_UNITS, source=ds, time_units=TIME_UNITS)

    ts2 = c_interpolate_ts_on_seconds_border(ts, nc=True, tp='fio')

    assert ts.time_units == 'ms'
    assert ts2.time_units == 's'
    assert ts2.times.dtype == ts.times.dtype
    assert ts2.data.dtype == ts.data.dtype
    assert ts2.times[0] == ts.times[0] // ms_coef
    assert ts2.times[-1] == ts.times[-1] // ms_coef
    assert ts2.data.size == ts2.times.size

    # The result must be a gap-free run of seconds from first to last sample.
    expected_times = numpy.arange(ts.times[0] // ms_coef, ts.times[-1] // ms_coef + 1, dtype='uint64')
    assert ts2.times.size == expected_times.size
    assert (ts2.times == expected_times).all()

    # Before the hole: values copied as is.
    head = ts2.data[:gap_start]
    assert (head == ts.data[:gap_start]).all()
    # Inside the hole: zeros.
    hole = ts2.data[gap_start:gap_start + gap_size]
    assert (hole == 0).all()
    # After the hole: the remaining source values, shifted by the hole size.
    tail = ts2.data[gap_start + gap_size:]
    assert (tail == ts.data[gap_start:]).all()
195
196
def test_interpolate_fio_negative():
    """tp='fio' interpolation must raise ValueError for this irregular series.

    The sample times are whole seconds except for one sample at 4.5 s.
    """
    ds = DataSource(node_id=NODE_ID, sensor=SENSOR, dev=DEV, metric=METRIC)
    ms_coef = 1000
    s_offset = 377 * ms_coef

    # Seconds relative to s_offset; 4.5 is the off-grid sample.
    seconds = numpy.array([1, 2, 3, 4.5, 5, 6, 7])
    src_times = (seconds * ms_coef + s_offset).astype('uint64')
    src_values = numpy.random.randint(10, 100, size=len(src_times), dtype='uint64')

    ts = TimeSeries("test", raw=None, data=src_values, times=src_times,
                    units=DATA_UNITS, source=ds, time_units=TIME_UNITS)

    with pytest.raises(ValueError):
        c_interpolate_ts_on_seconds_border(ts, nc=True, tp='fio')