koder aka kdanilov | a732a60 | 2017-02-01 20:29:56 +0200 | [diff] [blame] | 1 | import numpy |
kdanylov aka koder | 736e5c1 | 2017-05-07 17:27:14 +0300 | [diff] [blame] | 2 | import pytest |
| 3 | |
| 4 | |
koder aka kdanilov | a732a60 | 2017-02-01 20:29:56 +0200 | [diff] [blame] | 5 | from wally.statistic import rebin_histogram |
kdanylov aka koder | cdfcdaf | 2017-04-29 10:03:39 +0300 | [diff] [blame] | 6 | from wally.result_classes import DataSource, TimeSeries |
kdanylov aka koder | 4518318 | 2017-04-30 23:55:40 +0300 | [diff] [blame] | 7 | from wally.data_selectors import c_interpolate_ts_on_seconds_border |
| 8 | from wally.utils import unit_conversion_coef |
koder aka kdanilov | a732a60 | 2017-02-01 20:29:56 +0200 | [diff] [blame] | 9 | |
| 10 | |
def array_eq(x: numpy.ndarray, y: numpy.ndarray, max_diff: float = 1E-3) -> bool:
    """Return True if *x* and *y* are elementwise equal within *max_diff*.

    The check is done on the maximum absolute difference, so the inputs must
    be broadcastable to a common shape and non-empty.
    """
    # NOTE: annotation fixed from numpy.array (a factory function) to
    # numpy.ndarray (the actual array type).
    return numpy.abs(x - y).max() <= max_diff
| 13 | |
| 14 | |
def test_conversion_coef():
    """unit_conversion_coef must be correct and invertible on a unit sample."""
    cases = (
        ('x', 'mx', 1000),
        ('Gx', 'Kx', 1000 ** 2),
        ('Gx', 'x', 1000 ** 3),
        ('x', 'Kix', 1.0 / 1024),
        ('x', 'Mix', 1.0 / 1024 ** 2),
        ('mx', 'Mix', 0.001 / 1024 ** 2),
        ('Mix', 'Kix', 1024),
        ('Kix', 'ux', 1024 * 1000 ** 2),
    )

    for src, dst, expected in cases:
        # forward coefficient matches the table within a relative tolerance
        forward = float(unit_conversion_coef(src, dst))
        assert abs(forward / expected - 1) < 1E-5, "{} => {} == {}".format(src, dst, forward)
        # backward coefficient is the reciprocal of the forward one
        backward = float(unit_conversion_coef(dst, src))
        assert abs(backward * forward - 1) < 1E-5, "{} => {} == {}".format(src, dst, backward)
| 32 | |
| 33 | |
def test_rebin_histo():
    """rebin_histogram must preserve totals, with and without tail boundaries."""
    src_histo = numpy.ones(100, dtype=int)
    src_edges = numpy.arange(100)

    # plain rebinning of a flat histogram into 10 buckets
    histo, edges = rebin_histogram(src_histo, src_edges, 10)

    assert edges.shape == (10,)
    assert histo.shape == (10,)
    assert edges.dtype.name.startswith('float')
    assert histo.dtype.name.startswith('int')

    assert array_eq(edges, numpy.arange(10) * 9.9)
    assert histo.sum() == src_histo.sum()
    assert list(histo) == [10] * 10

    # rebinning with explicit tail cut points collapses each tail into
    # its boundary bucket
    histo, edges = rebin_histogram(src_histo, src_edges, 3,
                                   left_tail_idx=20,
                                   right_tail_idx=50)

    assert edges.shape == (3,)
    assert histo.shape == (3,)
    assert array_eq(edges, numpy.array([20, 30, 40]))
    assert histo.sum() == src_histo.sum()
    assert list(histo) == [30, 10, 60]
kdanylov aka koder | cdfcdaf | 2017-04-29 10:03:39 +0300 | [diff] [blame] | 58 | |
| 59 | |
# Shared fixture identifiers used to construct DataSource/TimeSeries objects
# in the interpolation tests below.
SUITE_ID = "suite1"
JOB_ID = "job1"
NODE_ID = "node1"
SENSOR = "sensor"
DEV = "dev"
METRIC = "metric"
TAG = "csv"
DATA_UNITS = "x"   # dummy data unit for generated series
TIME_UNITS = "ms"  # source time axis is expressed in milliseconds
| 69 | |
| 70 | |
def test_interpolate():
    """Interpolation to 1s borders must roughly preserve sums over sub-ranges.

    For random sub-ranges of the source series, the sum of source values must
    lie between the sums of the interpolated series taken over the narrowest
    and the widest second-aligned windows covering the same time span.
    """
    ds = DataSource(node_id=NODE_ID, sensor=SENSOR, dev=DEV, metric=METRIC)
    samples = 200
    ms_coef = 1000
    s_offset = 377 * ms_coef
    ms_offset = 300 + s_offset
    borders = 10      # keep checked ranges away from the series edges
    block_size = 20

    for i in range(16):
        # ~1 sample per second with up to 100ms of jitter
        source_times = numpy.random.randint(100, size=samples, dtype='uint64') + \
                       ms_coef * numpy.arange(samples, dtype='uint64') + s_offset + ms_offset
        source_values = numpy.random.randint(30, 60, size=samples, dtype='uint64')

        ts = TimeSeries("test", raw=None, data=source_values, times=source_times, units=DATA_UNITS,
                        source=ds, time_units=TIME_UNITS)

        ts2 = c_interpolate_ts_on_seconds_border(ts, nc=True)

        assert ts.time_units == 'ms'
        assert ts2.time_units == 's'
        assert ts2.times.dtype == ts.times.dtype
        assert ts2.data.dtype == ts.data.dtype

        for begin_idx in numpy.random.randint(borders, samples - borders, size=20):
            begin_idx = int(begin_idx)
            end_idx = min(begin_idx + block_size, ts.times.size - 1)

            # time span (ms) covered by the checked source cells
            first_cell_begin_time = ts.times[begin_idx - 1]
            last_cell_end_time = ts.times[end_idx]
            ts_sum = ts.data[begin_idx:end_idx].sum()

            # second-aligned window in the interpolated series covering the span
            ts2_begin_idx = numpy.searchsorted(ts2.times, first_cell_begin_time // ms_coef)
            ts2_end_idx = numpy.searchsorted(ts2.times, last_cell_end_time // ms_coef) + 1

            # BUG FIX: the bounds must come from the interpolated series
            # (ts2.data), not from the source one - the indexes above address
            # positions in ts2.times, so indexing ts.data with them compared
            # unrelated slices.
            ts2_max = ts2.data[ts2_begin_idx: ts2_end_idx].sum()
            ts2_min = ts2.data[ts2_begin_idx + 1: ts2_end_idx - 1].sum()

            assert ts2_min <= ts_sum <= ts2_max, "NOT {} <= {} <= {}".format(ts2_min, ts_sum, ts2_max)
kdanylov aka koder | 4518318 | 2017-04-30 23:55:40 +0300 | [diff] [blame] | 109 | |
| 110 | |
def test_interpolate2():
    """A series sampled exactly once per second must pass through unchanged."""
    ds = DataSource(node_id=NODE_ID, sensor=SENSOR, dev=DEV, metric=METRIC)
    samples = 5
    ms_coef = 1000

    # points exactly 1000ms apart, with a constant sub-second offset of 347ms
    src_times = 347 + ms_coef + numpy.arange(samples, dtype='uint64') * ms_coef
    src_values = numpy.random.randint(10, 1000, size=samples, dtype='uint64')

    ts = TimeSeries("test", raw=None, data=src_values, times=src_times, units=DATA_UNITS,
                    source=ds, time_units=TIME_UNITS)

    ts2 = c_interpolate_ts_on_seconds_border(ts, nc=True)

    assert ts.time_units == 'ms'
    assert ts2.time_units == 's'
    assert ts2.times.dtype == ts.times.dtype
    assert ts2.data.dtype == ts.data.dtype
    # data values must be copied verbatim
    assert (ts2.data == ts.data).all()
| 129 | |
| 130 | |
def test_interpolate_qd():
    """'qd' interpolation must sample source points, not average them."""
    ds = DataSource(node_id=NODE_ID, sensor=SENSOR, dev=DEV, metric=METRIC)
    samples = 200
    ms_coef = 1000
    s_offset = 377 * ms_coef
    ms_offset = 300 + s_offset

    for _ in range(16):
        # ~1 sample per second with up to 100ms of jitter
        jitter = numpy.random.randint(100, size=samples, dtype='uint64')
        src_times = jitter + ms_coef * numpy.arange(samples, dtype='uint64') + s_offset + ms_offset
        src_values = numpy.random.randint(30, 60, size=samples, dtype='uint64')

        ts = TimeSeries("test", raw=None, data=src_values, times=src_times, units=DATA_UNITS,
                        source=ds, time_units=TIME_UNITS)

        ts2 = c_interpolate_ts_on_seconds_border(ts, nc=True, tp='qd')

        assert ts.time_units == 'ms'
        assert ts2.time_units == 's'
        assert ts2.times.dtype == ts.times.dtype
        assert ts2.data.dtype == ts.data.dtype
        assert ts2.data.size == ts2.times.size

        coef = unit_conversion_coef(ts2.time_units, ts.time_units)
        assert isinstance(coef, int)

        # every output value must equal the source sample found by searching
        # half a result-interval before each output timestamp
        dtime = (ts2.times[1] - ts2.times[0]) * coef // 2
        idxs = numpy.searchsorted(ts.times, ts2.times * coef - dtime)
        assert (ts2.data == ts.data[idxs]).all()
kdanylov aka koder | 736e5c1 | 2017-05-07 17:27:14 +0300 | [diff] [blame] | 161 | |
| 162 | |
def test_interpolate_fio():
    """'fio' interpolation must zero-fill a gap in an otherwise 1s-spaced series."""
    ds = DataSource(node_id=NODE_ID, sensor=SENSOR, dev=DEV, metric=METRIC)
    ms_coef = 1000
    s_offset = 377 * ms_coef
    gap_start = 5
    gap_size = 5
    full_size = 15

    # second indexes 0..14 with [5, 10) missing
    seconds = list(range(gap_start)) + list(range(gap_start + gap_size, full_size))
    src_times = numpy.array(seconds, dtype='uint64') * ms_coef + s_offset
    src_values = numpy.random.randint(10, 100, size=len(src_times), dtype='uint64')

    ts = TimeSeries("test", raw=None, data=src_values, times=src_times, units=DATA_UNITS,
                    source=ds, time_units=TIME_UNITS)

    ts2 = c_interpolate_ts_on_seconds_border(ts, nc=True, tp='fio')

    assert ts.time_units == 'ms'
    assert ts2.time_units == 's'
    assert ts2.times.dtype == ts.times.dtype
    assert ts2.data.dtype == ts.data.dtype
    assert ts2.times[0] == ts.times[0] // ms_coef
    assert ts2.times[-1] == ts.times[-1] // ms_coef
    assert ts2.data.size == ts2.times.size

    # the result time axis must be a contiguous range of whole seconds
    expected_times = numpy.arange(ts.times[0] // ms_coef, ts.times[-1] // ms_coef + 1, dtype='uint64')
    assert ts2.times.size == expected_times.size
    assert (ts2.times == expected_times).all()

    # values around the gap are preserved, the gap itself is zero-filled
    assert (ts2.data[:gap_start] == ts.data[:gap_start]).all()
    assert (ts2.data[gap_start:gap_start + gap_size] == 0).all()
    assert (ts2.data[gap_start + gap_size:] == ts.data[gap_start:]).all()
| 195 | |
| 196 | |
def test_interpolate_fio_negative():
    """'fio' interpolation must reject series not aligned to whole seconds."""
    ds = DataSource(node_id=NODE_ID, sensor=SENSOR, dev=DEV, metric=METRIC)
    ms_coef = 1000
    s_offset = 377 * ms_coef

    # the 4.5 entry breaks the exact one-second spacing
    seconds = numpy.array([1, 2, 3, 4.5, 5, 6, 7])
    src_times = (seconds * ms_coef + s_offset).astype('uint64')
    src_values = numpy.random.randint(10, 100, size=len(src_times), dtype='uint64')

    ts = TimeSeries("test", raw=None, data=src_values, times=src_times, units=DATA_UNITS,
                    source=ds, time_units=TIME_UNITS)

    with pytest.raises(ValueError):
        c_interpolate_ts_on_seconds_border(ts, nc=True, tp='fio')