import numpy

from wally.statistic import rebin_histogram
from wally.result_classes import DataSource, TimeSeries
from wally.data_selectors import interpolate_ts_on_seconds_border, c_interpolate_ts_on_seconds_border


def array_eq(x: numpy.ndarray, y: numpy.ndarray, max_diff: float = 1E-3) -> bool:
    # elementwise equality up to an absolute tolerance
    return numpy.abs(x - y).max() <= max_diff


def test_rebin_histo():
    # a flat histogram: 100 unit-count bins with left edges 0..99
    curr_histo = numpy.empty((100,), dtype=int)
    curr_histo[:] = 1
    edges = numpy.arange(100)
    new_histo, new_edges = rebin_histogram(curr_histo, edges, 10)

    assert new_edges.shape == (10,)
    assert new_histo.shape == (10,)
    assert new_edges.dtype.name.startswith('float')
    assert new_histo.dtype.name.startswith('int')

    # the [0, 99] range splits into 10 bins of width 9.9, 10 counts in each
    assert array_eq(new_edges, numpy.arange(10) * 9.9)
    assert new_histo.sum() == curr_histo.sum()
    assert list(new_histo) == [10] * 10

    # rebin only the [edges[20], edges[50]) range into 3 bins; the tail counts
    # fold into the outermost bins: 20 + 10 = 30 on the left, 10 + 50 = 60 on the right
    new_histo, new_edges = rebin_histogram(curr_histo, edges, 3,
                                           left_tail_idx=20,
                                           right_tail_idx=50)

    assert new_edges.shape == (3,)
    assert new_histo.shape == (3,)
    assert array_eq(new_edges, numpy.array([20, 30, 40]))
    assert new_histo.sum() == curr_histo.sum()
    assert list(new_histo) == [30, 10, 60]
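

# A minimal pure-numpy sketch (a hypothetical reference, not wally's actual
# implementation) of the rebinning semantics the assertions above encode:
# split [edges[left_tail_idx], edges[right_tail_idx]] into equal-width bins
# and fold the tail counts into the outermost bins.
def rebin_reference(histo, edges, new_bins_count, left_tail_idx=None, right_tail_idx=None):
    lidx = 0 if left_tail_idx is None else left_tail_idx
    ridx = len(edges) - 1 if right_tail_idx is None else right_tail_idx
    bin_width = (edges[ridx] - edges[lidx]) / new_bins_count
    new_edges = edges[lidx] + bin_width * numpy.arange(new_bins_count)
    # map each old left edge onto a new bin index; clipping folds the tails
    # into bins 0 and new_bins_count - 1
    targets = numpy.clip(((edges - edges[lidx]) / bin_width).astype(int),
                         0, new_bins_count - 1)
    new_histo = numpy.zeros(new_bins_count, dtype=histo.dtype)
    for target, count in zip(targets, histo):
        new_histo[target] += count
    return new_histo, new_edges
# e.g. rebin_reference(curr_histo, edges, 3, 20, 50) reproduces [30, 10, 60] above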


SUITE_ID = "suite1"
JOB_ID = "job1"
NODE_ID = "node1"
SENSOR = "sensor"
DEV = "dev"
METRIC = "metric"
TAG = "csv"
DATA_UNITS = "x"
TIME_UNITS = "ms"


def test_interpolate():
    ds = DataSource(node_id=NODE_ID, sensor=SENSOR, dev=DEV, metric=METRIC)
    samples = 200
    ms_coef = 1000
    s_offset = 377 * ms_coef
    ms_offset = 300 + s_offset

    for i in range(16):
        # timestamps in ms: roughly one sample per second with up to 100 ms of
        # jitter, shifted well away from zero (only the shift's size matters)
        source_times = numpy.random.randint(100, size=samples, dtype='uint64') + \
                       ms_coef * numpy.arange(samples, dtype='uint64') + s_offset + ms_offset
        source_values = numpy.random.randint(30, 60, size=samples, dtype='uint64')

        ts = TimeSeries("test", raw=None, data=source_values, times=source_times, units=DATA_UNITS,
                        source=ds, time_units=TIME_UNITS)

        # ts2 = interpolate_ts_on_seconds_border(ts)
        ts2 = c_interpolate_ts_on_seconds_border(ts, nc=True)

        # print()
        # print(ts.times)
        # print(ts.data, ts.data.sum())
        # print(ts2.times)
        # print(ts2.data, ts2.data.sum())

        # interpolation must convert time units to seconds while preserving
        # dtypes and the total amount of data
        assert ts.time_units == 'ms'
        assert ts2.time_units == 's'
        assert ts2.times.dtype == ts.times.dtype
        assert ts2.data.dtype == ts.data.dtype

        assert ts.data.sum() == ts2.data.sum()

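        # Windowed consistency check: interpolation may move a sample's value
        # only between the seconds its time span touches, so any source block's
        # sum must fall between the interpolated sums over a second-aligned
        # window fully covering the block and one lying strictly inside it.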
        borders = 5
        block_size = samples // 10
        for begin_idx in numpy.random.randint(borders, samples - borders, size=20):
            begin_idx = int(begin_idx)
            end_idx = min(begin_idx + block_size, ts.times.size - 1)

            first_cell_begin_time = ts.times[begin_idx - 1]
            last_cell_end_time = ts.times[end_idx]
            ts_sum = ts.data[begin_idx:end_idx].sum()

            ts2_begin_idx = numpy.searchsorted(ts2.times, first_cell_begin_time // ms_coef)
            ts2_end_idx = numpy.searchsorted(ts2.times, last_cell_end_time // ms_coef) + 1
            # bounds must come from the interpolated series, not the source
            ts2_max = ts2.data[ts2_begin_idx: ts2_end_idx].sum()
            ts2_min = ts2.data[ts2_begin_idx + 1: ts2_end_idx - 1].sum()

            assert ts2_min <= ts_sum <= ts2_max, "NOT {} <= {} <= {}".format(ts2_min, ts_sum, ts2_max)
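

if __name__ == "__main__":
    # Convenience entry point; an assumption on my part, since the project
    # presumably collects and runs these tests through pytest.
    test_rebin_histo()
    test_interpolate()
    print("ok")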