kdanylov aka koder | cdfcdaf | 2017-04-29 10:03:39 +0300 | [diff] [blame] | 1 | import logging |
kdanylov aka koder | 84de1e4 | 2017-05-22 14:00:07 +0300 | [diff] [blame] | 2 | from typing import Tuple, Iterator, List, Iterable, Dict, Union, Callable, Set |
kdanylov aka koder | cdfcdaf | 2017-04-29 10:03:39 +0300 | [diff] [blame] | 3 | |
| 4 | import numpy |
| 5 | |
kdanylov aka koder | b083333 | 2017-05-13 20:39:17 +0300 | [diff] [blame] | 6 | from cephlib.numeric_types import DataSource, TimeSeries |
| 7 | from cephlib.storage_selectors import c_interpolate_ts_on_seconds_border |
kdanylov aka koder | 84de1e4 | 2017-05-22 14:00:07 +0300 | [diff] [blame] | 8 | from cephlib.node import NodeInfo |
kdanylov aka koder | cdfcdaf | 2017-04-29 10:03:39 +0300 | [diff] [blame] | 9 | |
kdanylov aka koder | 026e5f2 | 2017-05-15 01:04:39 +0300 | [diff] [blame] | 10 | from .result_classes import IWallyStorage |
kdanylov aka koder | cdfcdaf | 2017-04-29 10:03:39 +0300 | [diff] [blame] | 11 | from .suits.io.fio_hist import expected_lat_bins |
kdanylov aka koder | cdfcdaf | 2017-04-29 10:03:39 +0300 | [diff] [blame] | 12 | |
| 13 | |
| 14 | logger = logging.getLogger("wally") |
| 15 | |
# Separately for each test heatmaps & agg across whole time histos:
| 17 | # * fio latency heatmap for all instances |
| 18 | # * data dev iops across all osd |
| 19 | # * data dev bw across all osd |
# * data dev qd across all osd
| 21 | # * journal dev iops across all osd |
| 22 | # * journal dev bw across all osd |
| 23 | # * journal dev qd across all osd |
| 24 | # * net dev pps across all hosts |
| 25 | # * net dev bps across all hosts |
| 26 | |
| 27 | # Main API's |
| 28 | # get sensors by pattern |
# align values to seconds
| 30 | # cut ranges for particular test |
| 31 | # transform into 2d histos (either make histos or rebin them) and clip outliers same time |
| 32 | |
| 33 | |
| 34 | AGG_TAG = 'ALL' |
| 35 | |
| 36 | |
def find_all_series(rstorage: IWallyStorage, suite_id: str, job_id: str, metric: str) -> Iterator[TimeSeries]:
    """Iterate over the selected metric's time series for every node of the given suite/job."""
    sources = rstorage.iter_ts(suite_id=suite_id, job_id=job_id, metric=metric)
    return map(rstorage.get_ts, sources)
kdanylov aka koder | cdfcdaf | 2017-04-29 10:03:39 +0300 | [diff] [blame] | 40 | |
| 41 | |
def get_aggregated(rstorage: IWallyStorage, suite_id: str, job_id: str, metric: str,
                   trange: Tuple[int, int]) -> TimeSeries:
    """Sum the selected fio metric over all nodes for the given suite/job.

    Results are memoized in rstorage.storage.other_caches['aggregated'];
    a copy is always returned so callers can't mutate the cached series.

    Raises NameError if no matching time series exist and ValueError if any
    series has the wrong time units or data shape.
    """

    key = (id(rstorage), suite_id, job_id, metric, trange)
    aggregated_cache = rstorage.storage.other_caches['aggregated']
    if key in aggregated_cache:
        return aggregated_cache[key].copy()

    tss = list(find_all_series(rstorage, suite_id, job_id, metric))

    if len(tss) == 0:
        # NameError kept for backward compatibility with existing callers
        raise NameError("Can't find any TS for {},{},{}".format(suite_id, job_id, metric))

    # Interpolate every series onto whole-second boundaries so they can be
    # summed index-by-index; 'lat' histogram series may have a broken step.
    c_intp = c_interpolate_ts_on_seconds_border
    tss_inp = [c_intp(ts.select(trange), tp='fio', allow_broken_step=(metric == 'lat')) for ts in tss]

    res = None
    res_times = None

    for ts in tss_inp:
        if ts.time_units != 's':
            msg = "time_units must be 's' for fio sensor"
            logger.error(msg)
            raise ValueError(msg)

        # Latency is a 2D histogram with a fixed number of bins per sample
        if metric == 'lat' and (len(ts.data.shape) != 2 or ts.data.shape[1] != expected_lat_bins):
            msg = "Sensor {}.{} on node {} has shape={}. Can only process sensors with shape=[X, {}].".format(
                ts.source.dev, ts.source.sensor, ts.source.node_id, ts.data.shape, expected_lat_bins)
            logger.error(msg)
            raise ValueError(msg)

        if metric != 'lat' and len(ts.data.shape) != 1:
            msg = "Sensor {}.{} on node {} has shape={}. Can only process 1D sensors.".format(
                ts.source.dev, ts.source.sensor, ts.source.node_id, ts.data.shape)
            logger.error(msg)
            raise ValueError(msg)

        # The requested range must be fully covered by this series.
        # Fix: dropped the leftover interactive IPython debugging hook that was
        # wrapped around this assert, and put the message arguments in the
        # order the text describes (trange first, TS bounds second).
        assert ts.times[0] <= trange[0] and ts.times[-1] >= trange[1], \
            "trange=[{}, {}] not in TS range [{}, {}]".format(trange[0], trange[1], ts.times[0], ts.times[-1])

        idx1, idx2 = numpy.searchsorted(ts.times, trange)
        idx2 += 1  # searchsorted returns the left insertion point; make the slice inclusive

        # After interpolation times must be contiguous seconds
        assert (idx2 - idx1) == (trange[1] - trange[0] + 1), \
            "Broken time array at {} for {}".format(trange, ts.source)

        dt = ts.data[idx1: idx2]
        if res is None:
            res = dt.copy()
            res_times = ts.times[idx1: idx2].copy()
        else:
            assert res.shape == dt.shape, "res.shape(={}) != dt.shape(={})".format(res.shape, dt.shape)
            res += dt

    ds = DataSource(suite_id=suite_id, job_id=job_id, node_id=AGG_TAG, sensor='fio',
                    dev=AGG_TAG, metric=metric, tag='csv')
    agg_ts = TimeSeries(res, source=ds,
                        times=res_times,
                        units=tss_inp[0].units,
                        histo_bins=tss_inp[0].histo_bins,
                        time_units=tss_inp[0].time_units)
    aggregated_cache[key] = agg_ts
    return agg_ts.copy()
kdanylov aka koder | cdfcdaf | 2017-04-29 10:03:39 +0300 | [diff] [blame] | 110 | |
kdanylov aka koder | 84de1e4 | 2017-05-22 14:00:07 +0300 | [diff] [blame] | 111 | |
def get_nodes(storage: IWallyStorage, roles: Iterable[str]) -> List[NodeInfo]:
    """Return every stored node that has at least one of the requested roles."""
    wanted = set(roles)
    matched = []
    for node in storage.load_nodes():
        if node.roles & wanted:
            matched.append(node)
    return matched
kdanylov aka koder | cdfcdaf | 2017-04-29 10:03:39 +0300 | [diff] [blame] | 114 | |