import os
import abc
import logging
from io import BytesIO
from functools import wraps
from typing import Dict, Any, Iterator, Tuple, cast, List, Callable, Set, Optional
from collections import defaultdict

import numpy
import scipy.stats
import matplotlib.pyplot as plt

import wally

from . import html
from .stage import Stage, StepOrder
from .test_run_class import TestRun
from .hlstorage import ResultStorage
from .node_interfaces import NodeInfo
from .utils import b2ssize, b2ssize_10, STORAGE_ROLES
from .statistic import (calc_norm_stat_props, calc_histo_stat_props, moving_average, moving_dev,
                        hist_outliers_perc, ts_hist_outliers_perc, find_ouliers_ts, approximate_curve)
from .result_classes import (StatProps, DataSource, TimeSeries, NormStatProps, HistoStatProps, SuiteConfig,
                             IResultStorage)
from .suits.io.fio_hist import get_lat_vals, expected_lat_bins
from .suits.io.fio import FioTest, FioJobConfig
from .suits.io.fio_job import FioJobParams
from .suits.job import JobConfig


logger = logging.getLogger("wally")


# ---------------- CONSTS ---------------------------------------------------------------------------------------------


DEBUG = False
LARGE_BLOCKS = 256
MiB2KiB = 1024
MS2S = 1000


# ---------------- PROFILES ------------------------------------------------------------------------------------------


# These are default values; the real values are loaded from the config

class ColorProfile:
    primary_color = 'b'
    suppl_color1 = 'teal'
    suppl_color2 = 'magenta'
    suppl_color3 = 'orange'
    box_color = 'y'
    err_color = 'red'

    noise_alpha = 0.3
    subinfo_alpha = 0.7

    imshow_colormap = None  # type: str


class StyleProfile:
    grid = True
    tight_layout = True
    hist_boxes = 10
    hist_lat_boxes = 25
    hm_hist_bins_count = 25
    min_points_for_dev = 5

    dev_range_x = 2.0
    dev_perc = 95

    point_shape = 'o'
    err_point_shape = '*'

    avg_range = 20
    approx_average = True

    curve_approx_level = 6
    curve_approx_points = 100
    assert avg_range >= min_points_for_dev

    # figure size in inches
    figsize = (10, 6)

    extra_io_spine = True

    legend_for_eng = True
    heatmap_interpolation = '1d'
    heatmap_interpolation_points = 300
    outliers_q_nd = 3.0
    outliers_hide_q_nd = 4.0
    outliers_lat = (0.01, 0.995)

    violin_instead_of_box = True
    violin_point_count = 30000

    heatmap_colorbar = False

    min_iops_vs_qd_jobs = 3

    units = {
        'bw': ("MiBps", MiB2KiB, "bandwidth"),
        'iops': ("IOPS", 1, "iops"),
        'lat': ("ms", 1, "latency")
    }


# ---------------- STRUCTS -------------------------------------------------------------------------------------------


# TODO: needs to be revised, should use StatProps fields instead
class StoragePerfSummary:
    def __init__(self, name: str) -> None:
        self.direct_iops_r_max = 0  # type: int
        self.direct_iops_w_max = 0  # type: int

        # 64KiB blocks are used instead of 4KiB to fill caches faster
        self.direct_iops_w64_max = 0  # type: int

        self.rws4k_10ms = 0  # type: int
        self.rws4k_30ms = 0  # type: int
        self.rws4k_100ms = 0  # type: int
        self.bw_write_max = 0  # type: int
        self.bw_read_max = 0  # type: int

        self.bw = None  # type: float
        self.iops = None  # type: float
        self.lat = None  # type: float
        self.lat_50 = None  # type: float
        self.lat_95 = None  # type: float


class IOSummary:
    def __init__(self,
                 qd: int,
                 block_size: int,
                 nodes_count: int,
                 bw: NormStatProps,
                 lat: HistoStatProps) -> None:

        self.qd = qd
        self.nodes_count = nodes_count
        self.block_size = block_size

        self.bw = bw
        self.lat = lat


# -------------- AGGREGATION AND STAT FUNCTIONS ----------------------------------------------------------------------

def make_iosum(rstorage: ResultStorage, suite: SuiteConfig, job: FioJobConfig) -> IOSummary:
    lat = get_aggregated(rstorage, suite, job, "lat")
    bins_edges = numpy.array(get_lat_vals(lat.data.shape[1]), dtype='float32') / 1000
    io = get_aggregated(rstorage, suite, job, "bw")

    return IOSummary(job.qd,
                     nodes_count=len(suite.nodes_ids),
                     block_size=job.bsize,
                     lat=calc_histo_stat_props(lat, bins_edges, rebins_count=StyleProfile.hist_boxes),
                     bw=calc_norm_stat_props(io, StyleProfile.hist_boxes))

#
# def iter_io_results(rstorage: ResultStorage,
#                     qds: List[int] = None,
#                     op_types: List[str] = None,
#                     sync_types: List[str] = None,
#                     block_sizes: List[int] = None) -> Iterator[Tuple[TestSuiteConfig, FioJobConfig]]:
#
#     for suite in rstorage.iter_suite(FioTest.name):
#         for job in rstorage.iter_job(suite):
#             fjob = cast(FioJobConfig, job)
#             assert int(fjob.vals['numjobs']) == 1
#
#             if sync_types is not None and fjob.sync_mode in sync_types:
#                 continue
#
#             if block_sizes is not None and fjob.bsize not in block_sizes:
#                 continue
#
#             if op_types is not None and fjob.op_type not in op_types:
#                 continue
#
#             if qds is not None and fjob.qd not in qds:
#                 continue
#
#             yield suite, fjob

AGG_TAG = 'ALL'


def get_aggregated(rstorage: ResultStorage, suite: SuiteConfig, job: FioJobConfig, metric: str) -> TimeSeries:
    tss = list(rstorage.iter_ts(suite, job, metric=metric))
    ds = DataSource(suite_id=suite.storage_id,
                    job_id=job.storage_id,
                    node_id=AGG_TAG,
                    sensor='fio',
                    dev=AGG_TAG,
                    metric=metric,
                    tag='csv')

    agg_ts = TimeSeries(metric,
                        raw=None,
                        source=ds,
                        data=numpy.zeros(tss[0].data.shape, dtype=tss[0].data.dtype),
                        times=tss[0].times.copy(),
                        units=tss[0].units)

    for ts in tss:
        if metric == 'lat' and (len(ts.data.shape) != 2 or ts.data.shape[1] != expected_lat_bins):
            logger.error("Sensor %s.%s on node %s has " +
                         "shape=%s. Can only process sensors with shape=[X, %s].",
                         ts.source.dev, ts.source.sensor, ts.source.node_id,
                         ts.data.shape, expected_lat_bins)
            raise ValueError()

        if metric != 'lat' and len(ts.data.shape) != 1:
            logger.error("Sensor %s.%s on node %s has " +
                         "shape=%s. Can only process 1D sensors.",
                         ts.source.dev, ts.source.sensor, ts.source.node_id, ts.data.shape)
            raise ValueError()

        # TODO: match times on different ts
        agg_ts.data += ts.data

    return agg_ts

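
# Illustrative sketch (hypothetical numbers, not part of the report pipeline):
# get_aggregated() above simply sums equally-shaped per-node arrays, so two
# nodes reporting [10, 12, 11] and [9, 13, 10] IOPS yield the aggregated
# series [19, 25, 21]:
#
#     node_a = numpy.array([10, 12, 11])
#     node_b = numpy.array([9, 13, 10])
#     assert ((node_a + node_b) == numpy.array([19, 25, 21])).all()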

def is_sensor_numarray(sensor: str, metric: str) -> bool:
    """Returns True if the sensor provides a one-dimensional array of numeric values, one number per measurement."""
    return True


LEVEL_SENSORS = {("block-io", "io_queue"),
                 ("system-cpu", "procs_blocked"),
                 ("system-cpu", "procs_queue")}


def is_level_sensor(sensor: str, metric: str) -> bool:
    """Returns True if the sensor measures a level of some kind, e.g. queue depth."""
    return (sensor, metric) in LEVEL_SENSORS


def is_delta_sensor(sensor: str, metric: str) -> bool:
    """Returns True if the sensor provides deltas of a cumulative value, e.g. IO completed in a given period."""
    return not is_level_sensor(sensor, metric)


def get_sensor_for_time_range(storage: IResultStorage,
                              node_id: str,
                              sensor: str,
                              dev: str,
                              metric: str,
                              time_range: Tuple[int, int]) -> numpy.array:
    """Return per-second estimated sensor values for the given node and period.

    Raises an error if the requested range is not fully covered by the data in storage.
    It first finds the range of sensor results which fully covers the requested range."""

    ds = DataSource(node_id=node_id, sensor=sensor, dev=dev, metric=metric)
    sensor_data = storage.load_sensor(ds)
    assert sensor_data.time_units == 'us'

    # collected_at is an array of pairs (collection_started_at, collection_finished_at);
    # extract the start time from each pair
    collection_start_at = sensor_data.times[::2]  # type: numpy.array

    MICRO = 1000000

    # convert seconds to us
    begin = time_range[0] * MICRO
    end = time_range[1] * MICRO

    if begin < collection_start_at[0] or end > collection_start_at[-1] or end <= begin:
        raise AssertionError(("Incorrect data for get_sensor - time_range={!r}, collected_at=[{}, ..., {}], " +
                              "sensor = {}_{}.{}.{}").format(time_range,
                                                             sensor_data.times[0] // MICRO,
                                                             sensor_data.times[-1] // MICRO,
                                                             node_id, sensor, dev, metric))

    pos1, pos2 = numpy.searchsorted(collection_start_at, (begin, end))

    # begin time of the current real data chunk
    edge_it = iter(collection_start_at[pos1 - 1: pos2 + 1])

    # current real data value
    val_it = iter(sensor_data.data[pos1 - 1: pos2 + 1])

    # result array, cumulative value per second
    result = numpy.zeros(int(end - begin) // MICRO)
    idx = 0
    curr_summ = 0

    # end of the current time slot
    results_cell_ends = begin + MICRO

    # hack to unify looping
    real_data_end = next(edge_it)
    while results_cell_ends <= end:
        real_data_start = real_data_end
        real_data_end = next(edge_it)
        real_val_left = next(val_it)

        # real data "speed" for the interval [real_data_start, real_data_end]
        real_val_ps = float(real_val_left) / (real_data_end - real_data_start)

        while real_data_end >= results_cell_ends and results_cell_ends <= end:
            # the part of the current real value which fits into the current result cell
            curr_real_chunk = int((results_cell_ends - real_data_start) * real_val_ps)

            # keep the rest of the real data for the next result cell
            real_val_left -= curr_real_chunk
            result[idx] = curr_summ + curr_real_chunk
            idx += 1
            curr_summ = 0

            # adjust real data start time
            real_data_start = results_cell_ends
            results_cell_ends += MICRO

        # don't lose any real data
        curr_summ += real_val_left

    return result

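
# A minimal sketch of the resampling idea above (hypothetical numbers, not used
# by the pipeline): a delta collected over a 2-second window is spread over the
# per-second result cells it covers, assuming a constant rate inside the window.
#
#     window_us = (0, 2000000)                      # collection interval, us
#     delta = 100                                   # ops completed in that interval
#     rate = delta / (window_us[1] - window_us[0])  # ops per us
#     per_cell = rate * 1000000                     # -> 50 ops in each 1s cell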

# -------------- PLOT HELPERS FUNCTIONS ------------------------------------------------------------------------------

def get_emb_data_svg(plt: Any, format: str = 'svg') -> bytes:
    bio = BytesIO()
    if format in ('png', 'jpg'):
        plt.savefig(bio, format=format)
        return bio.getvalue()
    elif format == 'svg':
        plt.savefig(bio, format='svg')
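        # matplotlib emits a complete XML document; keep only the part after its
        # signature comment so a bare <svg> element can be embedded into HTML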
        img_start = "<!-- Created with matplotlib (http://matplotlib.org/) -->"
        return bio.getvalue().decode("utf8").split(img_start, 1)[1].encode("utf8")


def provide_plot(func: Callable[..., None]) -> Callable[..., str]:
    @wraps(func)
    def closure1(storage: ResultStorage,
                 path: DataSource,
                 *args, **kwargs) -> str:
        fpath = storage.check_plot_file(path)
        if not fpath:
            format = path.tag.split(".")[-1]

            plt.figure(figsize=StyleProfile.figsize)
            plt.subplots_adjust(right=0.66)

            func(*args, **kwargs)
            fpath = storage.put_plot_file(get_emb_data_svg(plt, format=format), path)
            logger.debug("Plot %s saved to %r", path, fpath)
            plt.clf()
            plt.close('all')
        return fpath
    return closure1

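
# Usage note: a function decorated with @provide_plot is called as
# plot_func(result_storage, data_source, *plot_args) and returns the path of
# the rendered image, re-using an already rendered file when storage has one.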

def apply_style(style: StyleProfile, eng: bool = True, no_legend: bool = False) -> None:
    if style.grid:
        plt.grid(True)

    if (style.legend_for_eng or not eng) and not no_legend:
        legend_location = "center left"
        legend_bbox_to_anchor = (1.03, 0.81)
        plt.legend(loc=legend_location, bbox_to_anchor=legend_bbox_to_anchor)


# -------------- PLOT FUNCTIONS --------------------------------------------------------------------------------------


@provide_plot
def plot_hist(title: str, units: str,
              prop: StatProps,
              colors: Any = ColorProfile,
              style: Any = StyleProfile) -> None:

    # TODO: units should come from ts
    normed_bins = prop.bins_populations / prop.bins_populations.sum()
    bar_width = prop.bins_edges[1] - prop.bins_edges[0]
    plt.bar(prop.bins_edges, normed_bins, color=colors.box_color, width=bar_width, label="Real data")

    plt.xlabel(units)
    plt.ylabel("Value probability")
    plt.title(title)

    dist_plotted = False
    if isinstance(prop, NormStatProps):
        nprop = cast(NormStatProps, prop)
        stats = scipy.stats.norm(nprop.average, nprop.deviation)

        new_edges, step = numpy.linspace(prop.bins_edges[0], prop.bins_edges[-1],
                                         len(prop.bins_edges) * 10, retstep=True)

        ypoints = stats.cdf(new_edges) * 11
        ypoints = [next - prev for (next, prev) in zip(ypoints[1:], ypoints[:-1])]
        xpoints = (new_edges[1:] + new_edges[:-1]) / 2

        plt.plot(xpoints, ypoints, color=colors.primary_color, label="Expected from\nnormal\ndistribution")
        dist_plotted = True

    plt.gca().set_xlim(left=prop.bins_edges[0])
    if prop.log_bins:
        plt.xscale('log')

    apply_style(style, eng=True, no_legend=not dist_plotted)

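
# Sketch of the overlay trick used in plot_hist above: the expected probability
# mass of a bin is the CDF difference across its edges (toy numbers assumed):
#
#     dist = scipy.stats.norm(0, 1)
#     mass = dist.cdf(1.0) - dist.cdf(-1.0)   # ~0.683, one stddev to each side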

@provide_plot
def plot_v_over_time(title: str, units: str,
                     ts: TimeSeries,
                     plot_avg_dev: bool = True,
                     colors: Any = ColorProfile, style: Any = StyleProfile) -> None:

    min_time = min(ts.times)

    # /1000 is the us-to-ms conversion
    time_points = numpy.array([(val_time - min_time) / 1000 for val_time in ts.times])

    outliers_idxs = find_ouliers_ts(ts.data, cut_range=style.outliers_q_nd)
    outliers_4q_idxs = find_ouliers_ts(ts.data, cut_range=style.outliers_hide_q_nd)
    normal_idxs = numpy.logical_not(outliers_idxs)
    outliers_idxs = outliers_idxs & numpy.logical_not(outliers_4q_idxs)
    hidden_outliers_count = numpy.count_nonzero(outliers_4q_idxs)

    data = ts.data[normal_idxs]
    data_times = time_points[normal_idxs]
    outliers = ts.data[outliers_idxs]
    outliers_times = time_points[outliers_idxs]

    alpha = colors.noise_alpha if plot_avg_dev else 1.0
    plt.plot(data_times, data, style.point_shape,
             color=colors.primary_color, alpha=alpha, label="Data")
    plt.plot(outliers_times, outliers, style.err_point_shape,
             color=colors.err_color, label="Outliers")

    has_negative_dev = False
    plus_minus = "\xb1"

    if plot_avg_dev and len(data) < style.avg_range * 2:
        logger.warning("Array %r is too small to plot the average over %s points", title, style.avg_range)
    elif plot_avg_dev:
        avg_vals = moving_average(data, style.avg_range)
        dev_vals = moving_dev(data, style.avg_range)
        avg_times = moving_average(data_times, style.avg_range)

        if style.approx_average:
            avg_vals = approximate_curve(avg_times, avg_vals, avg_times, style.curve_approx_level)
            dev_vals = approximate_curve(avg_times, dev_vals, avg_times, style.curve_approx_level)

        plt.plot(avg_times, avg_vals, c=colors.suppl_color1, label="Average")

        low_vals_dev = avg_vals - dev_vals * style.dev_range_x
        high_vals_dev = avg_vals + dev_vals * style.dev_range_x
        if style.dev_range_x - int(style.dev_range_x) < 0.01:
            plt.plot(avg_times, low_vals_dev, c=colors.suppl_color2,
                     label="{}{}*stdev".format(plus_minus, int(style.dev_range_x)))
        else:
            plt.plot(avg_times, low_vals_dev, c=colors.suppl_color2,
                     label="{}{}*stdev".format(plus_minus, style.dev_range_x))
        plt.plot(avg_times, high_vals_dev, c=colors.suppl_color2)
        has_negative_dev = low_vals_dev.min() < 0

    plt.xlim(-5, max(time_points) + 5)
    plt.xlabel("Time, seconds from test begin")
    plt.ylabel("{}. Average and {}stddev over {} points".format(units, plus_minus, style.avg_range))
    plt.title(title)

    if has_negative_dev:
        plt.gca().set_ylim(bottom=0)

    apply_style(style, eng=True)


@provide_plot
def plot_lat_over_time(title: str, ts: TimeSeries, bins_vals: List[int], samples: int = 5,
                       colors: Any = ColorProfile,
                       style: Any = StyleProfile) -> None:

    min_time = min(ts.times)
    times = [int(tm - min_time + 500) // 1000 for tm in ts.times]
    ts_len = len(times)
    step = ts_len / samples
    points = [times[int(i * step + 0.5)] for i in range(samples)]
    points.append(times[-1])
    bounds = list(zip(points[:-1], points[1:]))
    agg_data = []
    positions = []
    labels = []

    for begin, end in bounds:
        agg_hist = ts.data[begin:end].sum(axis=0)

        if style.violin_instead_of_box:
            # cut outliers
            idx1, idx2 = hist_outliers_perc(agg_hist, style.outliers_lat)
            agg_hist = agg_hist[idx1:idx2]
            curr_bins_vals = bins_vals[idx1:idx2]

            correct_coef = style.violin_point_count / sum(agg_hist)
            if correct_coef > 1:
                correct_coef = 1
        else:
            curr_bins_vals = bins_vals
            correct_coef = 1

        vals = numpy.empty(shape=(numpy.sum(agg_hist),), dtype='float32')
        cidx = 0

        non_zero, = agg_hist.nonzero()
        for pos in non_zero:
            count = int(agg_hist[pos] * correct_coef + 0.5)

            if count != 0:
                vals[cidx: cidx + count] = curr_bins_vals[pos]
                cidx += count

        agg_data.append(vals[:cidx])
        positions.append((end + begin) / 2)
        labels.append(str((end + begin) // 2))

    if style.violin_instead_of_box:
        patches = plt.violinplot(agg_data,
                                 positions=positions,
                                 showmeans=True,
                                 showmedians=True,
                                 widths=step / 2)

        patches['cmeans'].set_color("blue")
        patches['cmedians'].set_color("green")
        if style.legend_for_eng:
            legend_location = "center left"
            legend_bbox_to_anchor = (1.03, 0.81)
            plt.legend([patches['cmeans'], patches['cmedians']], ["mean", "median"],
                       loc=legend_location, bbox_to_anchor=legend_bbox_to_anchor)
    else:
        plt.boxplot(agg_data, 0, '', positions=positions, labels=labels, widths=step / 4)

    plt.xlim(min(times), max(times))
    plt.xlabel("Time, seconds from test begin, sampled for ~{} seconds".format(int(step)))
    plt.ylabel("Latency, ms")
    plt.title(title)
    apply_style(style, eng=True, no_legend=True)


@provide_plot
def plot_heatmap(title: str,
                 ts: TimeSeries,
                 bins_vals: List[int],
                 colors: Any = ColorProfile,
                 style: Any = StyleProfile) -> None:

    assert len(ts.data.shape) == 2
    assert ts.data.shape[1] == len(bins_vals)

    total_hist = ts.data.sum(axis=0)

    # idx1, idx2 = hist_outliers_perc(total_hist, style.outliers_lat)
    idx1, idx2 = ts_hist_outliers_perc(ts.data, bounds_perc=style.outliers_lat)

    # don't cut too many bins
    min_bins_left = style.hm_hist_bins_count
    if idx2 - idx1 < min_bins_left:
        missed = (min_bins_left - (idx2 - idx1)) // 2
        idx2 = min(len(total_hist), idx2 + missed)
        idx1 = max(0, idx1 - missed)

    data = ts.data[:, idx1:idx2]
    bins_vals = bins_vals[idx1:idx2]

    # not using rebin_histogram here, as the same bins must be applied to many arrays
    step = (bins_vals[-1] - bins_vals[0]) / style.hm_hist_bins_count
    new_bins_edges = numpy.arange(style.hm_hist_bins_count) * step + bins_vals[0]
    bin_mapping = numpy.clip(numpy.searchsorted(new_bins_edges, bins_vals) - 1, 0, len(new_bins_edges) - 1)
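    # e.g. with hypothetical new_bins_edges = [0, 10, 20], a source bin at value
    # 13 lands in heatmap bin 1: searchsorted gives 2, minus one is 1, and the
    # clip keeps every index inside [0, len(new_bins_edges) - 1]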

    # map origin bin ranges onto heatmap bins, iterating over rows
    cmap = []
    for line in data:
        curr_bins = [0] * style.hm_hist_bins_count
        for idx, count in zip(bin_mapping, line):
            curr_bins[idx] += count
        cmap.append(curr_bins)
    ncmap = numpy.array(cmap)

    xmin = 0
    xmax = (ts.times[-1] - ts.times[0]) / 1000 + 1
    ymin = new_bins_edges[0]
    ymax = new_bins_edges[-1]

    fig, ax = plt.subplots(figsize=style.figsize)

    if style.heatmap_interpolation == '1d':
        interpolation = 'none'
        res = []
        for column in ncmap:
            new_x = numpy.linspace(0, len(column), style.heatmap_interpolation_points)
            old_x = numpy.arange(len(column)) + 0.5
            new_vals = numpy.interp(new_x, old_x, column)
            res.append(new_vals)
        ncmap = numpy.array(res)
    else:
        interpolation = style.heatmap_interpolation

    ax.imshow(ncmap[:, ::-1].T,
              interpolation=interpolation,
              extent=(xmin, xmax, ymin, ymax),
              cmap=colors.imshow_colormap)

    ax.set_aspect((xmax - xmin) / (ymax - ymin) * (6 / 9))
    ax.set_ylabel("Latency, ms")
    ax.set_xlabel("Test time, s")

    plt.title(title)


@provide_plot
def io_chart(title: str,
             legend: str,
             iosums: List[IOSummary],
             iops_log_spine: bool = False,
             lat_log_spine: bool = False,
             colors: Any = ColorProfile,
             style: Any = StyleProfile) -> None:

    # -------------- MAGIC VALUES ---------------------
    # IOPS bar width
    width = 0.35

    # offset from the center of a bar to the deviation/confidence range indicator
    err_x_offset = 0.05

    # extra space on top and bottom, compared to the maximal tight layout
    extra_y_space = 0.05

    # additional spine for BW/IOPS on the left side of the plot
    extra_io_spine_x_offset = -0.1

    # extra space on the left and right sides
    extra_x_space = 0.5

    # legend location settings
    legend_location = "center left"
    legend_bbox_to_anchor = (1.1, 0.81)

    # plot box size adjustment (only the plot, not spines and legend)
    plot_box_adjust = {'right': 0.66}
    # -------------- END OF MAGIC VALUES ---------------------

    block_size = iosums[0].block_size
    lc = len(iosums)
    xt = list(range(1, lc + 1))

    # x coordinates of the middle of the bars
    xpos = [i - width / 2 for i in xt]

    # import matplotlib.gridspec as gridspec
    # gs = gridspec.GridSpec(1, 3, width_ratios=[1, 4, 1])
    # p1 = plt.subplot(gs[1])

    fig, p1 = plt.subplots(figsize=StyleProfile.figsize)

    # plot IOPS/BW bars
    if block_size >= LARGE_BLOCKS:
        iops_primary = False
        coef = MiB2KiB
        p1.set_ylabel("BW (MiBps)")
    else:
        iops_primary = True
        coef = block_size
        p1.set_ylabel("IOPS")

    p1.bar(xpos, [iosum.bw.average / coef for iosum in iosums], width=width, color=colors.box_color, label=legend)

    # set correct y limits for the primary IO spine
    min_io = min(iosum.bw.average - iosum.bw.deviation * style.dev_range_x for iosum in iosums)
    max_io = max(iosum.bw.average + iosum.bw.deviation * style.dev_range_x for iosum in iosums)
    border = (max_io - min_io) * extra_y_space
    io_lims = (min_io - border, max_io + border)

    p1.set_ylim(io_lims[0] / coef, io_lims[-1] / coef)

    # plot deviation and confidence error ranges
    err1_legend = err2_legend = None
    for pos, iosum in zip(xpos, iosums):
        err1_legend = p1.errorbar(pos + width / 2 - err_x_offset,
                                  iosum.bw.average / coef,
                                  iosum.bw.deviation * style.dev_range_x / coef,
                                  alpha=colors.subinfo_alpha,
                                  color=colors.suppl_color1)  # 'magenta'
        err2_legend = p1.errorbar(pos + width / 2 + err_x_offset,
                                  iosum.bw.average / coef,
                                  iosum.bw.confidence / coef,
                                  alpha=colors.subinfo_alpha,
                                  color=colors.suppl_color2)  # 'teal'

    if style.grid:
        p1.grid(True)

    handles1, labels1 = p1.get_legend_handles_labels()

    handles1 += [err1_legend, err2_legend]
    labels1 += ["{}% dev".format(style.dev_perc),
                "{}% conf".format(int(100 * iosums[0].bw.confidence_level))]

    # extra y spine for latency on the right side
    p2 = p1.twinx()

    # plot median and 95th percentile latency
    p2.plot(xt, [iosum.lat.perc_50 for iosum in iosums], label="lat med")
    p2.plot(xt, [iosum.lat.perc_95 for iosum in iosums], label="lat 95%")

    # limit and label the x spine
    plt.xlim(extra_x_space, lc + extra_x_space)
    plt.xticks(xt, ["{0} * {1}".format(iosum.qd, iosum.nodes_count) for iosum in iosums])
    p1.set_xlabel("QD * Test node count")

    # apply log scales to the Y spines, if requested
    if iops_log_spine:
        p1.set_yscale('log')

    if lat_log_spine:
        p2.set_yscale('log')

    # extra y spine for BW/IOPS on the left side
    if style.extra_io_spine:
        p3 = p1.twinx()
        if iops_log_spine:
            p3.set_yscale('log')

        if iops_primary:
            p3.set_ylabel("BW (MiBps)")
            p3.set_ylim(io_lims[0] / MiB2KiB, io_lims[1] / MiB2KiB)
        else:
            p3.set_ylabel("IOPS")
            p3.set_ylim(io_lims[0] / block_size, io_lims[1] / block_size)

        p3.spines["left"].set_position(("axes", extra_io_spine_x_offset))
        p3.spines["left"].set_visible(True)
        p3.yaxis.set_label_position('left')
        p3.yaxis.set_ticks_position('left')

    p2.set_ylabel("Latency (ms)")

    plt.title(title)

    # legend box
    handles2, labels2 = p2.get_legend_handles_labels()
    plt.legend(handles1 + handles2, labels1 + labels2,
               loc=legend_location,
               bbox_to_anchor=legend_bbox_to_anchor)

    # adjust central box size to fit the legend
    plt.subplots_adjust(**plot_box_adjust)
    apply_style(style, eng=False, no_legend=True)


# -------------------- REPORT HELPERS --------------------------------------------------------------------------------


class HTMLBlock:
    data = None  # type: str
    js_links = []  # type: List[str]
    css_links = []  # type: List[str]
    order_attr = None  # type: Any

    def __init__(self, data: str, order_attr: Any = None) -> None:
        self.data = data
        self.order_attr = order_attr

    def __eq__(self, o: object) -> bool:
        return o.order_attr == self.order_attr  # type: ignore

    def __lt__(self, o: object) -> bool:
        return o.order_attr > self.order_attr  # type: ignore


class Table:
    def __init__(self, header: List[str]) -> None:
        self.header = header
        self.data = []

    def add_line(self, values: List[str]) -> None:
        self.data.append(values)

    def html(self):
        return html.table("", self.header, self.data)


class Menu1st:
    engineering = "Engineering"
    summary = "Summary"
    per_job = "Per Job"


class Menu2ndEng:
    iops_time = "IOPS(time)"
    hist = "IOPS/lat overall histogram"
    lat_time = "Lat(time)"


class Menu2ndSumm:
    io_lat_qd = "IO & Lat vs QD"


menu_1st_order = [Menu1st.summary, Menu1st.engineering, Menu1st.per_job]


# -------------------- REPORTS --------------------------------------------------------------------------------------


class Reporter(metaclass=abc.ABCMeta):
    suite_types = set()  # type: Set[str]

    @abc.abstractmethod
    def get_divs(self, suite: SuiteConfig, storage: ResultStorage) -> Iterator[Tuple[str, str, HTMLBlock]]:
        pass


class JobReporter(metaclass=abc.ABCMeta):
    suite_types = set()  # type: Set[str]

    @abc.abstractmethod
    def get_divs(self,
                 suite: SuiteConfig,
                 job: JobConfig,
                 storage: ResultStorage) -> Iterator[Tuple[str, str, HTMLBlock]]:
        pass


# Main performance report
class PerformanceSummary(Reporter):
    """Aggregated summary for the storage"""


# Main performance report
class IO_QD(Reporter):
    """Creates a graph which shows how IOPS and latency depend on QD"""
    suite_types = {'fio'}

    def get_divs(self, suite: SuiteConfig, rstorage: ResultStorage) -> Iterator[Tuple[str, str, HTMLBlock]]:
        ts_map = defaultdict(list)  # type: Dict[FioJobParams, List[Tuple[SuiteConfig, FioJobConfig]]]
        str_summary = {}  # type: Dict[FioJobParams, Tuple[str, str]]
        for job in rstorage.iter_job(suite):
            fjob = cast(FioJobConfig, job)
            fjob_no_qd = cast(FioJobParams, fjob.params.copy(qd=None))
            str_summary[fjob_no_qd] = (fjob_no_qd.summary, fjob_no_qd.long_summary)
            ts_map[fjob_no_qd].append((suite, fjob))
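            # jobs that differ only in queue depth collapse into the same key
            # here, so each key later yields one IOPS/latency-vs-QD curve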

        for tpl, suites_jobs in ts_map.items():
            if len(suites_jobs) > StyleProfile.min_iops_vs_qd_jobs:
                iosums = [make_iosum(rstorage, suite, job) for suite, job in suites_jobs]
                iosums.sort(key=lambda x: x.qd)
                summary, summary_long = str_summary[tpl]
                ds = DataSource(suite_id=suite.storage_id,
                                job_id=summary,
                                node_id=AGG_TAG,
                                sensor="fio",
                                dev=AGG_TAG,
                                metric="io_over_qd",
                                tag="svg")

                title = "IOPS, BW, Lat vs. QD.\n" + summary_long
                fpath = io_chart(rstorage, ds, title=title, legend="IOPS/BW", iosums=iosums)  # type: str
                yield Menu1st.summary, Menu2ndSumm.io_lat_qd, HTMLBlock(html.img(fpath))


# Linearization report
class IOPS_Bsize(Reporter):
    """Creates graphs which show how IOPS and latency depend on block size"""


def summ_sensors(rstorage: ResultStorage,
                 nodes: List[str],
                 sensor: str,
                 metric: str,
                 time_range: Tuple[int, int]) -> Optional[numpy.array]:

    res = None  # type: Optional[numpy.array]
    for node_id in nodes:
        for _, groups in rstorage.iter_sensors(node_id=node_id, sensor=sensor, metric=metric):
            data = get_sensor_for_time_range(rstorage,
                                             node_id=node_id,
                                             sensor=sensor,
                                             dev=groups['dev'],
                                             metric=metric,
                                             time_range=time_range)
            if res is None:
                res = data
            else:
                res += data
    return res


# IOPS/latency distribution
class StatInfo(JobReporter):
    """Statistic info for job results"""
    suite_types = {'fio'}

    def get_divs(self, suite: SuiteConfig, job: JobConfig,
                 rstorage: ResultStorage) -> Iterator[Tuple[str, str, HTMLBlock]]:

        fjob = cast(FioJobConfig, job)
        io_sum = make_iosum(rstorage, suite, fjob)

        summary_data = [
            ["Summary", job.params.long_summary],
        ]

        res = html.H2(html.center("Test summary"))
        res += html.table("Test info", None, summary_data)
        stat_data_headers = ["Name", "Average ~ Dev", "Conf interval", "Median", "Mode", "Kurt / Skew", "95%", "99%"]

        KB = 1024
        bw_data = ["Bandwidth",
                   "{}Bps ~ {}Bps".format(b2ssize(io_sum.bw.average * KB), b2ssize(io_sum.bw.deviation * KB)),
                   b2ssize(io_sum.bw.confidence * KB) + "Bps",
                   b2ssize(io_sum.bw.perc_50 * KB) + "Bps",
                   "-",
                   "{:.2f} / {:.2f}".format(io_sum.bw.kurt, io_sum.bw.skew),
                   b2ssize(io_sum.bw.perc_5 * KB) + "Bps",
                   b2ssize(io_sum.bw.perc_1 * KB) + "Bps"]

        iops_data = ["IOPS",
                     "{}IOPS ~ {}IOPS".format(b2ssize_10(io_sum.bw.average / fjob.bsize),
                                              b2ssize_10(io_sum.bw.deviation / fjob.bsize)),
                     b2ssize_10(io_sum.bw.confidence / fjob.bsize) + "IOPS",
                     b2ssize_10(io_sum.bw.perc_50 / fjob.bsize) + "IOPS",
                     "-",
                     "{:.2f} / {:.2f}".format(io_sum.bw.kurt, io_sum.bw.skew),
                     b2ssize_10(io_sum.bw.perc_5 / fjob.bsize) + "IOPS",
                     b2ssize_10(io_sum.bw.perc_1 / fjob.bsize) + "IOPS"]

        # latency percentiles come from io_sum.lat; its bin edges were converted to ms in make_iosum
        lat_data = ["Latency",
                    "-",
                    "-",
                    b2ssize_10(io_sum.lat.perc_50 / MS2S) + "s",
                    "-",
                    "-",
                    b2ssize_10(io_sum.lat.perc_95 / MS2S) + "s",
                    b2ssize_10(io_sum.lat.perc_99 / MS2S) + "s"]

        # sensor usage
        stat_data = [iops_data, bw_data, lat_data]
        res += html.table("Load stats info", stat_data_headers, stat_data)

        resource_headers = ["Resource", "Usage count", "Proportional to work done"]

        io_transferred = io_sum.bw.data.sum() * KB
        resource_data = [
            ["IO made", b2ssize_10(io_transferred / KB / fjob.bsize) + "OP", "-"],
            ["Data transferred", b2ssize(io_transferred) + "B", "-"]
        ]

        storage = rstorage.storage
        nodes = storage.load_list(NodeInfo, 'all_nodes')  # type: List[NodeInfo]

        storage_nodes = [node.node_id for node in nodes if node.roles.intersection(STORAGE_ROLES)]
        test_nodes = [node.node_id for node in nodes if "testnode" in node.roles]

        trange = (job.reliable_info_range[0] / 1000, job.reliable_info_range[1] / 1000)
        ops_done = io_transferred / fjob.bsize / KB

        all_metrics = [
            ("Test nodes net send", 'net-io', 'send_bytes', b2ssize, test_nodes, "B", io_transferred),
            ("Test nodes net recv", 'net-io', 'recv_bytes', b2ssize, test_nodes, "B", io_transferred),

            ("Test nodes disk write", 'block-io', 'sectors_written', b2ssize, test_nodes, "B", io_transferred),
            ("Test nodes disk read", 'block-io', 'sectors_read', b2ssize, test_nodes, "B", io_transferred),
            ("Test nodes writes", 'block-io', 'writes_completed', b2ssize_10, test_nodes, "OP", ops_done),
            ("Test nodes reads", 'block-io', 'reads_completed', b2ssize_10, test_nodes, "OP", ops_done),

            ("Storage nodes net send", 'net-io', 'send_bytes', b2ssize, storage_nodes, "B", io_transferred),
            ("Storage nodes net recv", 'net-io', 'recv_bytes', b2ssize, storage_nodes, "B", io_transferred),

            ("Storage nodes disk write", 'block-io', 'sectors_written', b2ssize, storage_nodes, "B", io_transferred),
            ("Storage nodes disk read", 'block-io', 'sectors_read', b2ssize, storage_nodes, "B", io_transferred),
            ("Storage nodes writes", 'block-io', 'writes_completed', b2ssize_10, storage_nodes, "OP", ops_done),
            ("Storage nodes reads", 'block-io', 'reads_completed', b2ssize_10, storage_nodes, "OP", ops_done),
        ]

        all_agg = {}

        for descr, sensor, metric, ffunc, nodes, units, denom in all_metrics:
            if not nodes:
                continue

            res_arr = summ_sensors(rstorage, nodes=nodes, sensor=sensor, metric=metric, time_range=trange)
            if res_arr is None:
                continue

            agg = res_arr.sum()
            resource_data.append([descr, ffunc(agg) + units, "{:.1f}".format(agg / denom)])
            all_agg[descr] = agg

        cums = [
            ("Test nodes writes", "Test nodes reads", "Total test ops", b2ssize_10, "OP", ops_done),
            ("Storage nodes writes", "Storage nodes reads", "Total storage ops", b2ssize_10, "OP", ops_done),
            ("Storage nodes disk write", "Storage nodes disk read", "Total storage IO size", b2ssize,
             "B", io_transferred),
            ("Test nodes disk write", "Test nodes disk read", "Total test nodes IO size", b2ssize,
             "B", io_transferred),
        ]

        for name1, name2, descr, ffunc, units, denom in cums:
            if name1 in all_agg and name2 in all_agg:
                agg = all_agg[name1] + all_agg[name2]
                resource_data.append([descr, ffunc(agg) + units, "{:.1f}".format(agg / denom)])

        res += html.table("Resource usage", resource_headers, resource_data)

        yield Menu1st.per_job, job.summary, HTMLBlock(res)


# IOPS/latency distribution
class IOHist(JobReporter):
    """IOPS/latency distribution histogram"""
    suite_types = {'fio'}

    def get_divs(self,
                 suite: SuiteConfig,
                 job: JobConfig,
                 rstorage: ResultStorage) -> Iterator[Tuple[str, str, HTMLBlock]]:

        fjob = cast(FioJobConfig, job)

        yield Menu1st.per_job, fjob.summary, HTMLBlock(html.H2(html.center("Load histograms")))

        agg_lat = get_aggregated(rstorage, suite, fjob, "lat")
        bins_edges = numpy.array(get_lat_vals(agg_lat.data.shape[1]), dtype='float32') / 1000  # convert us to ms
        lat_stat_prop = calc_histo_stat_props(agg_lat, bins_edges, rebins_count=StyleProfile.hist_lat_boxes)

        long_summary = cast(FioJobParams, fjob.params).long_summary

        title = "Latency distribution"
        units = "ms"

        fpath = plot_hist(rstorage, agg_lat.source(tag='hist.svg'), title, units, lat_stat_prop)  # type: str
        yield Menu1st.per_job, fjob.summary, HTMLBlock(html.img(fpath))

        agg_io = get_aggregated(rstorage, suite, fjob, "bw")

        if fjob.bsize >= LARGE_BLOCKS:
            title = "BW distribution"
            units = "MiBps"
            agg_io.data //= MiB2KiB
        else:
            title = "IOPS distribution"
            agg_io.data //= fjob.bsize
            units = "IOPS"

        io_stat_prop = calc_norm_stat_props(agg_io, bins_count=StyleProfile.hist_boxes)
        fpath = plot_hist(rstorage, agg_io.source(tag='hist.svg'), title, units, io_stat_prop)  # type: str
        yield Menu1st.per_job, fjob.summary, HTMLBlock(html.img(fpath))


# IOPS/latency over test time for each job
class IOTime(JobReporter):
    """IOPS/latency during test"""
    suite_types = {'fio'}

    def get_divs(self,
                 suite: SuiteConfig,
                 job: JobConfig,
                 rstorage: ResultStorage) -> Iterator[Tuple[str, str, HTMLBlock]]:

        fjob = cast(FioJobConfig, job)

        yield Menu1st.per_job, fjob.summary, HTMLBlock(html.H2(html.center("Load over time")))

        agg_io = get_aggregated(rstorage, suite, fjob, "bw")
        if fjob.bsize >= LARGE_BLOCKS:
            title = "Bandwidth"
            units = "MiBps"
            agg_io.data //= MiB2KiB
        else:
            title = "IOPS"
            agg_io.data //= fjob.bsize
            units = "IOPS"

        fpath = plot_v_over_time(rstorage, agg_io.source(tag='ts.svg'), title, units, agg_io)  # type: str
        yield Menu1st.per_job, fjob.summary, HTMLBlock(html.img(fpath))

        agg_lat = get_aggregated(rstorage, suite, fjob, "lat")
        bins_edges = numpy.array(get_lat_vals(agg_lat.data.shape[1]), dtype='float32') / 1000
        title = "Latency"

        fpath = plot_lat_over_time(rstorage, agg_lat.source(tag='ts.svg'), title, agg_lat, bins_edges)  # type: str
        yield Menu1st.per_job, fjob.summary, HTMLBlock(html.img(fpath))

        title = "Latency heatmap"
        fpath = plot_heatmap(rstorage, agg_lat.source(tag='hmap.png'), title, agg_lat, bins_edges)  # type: str

        yield Menu1st.per_job, fjob.summary, HTMLBlock(html.img(fpath))


class ResourceUsage:
    def __init__(self, io_r_ops: int, io_w_ops: int, io_r_kb: int, io_w_kb: int) -> None:
        self.io_w_ops = io_w_ops
        self.io_r_ops = io_r_ops
        self.io_w_kb = io_w_kb
        self.io_r_kb = io_r_kb

        self.cpu_used_user = None  # type: int
        self.cpu_used_sys = None  # type: int
        self.cpu_wait_io = None  # type: int

        self.net_send_packets = None  # type: int
        self.net_recv_packets = None  # type: int
        self.net_send_kb = None  # type: int
        self.net_recv_kb = None  # type: int


# Cluster load over test time
class ClusterLoad(JobReporter):
    """IOPS/latency during test"""

    # TODO: units should come from the sensor
    storage_sensors = [
        ('block-io', 'reads_completed', "Read ops", 'iops'),
        ('block-io', 'writes_completed', "Write ops", 'iops'),
        ('block-io', 'sectors_read', "Read kb", 'kb'),
        ('block-io', 'sectors_written', "Write kb", 'kb'),
    ]

    def get_divs(self,
                 suite: SuiteConfig,
                 job: JobConfig,
                 rstorage: ResultStorage) -> Iterator[Tuple[str, str, HTMLBlock]]:
        # split the nodes into test nodes and all others
        storage = rstorage.storage
        nodes = storage.load_list(NodeInfo, "all_nodes")  # type: List[NodeInfo]

        yield Menu1st.per_job, job.summary, HTMLBlock(html.H2(html.center("Cluster load")))
        test_nodes = {node.node_id for node in nodes if 'testnode' in node.roles}
        cluster_nodes = {node.node_id for node in nodes if 'testnode' not in node.roles}

        # convert ms to s
        time_range = (job.reliable_info_range[0] // MS2S, job.reliable_info_range[1] // MS2S)
        duration = time_range[1] - time_range[0]
        for sensor, metric, sensor_title, units in self.storage_sensors:
            sum_testnode = numpy.zeros((duration,))
            sum_other = numpy.zeros((duration,))
            for path, groups in rstorage.iter_sensors(sensor=sensor, metric=metric):
                # TODO: should return the sensor units as well
                data = get_sensor_for_time_range(rstorage,
                                                 groups['node_id'],
                                                 sensor,
                                                 groups['dev'],
                                                 metric, time_range)
                if groups['node_id'] in test_nodes:
                    sum_testnode += data
                else:
                    sum_other += data

            ds = DataSource(suite_id=suite.storage_id,
                            job_id=job.storage_id,
                            node_id="test_nodes",
                            sensor=sensor,
                            dev=AGG_TAG,
                            metric=metric,
                            tag="ts.svg")

            # convert s to ms
            ts = TimeSeries(name="",
                            times=numpy.arange(*time_range) * MS2S,
                            data=sum_testnode,
                            raw=None,
                            units=units,
                            time_units="us",
                            source=ds)

            fpath = plot_v_over_time(rstorage, ds, sensor_title, sensor_title, ts=ts)  # type: str
            yield Menu1st.per_job, job.summary, HTMLBlock(html.img(fpath))


# Resource consumption report
class ResourceConsumption(Reporter):
    """Resource consumption report, text only"""


# Node load over test time
class NodeLoad(Reporter):
    """IOPS/latency during test"""


# Ceph cluster summary
class CephClusterSummary(Reporter):
    """IOPS/latency during test"""


# TODO: Ceph operation breakout report
# TODO: Resource consumption for different types of test


# ------------------------------------------ REPORT STAGES -----------------------------------------------------------


class HtmlReportStage(Stage):
    priority = StepOrder.REPORT

    def run(self, ctx: TestRun) -> None:
        rstorage = ResultStorage(ctx.storage)

        job_reporters = [StatInfo(), IOTime(), IOHist(), ClusterLoad()]  # type: List[JobReporter]
        reporters = [IO_QD()]  # type: List[Reporter]

        # job_reporters = [ClusterLoad()]
        # reporters = []

        root_dir = os.path.dirname(os.path.dirname(wally.__file__))
        doc_templ_path = os.path.join(root_dir, "report_templates/index.html")
        report_template = open(doc_templ_path, "rt").read()
        css_file_src = os.path.join(root_dir, "report_templates/main.css")
        css_file = open(css_file_src, "rt").read()

        menu_block = []
        content_block = []
        link_idx = 0

        # matplotlib.rcParams.update(ctx.config.reporting.matplotlib_params.raw())
        # ColorProfile.__dict__.update(ctx.config.reporting.colors.raw())
        # StyleProfile.__dict__.update(ctx.config.reporting.style.raw())

        items = defaultdict(lambda: defaultdict(list))  # type: Dict[str, Dict[str, List[HTMLBlock]]]

        # TODO: filter reporters
        for suite in rstorage.iter_suite(FioTest.name):
            all_jobs = list(rstorage.iter_job(suite))
            all_jobs.sort(key=lambda job: job.params)
            for job in all_jobs:
                for reporter in job_reporters:
                    for block, item, html in reporter.get_divs(suite, job, rstorage):
                        items[block][item].append(html)
                if DEBUG:
                    break

            for reporter in reporters:
                for block, item, html in reporter.get_divs(suite, rstorage):
                    items[block][item].append(html)

            if DEBUG:
                break

        for idx_1st, menu_1st in enumerate(sorted(items, key=lambda x: menu_1st_order.index(x))):
            menu_block.append(
                '<a href="#item{}" class="nav-group" data-toggle="collapse" data-parent="#MainMenu">{}</a>'
                .format(idx_1st, menu_1st)
            )
            menu_block.append('<div class="collapse" id="item{}">'.format(idx_1st))
            for menu_2nd in sorted(items[menu_1st]):
                menu_block.append('    <a href="#content{}" class="nav-group-item">{}</a>'
                                  .format(link_idx, menu_2nd))
                content_block.append('<div id="content{}">'.format(link_idx))
                content_block.extend("    " + x.data for x in items[menu_1st][menu_2nd])
                content_block.append('</div>')
                link_idx += 1
            menu_block.append('</div>')

        report = report_template.replace("{{{menu}}}", ("\n" + " " * 16).join(menu_block))
        report = report.replace("{{{content}}}", ("\n" + " " * 16).join(content_block))
        report_path = rstorage.put_report(report, "index.html")
        rstorage.put_report(css_file, "main.css")
        logger.info("Report is stored into %r", report_path)


class ConsoleReportStage(Stage):

    priority = StepOrder.REPORT

    def run(self, ctx: TestRun) -> None:
        # TODO(koder): load data from storage
        raise NotImplementedError("...")