import os
import abc
import logging
from io import BytesIO
from functools import wraps
from typing import Dict, Any, Iterator, Tuple, cast, List, Callable, Set, Optional
from collections import defaultdict

import numpy
import scipy.stats
import matplotlib.pyplot as plt

import wally

from . import html
from .stage import Stage, StepOrder
from .test_run_class import TestRun
from .hlstorage import ResultStorage
from .node_interfaces import NodeInfo
from .utils import b2ssize, b2ssize_10, STORAGE_ROLES
from .statistic import (calc_norm_stat_props, calc_histo_stat_props, moving_average, moving_dev,
                        hist_outliers_perc, ts_hist_outliers_perc, find_ouliers_ts, approximate_curve,
                        rebin_histogram)
from .result_classes import (StatProps, DataSource, TimeSeries, NormStatProps, HistoStatProps, SuiteConfig,
                             IResultStorage)
from .suits.io.fio_hist import get_lat_vals, expected_lat_bins
from .suits.io.fio import FioTest, FioJobConfig
from .suits.io.fio_job import FioJobParams
from .suits.job import JobConfig


logger = logging.getLogger("wally")


# ---------------- CONSTS ---------------------------------------------------------------------------------------------


DEBUG = False
LARGE_BLOCKS = 256
MiB2KiB = 1024
MS2S = 1000


# ---------------- PROFILES ------------------------------------------------------------------------------------------


# These are default values; the real values are loaded from the config

class ColorProfile:
    primary_color = 'b'
    suppl_color1 = 'teal'
    suppl_color2 = 'magenta'
    suppl_color3 = 'orange'
    box_color = 'y'
    err_color = 'red'

    noise_alpha = 0.3
    subinfo_alpha = 0.7

    imshow_colormap = None  # type: str


class StyleProfile:
    grid = True
    tide_layout = True
    hist_boxes = 10
    hist_lat_boxes = 25
    hm_hist_bins_count = 25
    min_points_for_dev = 5

    dev_range_x = 2.0
    dev_perc = 95

    point_shape = 'o'
    err_point_shape = '*'

    avg_range = 20
    approx_average = True

    curve_approx_level = 6
    curve_approx_points = 100
    assert avg_range >= min_points_for_dev

    # figure size in inches
    figsize = (10, 6)

    extra_io_spine = True

    legend_for_eng = True
    heatmap_interpolation = '1d'
    heatmap_interpolation_points = 300
    outliers_q_nd = 3.0
    outliers_hide_q_nd = 4.0
    outliers_lat = (0.01, 0.995)

    violin_instead_of_box = True
    violin_point_count = 30000

    heatmap_colorbar = False

    min_iops_vs_qd_jobs = 3

    units = {
        'bw': ("MiBps", MiB2KiB, "bandwidth"),
        'iops': ("IOPS", 1, "iops"),
        'lat': ("ms", 1, "latency")
    }
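
    # Illustrative note (not part of the original code): each units entry is
    # (unit name, divider, human name). Raw 'bw' samples are stored in KiBps,
    # so a value of 51200 is shown as 51200 / MiB2KiB = 50 MiBps.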


# ---------------- STRUCTS -------------------------------------------------------------------------------------------


# TODO: needs to be revised; should use StatProps fields instead
class StoragePerfSummary:
    def __init__(self, name: str) -> None:
        self.direct_iops_r_max = 0  # type: int
        self.direct_iops_w_max = 0  # type: int

        # 64KiB blocks are used instead of 4KiB to fill the caches faster
        self.direct_iops_w64_max = 0  # type: int

        self.rws4k_10ms = 0  # type: int
        self.rws4k_30ms = 0  # type: int
        self.rws4k_100ms = 0  # type: int
        self.bw_write_max = 0  # type: int
        self.bw_read_max = 0  # type: int

        self.bw = None  # type: float
        self.iops = None  # type: float
        self.lat = None  # type: float
        self.lat_50 = None  # type: float
        self.lat_95 = None  # type: float


class IOSummary:
    def __init__(self,
                 qd: int,
                 block_size: int,
                 nodes_count: int,
                 bw: NormStatProps,
                 lat: HistoStatProps) -> None:

        self.qd = qd
        self.nodes_count = nodes_count
        self.block_size = block_size

        self.bw = bw
        self.lat = lat


# -------------- AGGREGATION AND STAT FUNCTIONS ----------------------------------------------------------------------

def make_iosum(rstorage: ResultStorage, suite: SuiteConfig, job: FioJobConfig) -> IOSummary:
    lat = get_aggregated(rstorage, suite, job, "lat")
    bins_edges = numpy.array(get_lat_vals(lat.data.shape[1]), dtype='float32') / 1000
    io = get_aggregated(rstorage, suite, job, "bw")

    return IOSummary(job.qd,
                     nodes_count=len(suite.nodes_ids),
                     block_size=job.bsize,
                     lat=calc_histo_stat_props(lat, bins_edges, StyleProfile.hist_boxes),
                     bw=calc_norm_stat_props(io, StyleProfile.hist_boxes))

#
# def iter_io_results(rstorage: ResultStorage,
#                     qds: List[int] = None,
#                     op_types: List[str] = None,
#                     sync_types: List[str] = None,
#                     block_sizes: List[int] = None) -> Iterator[Tuple[TestSuiteConfig, FioJobConfig]]:
#
#     for suite in rstorage.iter_suite(FioTest.name):
#         for job in rstorage.iter_job(suite):
#             fjob = cast(FioJobConfig, job)
#             assert int(fjob.vals['numjobs']) == 1
#
#             if sync_types is not None and fjob.sync_mode in sync_types:
#                 continue
#
#             if block_sizes is not None and fjob.bsize not in block_sizes:
#                 continue
#
#             if op_types is not None and fjob.op_type not in op_types:
#                 continue
#
#             if qds is not None and fjob.qd not in qds:
#                 continue
#
#             yield suite, fjob


AGG_TAG = 'ALL'


def get_aggregated(rstorage: ResultStorage, suite: SuiteConfig, job: FioJobConfig, metric: str) -> TimeSeries:
    tss = list(rstorage.iter_ts(suite, job, sensor=metric))
    ds = DataSource(suite_id=suite.storage_id,
                    job_id=job.storage_id,
                    node_id=AGG_TAG,
                    sensor='fio',
                    dev=AGG_TAG,
                    metric=metric,
                    tag='csv')

    agg_ts = TimeSeries(metric,
                        raw=None,
                        source=ds,
                        data=numpy.zeros(tss[0].data.shape, dtype=tss[0].data.dtype),
                        times=tss[0].times.copy(),
                        units=tss[0].units)

    for ts in tss:
        if metric == 'lat' and (len(ts.data.shape) != 2 or ts.data.shape[1] != expected_lat_bins):
            logger.error("Sensor %s.%s on node %s has "
                         "shape=%s. Can only process sensors with shape=[X, %s].",
                         ts.source.dev, ts.source.sensor, ts.source.node_id,
                         ts.data.shape, expected_lat_bins)
            continue

        if metric != 'lat' and len(ts.data.shape) != 1:
            logger.error("Sensor %s.%s on node %s has "
                         "shape=%s. Can only process 1D sensors.",
                         ts.source.dev, ts.source.sensor, ts.source.node_id, ts.data.shape)
            continue

        # TODO: match times on different ts
        agg_ts.data += ts.data

    return agg_ts


def is_sensor_numarray(sensor: str, metric: str) -> bool:
    """Returns True if the sensor provides a one-dimensional array of numeric values, one number per measurement."""
    return True


LEVEL_SENSORS = {("block-io", "io_queue"),
                 ("system-cpu", "procs_blocked"),
                 ("system-cpu", "procs_queue")}


def is_level_sensor(sensor: str, metric: str) -> bool:
    """Returns True if the sensor measures a level of some kind, e.g. queue depth."""
    return (sensor, metric) in LEVEL_SENSORS


def is_delta_sensor(sensor: str, metric: str) -> bool:
    """Returns True if the sensor provides deltas of a cumulative value, e.g. IO completed in the given period."""
    return not is_level_sensor(sensor, metric)
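
# Example (illustrative): ("block-io", "io_queue") is a level sensor - each
# sample is the queue depth at that moment - while ("block-io",
# "writes_completed") is a delta sensor - each sample counts the writes
# finished since the previous sample.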


def get_sensor_for_time_range(storage: IResultStorage,
                              node_id: str,
                              sensor: str,
                              dev: str,
                              metric: str,
                              time_range: Tuple[int, int]) -> numpy.array:
    """Return sensor values for the given node and period as an array of per-second estimates.

    Raises an error if the requested range is not fully covered by the data in storage.
    First it finds the range of sensor results which fully covers the requested range,
    then redistributes the collected values into one-second cells."""

    ds = DataSource(node_id=node_id, sensor=sensor, dev=dev, metric=metric)
    sensor_data = storage.load_sensor(ds)
    assert sensor_data.time_units == 'us'

    # collected_at is an array of pairs (collection_started_at, collection_finished_at);
    # extract the start time from each pair
    collection_start_at = sensor_data.times[::2]  # type: numpy.array

    MICRO = 1000000

    # convert seconds to us
    begin = time_range[0] * MICRO
    end = time_range[1] * MICRO

    if begin < collection_start_at[0] or end > collection_start_at[-1] or end <= begin:
        raise AssertionError(("Incorrect data for get_sensor - time_range={!r}, collected_at=[{}, ..., {}], "
                              "sensor = {}_{}.{}.{}").format(time_range,
                                                             sensor_data.times[0] // MICRO,
                                                             sensor_data.times[-1] // MICRO,
                                                             node_id, sensor, dev, metric))

    pos1, pos2 = numpy.searchsorted(collection_start_at, (begin, end))

    # start time of the current chunk of real data
    edge_it = iter(collection_start_at[pos1 - 1: pos2 + 1])

    # current real data value
    val_it = iter(sensor_data.data[pos1 - 1: pos2 + 1])

    # result array, cumulative value per second
    result = numpy.zeros((end - begin) // MICRO)
    idx = 0
    curr_summ = 0

    # end of the current time slot
    results_cell_ends = begin + MICRO

    # hack to unify looping
    real_data_end = next(edge_it)
    while results_cell_ends <= end:
        real_data_start = real_data_end
        real_data_end = next(edge_it)
        real_val_left = next(val_it)

        # real data "speed" for the interval [real_data_start, real_data_end]
        real_val_ps = float(real_val_left) / (real_data_end - real_data_start)

        while real_data_end >= results_cell_ends and results_cell_ends <= end:
            # the part of the current real value which fits into the current result cell
            curr_real_chunk = int((results_cell_ends - real_data_start) * real_val_ps)

            # keep the rest of the real data for the next result cell
            real_val_left -= curr_real_chunk
            result[idx] = curr_summ + curr_real_chunk
            idx += 1
            curr_summ = 0

            # adjust the real data start time
            real_data_start = results_cell_ends
            results_cell_ends += MICRO

        # don't lose any real data
        curr_summ += real_val_left

    return result
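
# Worked example (illustrative, not from the original code): with collection
# start times at 0s, 2s and 4s and interval deltas [10, 20], the first interval
# is redistributed at 5 units/s and the second at 10 units/s, so a call with
# time_range=(0, 4) yields the per-second estimate [5, 5, 10, 10].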


# -------------- PLOT HELPER FUNCTIONS -------------------------------------------------------------------------------

def get_emb_data_svg(plt: Any, format: str = 'svg') -> bytes:
    bio = BytesIO()
    if format in ('png', 'jpg'):
        plt.savefig(bio, format=format)
        return bio.getvalue()
    elif format == 'svg':
        plt.savefig(bio, format='svg')
        img_start = "<!-- Created with matplotlib (http://matplotlib.org/) -->"
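        # Splitting on the header comment drops matplotlib's XML prolog and
        # DOCTYPE, so the returned bytes start at the <svg> element and can be
        # inlined directly into an HTML page.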
        return bio.getvalue().decode("utf8").split(img_start, 1)[1].encode("utf8")


def provide_plot(func: Callable[..., None]) -> Callable[..., str]:
    @wraps(func)
    def closure1(storage: ResultStorage,
                 path: DataSource,
                 *args, **kwargs) -> str:
        fpath = storage.check_plot_file(path)
        if not fpath:
            format = path.tag.split(".")[-1]

            plt.figure(figsize=StyleProfile.figsize)
            plt.subplots_adjust(right=0.66)

            func(*args, **kwargs)
            fpath = storage.put_plot_file(get_emb_data_svg(plt, format=format), path)
            logger.debug("Plot %s saved to %r", path, fpath)
            plt.clf()
            plt.close('all')
        return fpath
    return closure1
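
# Usage note (illustrative, not part of the original code): a function wrapped
# with @provide_plot gains two leading parameters - the ResultStorage and the
# target DataSource - and returns the path of the stored image, e.g.
#
#     fpath = plot_hist(rstorage, ds, "Latency distribution", "ms", props)
#
# The plot body only runs when the image is not already cached in storage.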


def apply_style(style: StyleProfile, eng: bool = True, no_legend: bool = False) -> None:
    if style.grid:
        plt.grid(True)

    if (style.legend_for_eng or not eng) and not no_legend:
        legend_location = "center left"
        legend_bbox_to_anchor = (1.03, 0.81)
        plt.legend(loc=legend_location, bbox_to_anchor=legend_bbox_to_anchor)


# -------------- PLOT FUNCTIONS --------------------------------------------------------------------------------------


@provide_plot
def plot_hist(title: str, units: str,
              prop: StatProps,
              colors: Any = ColorProfile,
              style: Any = StyleProfile) -> None:

383 # TODO: unit should came from ts
    normed_bins = prop.bins_populations / prop.bins_populations.sum()
    bar_width = prop.bins_edges[1] - prop.bins_edges[0]
    plt.bar(prop.bins_edges, normed_bins, color=colors.box_color, width=bar_width, label="Real data")

    plt.xlabel(units)
    plt.ylabel("Value probability")
    plt.title(title)

    dist_plotted = False
    if isinstance(prop, NormStatProps):
        nprop = cast(NormStatProps, prop)
        stats = scipy.stats.norm(nprop.average, nprop.deviation)

        new_edges, step = numpy.linspace(prop.bins_edges[0], prop.bins_edges[-1],
                                         len(prop.bins_edges) * 10, retstep=True)

        ypoints = stats.cdf(new_edges) * 11
        ypoints = ypoints[1:] - ypoints[:-1]
        xpoints = (new_edges[1:] + new_edges[:-1]) / 2

        plt.plot(xpoints, ypoints, color=colors.primary_color, label="Expected from\nnormal\ndistribution")
        dist_plotted = True

    plt.gca().set_xlim(left=prop.bins_edges[0])
    if prop.log_bins:
        plt.xscale('log')

    apply_style(style, eng=True, no_legend=not dist_plotted)


@provide_plot
def plot_v_over_time(title: str, units: str,
                     ts: TimeSeries,
                     plot_avg_dev: bool = True,
                     colors: Any = ColorProfile, style: Any = StyleProfile) -> None:

    min_time = min(ts.times)

    # /1000 is the us to ms conversion
    time_points = numpy.array([(val_time - min_time) / 1000 for val_time in ts.times])

    outliers_idxs = find_ouliers_ts(ts.data, cut_range=style.outliers_q_nd)
    outliers_4q_idxs = find_ouliers_ts(ts.data, cut_range=style.outliers_hide_q_nd)
    normal_idxs = numpy.logical_not(outliers_idxs)
    outliers_idxs = outliers_idxs & numpy.logical_not(outliers_4q_idxs)
    hidden_outliers_count = numpy.count_nonzero(outliers_4q_idxs)

    data = ts.data[normal_idxs]
    data_times = time_points[normal_idxs]
    outliers = ts.data[outliers_idxs]
    outliers_times = time_points[outliers_idxs]

    alpha = colors.noise_alpha if plot_avg_dev else 1.0
    plt.plot(data_times, data, style.point_shape,
             color=colors.primary_color, alpha=alpha, label="Data")
    plt.plot(outliers_times, outliers, style.err_point_shape,
             color=colors.err_color, label="Outliers")

    has_negative_dev = False
    plus_minus = "\xb1"

    if plot_avg_dev and len(data) < style.avg_range * 2:
        logger.warning("Array %r is too small to plot an average over %s points", title, style.avg_range)
    elif plot_avg_dev:
        avg_vals = moving_average(data, style.avg_range)
        dev_vals = moving_dev(data, style.avg_range)
        avg_times = moving_average(data_times, style.avg_range)

        if style.approx_average:
            avg_vals = approximate_curve(avg_times, avg_vals, avg_times, style.curve_approx_level)
            dev_vals = approximate_curve(avg_times, dev_vals, avg_times, style.curve_approx_level)

        plt.plot(avg_times, avg_vals, c=colors.suppl_color1, label="Average")

        low_vals_dev = avg_vals - dev_vals * style.dev_range_x
        high_vals_dev = avg_vals + dev_vals * style.dev_range_x
        if style.dev_range_x - int(style.dev_range_x) < 0.01:
            plt.plot(avg_times, low_vals_dev, c=colors.suppl_color2,
                     label="{}{}*stdev".format(plus_minus, int(style.dev_range_x)))
        else:
            plt.plot(avg_times, low_vals_dev, c=colors.suppl_color2,
                     label="{}{}*stdev".format(plus_minus, style.dev_range_x))
        plt.plot(avg_times, high_vals_dev, c=colors.suppl_color2)
        has_negative_dev = low_vals_dev.min() < 0

    plt.xlim(-5, max(time_points) + 5)
    plt.xlabel("Time, seconds from test begin")
    plt.ylabel("{}. Average and {}stddev over {} points".format(units, plus_minus, style.avg_range))
    plt.title(title)

    if has_negative_dev:
        plt.gca().set_ylim(bottom=0)

    apply_style(style, eng=True)


@provide_plot
def plot_lat_over_time(title: str, ts: TimeSeries, bins_vals: List[int], samples: int = 5,
                       colors: Any = ColorProfile,
                       style: Any = StyleProfile) -> None:

    min_time = min(ts.times)
    times = [int(tm - min_time + 500) // 1000 for tm in ts.times]
    ts_len = len(times)
    step = ts_len / samples
    points = [times[int(i * step + 0.5)] for i in range(samples)]
    points.append(times[-1])
    bounds = list(zip(points[:-1], points[1:]))
    agg_data = []
    positions = []
    labels = []

    for begin, end in bounds:
        agg_hist = ts.data[begin:end].sum(axis=0)

        if style.violin_instead_of_box:
            # cut outliers
            idx1, idx2 = hist_outliers_perc(agg_hist, style.outliers_lat)
            agg_hist = agg_hist[idx1:idx2]
            curr_bins_vals = bins_vals[idx1:idx2]

            correct_coef = style.violin_point_count / sum(agg_hist)
            if correct_coef > 1:
                correct_coef = 1
        else:
            curr_bins_vals = bins_vals
            correct_coef = 1

        vals = numpy.empty(shape=(numpy.sum(agg_hist),), dtype='float32')
        cidx = 0

        non_zero, = agg_hist.nonzero()
        for pos in non_zero:
            count = int(agg_hist[pos] * correct_coef + 0.5)

            if count != 0:
                vals[cidx: cidx + count] = curr_bins_vals[pos]
                cidx += count

        agg_data.append(vals[:cidx])
        positions.append((end + begin) / 2)
        labels.append(str((end + begin) // 2))

    if style.violin_instead_of_box:
        patches = plt.violinplot(agg_data,
                                 positions=positions,
                                 showmeans=True,
                                 showmedians=True,
                                 widths=step / 2)

        patches['cmeans'].set_color("blue")
        patches['cmedians'].set_color("green")
        if style.legend_for_eng:
            legend_location = "center left"
            legend_bbox_to_anchor = (1.03, 0.81)
            plt.legend([patches['cmeans'], patches['cmedians']], ["mean", "median"],
                       loc=legend_location, bbox_to_anchor=legend_bbox_to_anchor)
    else:
        plt.boxplot(agg_data, 0, '', positions=positions, labels=labels, widths=step / 4)

    plt.xlim(min(times), max(times))
    plt.xlabel("Time, seconds from test begin, sampled for ~{} seconds".format(int(step)))
    plt.ylabel("Latency, ms")
    plt.title(title)
    apply_style(style, eng=True, no_legend=True)


@provide_plot
def plot_heatmap(title: str,
                 ts: TimeSeries,
                 bins_vals: List[int],
                 colors: Any = ColorProfile,
                 style: Any = StyleProfile) -> None:

    assert len(ts.data.shape) == 2
    assert ts.data.shape[1] == len(bins_vals)

    total_hist = ts.data.sum(axis=0)

    # idx1, idx2 = hist_outliers_perc(total_hist, style.outliers_lat)
    idx1, idx2 = ts_hist_outliers_perc(ts.data, bounds_perc=style.outliers_lat)

    # don't cut too many bins
    min_bins_left = style.hm_hist_bins_count
    if idx2 - idx1 < min_bins_left:
        missed = (min_bins_left - (idx2 - idx1)) // 2
        idx2 = min(len(total_hist), idx2 + missed)
        idx1 = max(0, idx1 - missed)

    data = ts.data[:, idx1:idx2]
    bins_vals = bins_vals[idx1:idx2]

    # not using rebin_histogram here, as we need to apply the same bins to many arrays
    step = (bins_vals[-1] - bins_vals[0]) / style.hm_hist_bins_count
    new_bins_edges = numpy.arange(style.hm_hist_bins_count) * step + bins_vals[0]
    bin_mapping = numpy.clip(numpy.searchsorted(new_bins_edges, bins_vals) - 1, 0, len(new_bins_edges) - 1)
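
    # Illustrative example (not from the original code): with bins_vals
    # [1, 2, 3, 4] and new_bins_edges [1, 3], searchsorted gives [0, 1, 1, 2],
    # which after -1 and clipping becomes [0, 0, 0, 1]: counts from the bins
    # valued 1-3 land in heatmap bin 0 and the bin valued 4 lands in bin 1.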

    # map original bin ranges to heatmap bins, iterating over rows
    cmap = []
    for line in data:
        curr_bins = [0] * style.hm_hist_bins_count
        for idx, count in zip(bin_mapping, line):
            curr_bins[idx] += count
        cmap.append(curr_bins)
    ncmap = numpy.array(cmap)

    xmin = 0
    xmax = (ts.times[-1] - ts.times[0]) / 1000 + 1
    ymin = new_bins_edges[0]
    ymax = new_bins_edges[-1]

    fig, ax = plt.subplots(figsize=style.figsize)

    if style.heatmap_interpolation == '1d':
        interpolation = 'none'
        res = []
        for column in ncmap:
            new_x = numpy.linspace(0, len(column), style.heatmap_interpolation_points)
            old_x = numpy.arange(len(column)) + 0.5
            new_vals = numpy.interp(new_x, old_x, column)
            res.append(new_vals)
        ncmap = numpy.array(res)
    else:
        interpolation = style.heatmap_interpolation

    ax.imshow(ncmap[:, ::-1].T,
              interpolation=interpolation,
              extent=(xmin, xmax, ymin, ymax),
              cmap=colors.imshow_colormap)

    ax.set_aspect((xmax - xmin) / (ymax - ymin) * (6 / 9))
    ax.set_ylabel("Latency, ms")
    ax.set_xlabel("Test time, s")

    plt.title(title)

@provide_plot
def io_chart(title: str,
             legend: str,
             iosums: List[IOSummary],
             iops_log_spine: bool = False,
             lat_log_spine: bool = False,
             colors: Any = ColorProfile,
             style: Any = StyleProfile) -> None:

    # -------------- MAGIC VALUES ---------------------
    # IOPS bar width
    width = 0.35

    # offset from the center of a bar to its deviation/confidence range indicator
    err_x_offset = 0.05

    # extra space on top and bottom, compared to the maximal tight layout
    extra_y_space = 0.05

    # additional spine for BW/IOPS on the left side of the plot
    extra_io_spine_x_offset = -0.1

    # extra space on the left and right sides
    extra_x_space = 0.5

    # legend location settings
    legend_location = "center left"
    legend_bbox_to_anchor = (1.1, 0.81)

    # plot box size adjustment (only the plot, not spines and legend)
    plot_box_adjust = {'right': 0.66}
    # -------------- END OF MAGIC VALUES ---------------------

    block_size = iosums[0].block_size
    lc = len(iosums)
    xt = list(range(1, lc + 1))

    # x coordinates of the middle of the bars
    xpos = [i - width / 2 for i in xt]

    # import matplotlib.gridspec as gridspec
    # gs = gridspec.GridSpec(1, 3, width_ratios=[1, 4, 1])
    # p1 = plt.subplot(gs[1])

    fig, p1 = plt.subplots(figsize=StyleProfile.figsize)

    # plot IOPS/BW bars
    if block_size >= LARGE_BLOCKS:
        iops_primary = False
        coef = MiB2KiB
        p1.set_ylabel("BW (MiBps)")
    else:
        iops_primary = True
        coef = block_size
        p1.set_ylabel("IOPS")

    p1.bar(xpos, [iosum.bw.average / coef for iosum in iosums], width=width, color=colors.box_color, label=legend)

    # set correct y limits for the primary IO spine
    min_io = min(iosum.bw.average - iosum.bw.deviation * style.dev_range_x for iosum in iosums)
    max_io = max(iosum.bw.average + iosum.bw.deviation * style.dev_range_x for iosum in iosums)
    border = (max_io - min_io) * extra_y_space
    io_lims = (min_io - border, max_io + border)

    p1.set_ylim(io_lims[0] / coef, io_lims[-1] / coef)

    # plot deviation and confidence error ranges
    err1_legend = err2_legend = None
    for pos, iosum in zip(xpos, iosums):
        err1_legend = p1.errorbar(pos + width / 2 - err_x_offset,
                                  iosum.bw.average / coef,
                                  iosum.bw.deviation * style.dev_range_x / coef,
                                  alpha=colors.subinfo_alpha,
                                  color=colors.suppl_color1)  # 'magenta'
        err2_legend = p1.errorbar(pos + width / 2 + err_x_offset,
                                  iosum.bw.average / coef,
                                  iosum.bw.confidence / coef,
                                  alpha=colors.subinfo_alpha,
                                  color=colors.suppl_color2)  # 'teal'

    if style.grid:
        p1.grid(True)

    handles1, labels1 = p1.get_legend_handles_labels()

    handles1 += [err1_legend, err2_legend]
    labels1 += ["{}% dev".format(style.dev_perc),
                "{}% conf".format(int(100 * iosums[0].bw.confidence_level))]

    # extra y spine for latency on the right side
    p2 = p1.twinx()

    # plot median and 95 percentile latency
    p2.plot(xt, [iosum.lat.perc_50 for iosum in iosums], label="lat med")
    p2.plot(xt, [iosum.lat.perc_95 for iosum in iosums], label="lat 95%")

    # limit and label the x spine
    plt.xlim(extra_x_space, lc + extra_x_space)
    plt.xticks(xt, ["{0} * {1}".format(iosum.qd, iosum.nodes_count) for iosum in iosums])
    p1.set_xlabel("QD * Test node count")

    # apply log scales to the y spines, if set
    if iops_log_spine:
        p1.set_yscale('log')

    if lat_log_spine:
        p2.set_yscale('log')

    # extra y spine for BW/IOPS on the left side
    if style.extra_io_spine:
        p3 = p1.twinx()
        if iops_log_spine:
            p3.set_yscale('log')

        if iops_primary:
            p3.set_ylabel("BW (MiBps)")
            p3.set_ylim(io_lims[0] / MiB2KiB, io_lims[1] / MiB2KiB)
        else:
            p3.set_ylabel("IOPS")
            p3.set_ylim(io_lims[0] / block_size, io_lims[1] / block_size)

        p3.spines["left"].set_position(("axes", extra_io_spine_x_offset))
        p3.spines["left"].set_visible(True)
        p3.yaxis.set_label_position('left')
        p3.yaxis.set_ticks_position('left')

    p2.set_ylabel("Latency (ms)")

    plt.title(title)

    # legend box
    handles2, labels2 = p2.get_legend_handles_labels()
    plt.legend(handles1 + handles2, labels1 + labels2,
               loc=legend_location,
               bbox_to_anchor=legend_bbox_to_anchor)

    # adjust the central box size to fit the legend
    plt.subplots_adjust(**plot_box_adjust)
    apply_style(style, eng=False, no_legend=True)


# -------------------- REPORT HELPERS --------------------------------------------------------------------------------


class HTMLBlock:
    data = None  # type: str
    js_links = []  # type: List[str]
    css_links = []  # type: List[str]
    order_attr = None  # type: Any

    def __init__(self, data: str, order_attr: Any = None) -> None:
        self.data = data
        self.order_attr = order_attr

    def __eq__(self, o: object) -> bool:
        return o.order_attr == self.order_attr  # type: ignore

    def __lt__(self, o: object) -> bool:
        return o.order_attr > self.order_attr  # type: ignore


class Table:
    def __init__(self, header: List[str]) -> None:
        self.header = header
        self.data = []

    def add_line(self, values: List[str]) -> None:
        self.data.append(values)

    def html(self):
        return html.table("", self.header, self.data)


class Menu1st:
    engineering = "Engineering"
    summary = "Summary"
    per_job = "Per Job"


class Menu2ndEng:
    iops_time = "IOPS(time)"
    hist = "IOPS/lat overall histogram"
    lat_time = "Lat(time)"


class Menu2ndSumm:
    io_lat_qd = "IO & Lat vs QD"


menu_1st_order = [Menu1st.summary, Menu1st.engineering, Menu1st.per_job]


# -------------------- REPORTS ---------------------------------------------------------------------------------------


class Reporter(metaclass=abc.ABCMeta):
    suite_types = set()  # type: Set[str]

    @abc.abstractmethod
    def get_divs(self, suite: SuiteConfig, storage: ResultStorage) -> Iterator[Tuple[str, str, HTMLBlock]]:
        pass


class JobReporter(metaclass=abc.ABCMeta):
    suite_types = set()  # type: Set[str]

    @abc.abstractmethod
    def get_divs(self,
                 suite: SuiteConfig,
                 job: JobConfig,
                 storage: ResultStorage) -> Iterator[Tuple[str, str, HTMLBlock]]:
        pass


# Main performance report
class PerformanceSummary(Reporter):
    """Aggregated summary for the storage"""


# Main performance report
class IO_QD(Reporter):
    """Creates a graph which shows how IOPS and latency depend on QD"""
    suite_types = {'fio'}

    def get_divs(self, suite: SuiteConfig, rstorage: ResultStorage) -> Iterator[Tuple[str, str, HTMLBlock]]:
        ts_map = defaultdict(list)  # type: Dict[FioJobParams, List[Tuple[SuiteConfig, FioJobConfig]]]
        str_summary = {}  # type: Dict[FioJobParams, Tuple[str, str]]
        for job in rstorage.iter_job(suite):
            fjob = cast(FioJobConfig, job)
            fjob_no_qd = cast(FioJobParams, fjob.params.copy(qd=None))
            str_summary[fjob_no_qd] = (fjob_no_qd.summary, fjob_no_qd.long_summary)
            ts_map[fjob_no_qd].append((suite, fjob))

        for tpl, suites_jobs in ts_map.items():
            if len(suites_jobs) > StyleProfile.min_iops_vs_qd_jobs:
                iosums = [make_iosum(rstorage, suite, job) for suite, job in suites_jobs]
                iosums.sort(key=lambda x: x.qd)
                summary, summary_long = str_summary[tpl]
                ds = DataSource(suite_id=suite.storage_id,
                                job_id=summary,
                                node_id=AGG_TAG,
                                sensor="fio",
                                dev=AGG_TAG,
                                metric="io_over_qd",
                                tag="svg")

                title = "IOPS, BW, Lat vs. QD.\n" + summary_long
                fpath = io_chart(rstorage, ds, title=title, legend="IOPS/BW", iosums=iosums)  # type: str
                yield Menu1st.summary, Menu2ndSumm.io_lat_qd, HTMLBlock(html.img(fpath))


# Linearization report
class IOPS_Bsize(Reporter):
    """Creates graphs which show how IOPS and latency depend on block size"""


def summ_sensors(rstorage: ResultStorage,
                 nodes: List[str],
                 sensor: str,
                 metric: str,
                 time_range: Tuple[int, int]) -> Optional[numpy.array]:
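    """Sum per-second sensor arrays elementwise across every matching device on
    all listed nodes; return a cluster-wide per-second total, or None if no
    sensor matched."""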

    res = None  # type: Optional[numpy.array]
    for node_id in nodes:
        for _, groups in rstorage.iter_sensors(node_id=node_id, sensor=sensor, metric=metric):
            data = get_sensor_for_time_range(rstorage,
                                             node_id=node_id,
                                             sensor=sensor,
                                             dev=groups['dev'],
                                             metric=metric,
                                             time_range=time_range)
            if res is None:
                res = data
            else:
                res += data
    return res


# IOPS/latency distribution
class StatInfo(JobReporter):
    """Statistic info for job results"""
    suite_types = {'fio'}

    def get_divs(self, suite: SuiteConfig, job: JobConfig,
                 rstorage: ResultStorage) -> Iterator[Tuple[str, str, HTMLBlock]]:

        fjob = cast(FioJobConfig, job)
        io_sum = make_iosum(rstorage, suite, fjob)

        summary_data = [
            ["Summary", job.params.long_summary],
        ]

        res = html.H2(html.center("Test summary"))
        res += html.table("Test info", None, summary_data)
        stat_data_headers = ["Name", "Average ~ Dev", "Conf interval", "Median", "Mode", "Kurt / Skew", "95%", "99%"]

        KB = 1024
        bw_data = ["Bandwidth",
                   "{}Bps ~ {}Bps".format(b2ssize(io_sum.bw.average * KB), b2ssize(io_sum.bw.deviation * KB)),
                   b2ssize(io_sum.bw.confidence * KB) + "Bps",
                   b2ssize(io_sum.bw.perc_50 * KB) + "Bps",
                   "-",
                   "{:.2f} / {:.2f}".format(io_sum.bw.kurt, io_sum.bw.skew),
                   b2ssize(io_sum.bw.perc_5 * KB) + "Bps",
                   b2ssize(io_sum.bw.perc_1 * KB) + "Bps"]

        iops_data = ["IOPS",
                     "{}IOPS ~ {}IOPS".format(b2ssize_10(io_sum.bw.average / fjob.bsize),
                                              b2ssize_10(io_sum.bw.deviation / fjob.bsize)),
                     b2ssize_10(io_sum.bw.confidence / fjob.bsize) + "IOPS",
                     b2ssize_10(io_sum.bw.perc_50 / fjob.bsize) + "IOPS",
                     "-",
                     "{:.2f} / {:.2f}".format(io_sum.bw.kurt, io_sum.bw.skew),
                     b2ssize_10(io_sum.bw.perc_5 / fjob.bsize) + "IOPS",
                     b2ssize_10(io_sum.bw.perc_1 / fjob.bsize) + "IOPS"]

        # latency percentiles are in ms - the bin edges are scaled in make_iosum
        lat_data = ["Latency",
                    "-",
                    "-",
                    b2ssize_10(io_sum.lat.perc_50 / MS2S) + "s",
                    "-",
                    "-",
                    b2ssize_10(io_sum.lat.perc_95 / MS2S) + "s",
                    b2ssize_10(io_sum.lat.perc_99 / MS2S) + "s"]

        # sensor usage
        stat_data = [iops_data, bw_data, lat_data]
        res += html.table("Load stats info", stat_data_headers, stat_data)

        resource_headers = ["Resource", "Usage count", "Proportional to work done"]

        io_transferred = io_sum.bw.data.sum() * KB
        resource_data = [
            ["IO made", b2ssize_10(io_transferred / KB / fjob.bsize) + "OP", "-"],
            ["Data transferred", b2ssize(io_transferred) + "B", "-"]
        ]


        storage = rstorage.storage
        nodes = storage.load_list(NodeInfo, 'all_nodes')  # type: List[NodeInfo]

        storage_nodes = [node.node_id for node in nodes if node.roles.intersection(STORAGE_ROLES)]
        test_nodes = [node.node_id for node in nodes if "testnode" in node.roles]

        trange = [job.reliable_info_range[0] / 1000, job.reliable_info_range[1] / 1000]
        ops_done = io_transferred / fjob.bsize / KB

        all_metrics = [
            ("Test nodes net send", 'net-io', 'send_bytes', b2ssize, test_nodes, "B", io_transferred),
            ("Test nodes net recv", 'net-io', 'recv_bytes', b2ssize, test_nodes, "B", io_transferred),

            ("Test nodes disk write", 'block-io', 'sectors_written', b2ssize, test_nodes, "B", io_transferred),
            ("Test nodes disk read", 'block-io', 'sectors_read', b2ssize, test_nodes, "B", io_transferred),
            ("Test nodes writes", 'block-io', 'writes_completed', b2ssize_10, test_nodes, "OP", ops_done),
            ("Test nodes reads", 'block-io', 'reads_completed', b2ssize_10, test_nodes, "OP", ops_done),

            ("Storage nodes net send", 'net-io', 'send_bytes', b2ssize, storage_nodes, "B", io_transferred),
            ("Storage nodes net recv", 'net-io', 'recv_bytes', b2ssize, storage_nodes, "B", io_transferred),

            ("Storage nodes disk write", 'block-io', 'sectors_written', b2ssize, storage_nodes, "B", io_transferred),
            ("Storage nodes disk read", 'block-io', 'sectors_read', b2ssize, storage_nodes, "B", io_transferred),
            ("Storage nodes writes", 'block-io', 'writes_completed', b2ssize_10, storage_nodes, "OP", ops_done),
            ("Storage nodes reads", 'block-io', 'reads_completed', b2ssize_10, storage_nodes, "OP", ops_done),
        ]

        all_agg = {}

        for descr, sensor, metric, ffunc, nodes, units, denom in all_metrics:
            if not nodes:
                continue

            res_arr = summ_sensors(rstorage, nodes=nodes, sensor=sensor, metric=metric, time_range=trange)
            if res_arr is None:
                continue

            agg = res_arr.sum()
            resource_data.append([descr, ffunc(agg) + units, "{:.1f}".format(agg / denom)])
            all_agg[descr] = agg


        cums = [
            ("Test nodes writes", "Test nodes reads", "Total test ops", b2ssize_10, "OP", ops_done),
            ("Storage nodes writes", "Storage nodes reads", "Total storage ops", b2ssize_10, "OP", ops_done),
            ("Storage nodes disk write", "Storage nodes disk read", "Total storage IO size", b2ssize,
             "B", io_transferred),
            ("Test nodes disk write", "Test nodes disk read", "Total test nodes IO size", b2ssize, "B", io_transferred),
        ]

        for name1, name2, descr, ffunc, units, denom in cums:
            if name1 in all_agg and name2 in all_agg:
                agg = all_agg[name1] + all_agg[name2]
                resource_data.append([descr, ffunc(agg) + units, "{:.1f}".format(agg / denom)])

        res += html.table("Resources usage", resource_headers, resource_data)

        yield Menu1st.per_job, job.summary, HTMLBlock(res)


# IOPS/latency distribution
class IOHist(JobReporter):
    """IOPS/latency distribution histogram"""
    suite_types = {'fio'}

    def get_divs(self,
                 suite: SuiteConfig,
                 job: JobConfig,
                 rstorage: ResultStorage) -> Iterator[Tuple[str, str, HTMLBlock]]:

        fjob = cast(FioJobConfig, job)

        yield Menu1st.per_job, fjob.summary, HTMLBlock(html.H2(html.center("Load histograms")))

        agg_lat = get_aggregated(rstorage, suite, fjob, "lat")
        bins_edges = numpy.array(get_lat_vals(agg_lat.data.shape[1]), dtype='float32') / 1000  # convert us to ms
        lat_stat_prop = calc_histo_stat_props(agg_lat, bins_edges, bins_count=StyleProfile.hist_lat_boxes)

        long_summary = cast(FioJobParams, fjob.params).long_summary

        title = "Latency distribution"
        units = "ms"

        fpath = plot_hist(rstorage, agg_lat.source(tag='hist.svg'), title, units, lat_stat_prop)  # type: str
        yield Menu1st.per_job, fjob.summary, HTMLBlock(html.img(fpath))

        agg_io = get_aggregated(rstorage, suite, fjob, "bw")

        if fjob.bsize >= LARGE_BLOCKS:
            title = "BW distribution"
            units = "MiBps"
            agg_io.data //= MiB2KiB
        else:
            title = "IOPS distribution"
            agg_io.data //= fjob.bsize
            units = "IOPS"

        io_stat_prop = calc_norm_stat_props(agg_io, bins_count=StyleProfile.hist_boxes)
        fpath = plot_hist(rstorage, agg_io.source(tag='hist.svg'), title, units, io_stat_prop)  # type: str
        yield Menu1st.per_job, fjob.summary, HTMLBlock(html.img(fpath))


# IOPS/latency over test time for each job
class IOTime(JobReporter):
    """IOPS/latency during the test"""
    suite_types = {'fio'}

    def get_divs(self,
                 suite: SuiteConfig,
                 job: JobConfig,
                 rstorage: ResultStorage) -> Iterator[Tuple[str, str, HTMLBlock]]:

        fjob = cast(FioJobConfig, job)

        yield Menu1st.per_job, fjob.summary, HTMLBlock(html.H2(html.center("Load over time")))

        agg_io = get_aggregated(rstorage, suite, fjob, "bw")
        if fjob.bsize >= LARGE_BLOCKS:
            title = "Bandwidth"
            units = "MiBps"
            agg_io.data //= MiB2KiB
        else:
            title = "IOPS"
            agg_io.data //= fjob.bsize
            units = "IOPS"

        fpath = plot_v_over_time(rstorage, agg_io.source(tag='ts.svg'), title, units, agg_io)  # type: str
        yield Menu1st.per_job, fjob.summary, HTMLBlock(html.img(fpath))

        agg_lat = get_aggregated(rstorage, suite, fjob, "lat")
        bins_edges = numpy.array(get_lat_vals(agg_lat.data.shape[1]), dtype='float32') / 1000
        title = "Latency"

        fpath = plot_lat_over_time(rstorage, agg_lat.source(tag='ts.svg'), title, agg_lat, bins_edges)  # type: str
        yield Menu1st.per_job, fjob.summary, HTMLBlock(html.img(fpath))

        title = "Latency heatmap"
        fpath = plot_heatmap(rstorage, agg_lat.source(tag='hmap.png'), title, agg_lat, bins_edges)  # type: str

        yield Menu1st.per_job, fjob.summary, HTMLBlock(html.img(fpath))


class ResourceUsage:
    def __init__(self, io_r_ops: int, io_w_ops: int, io_r_kb: int, io_w_kb: int) -> None:
        self.io_w_ops = io_w_ops
        self.io_r_ops = io_r_ops
        self.io_w_kb = io_w_kb
        self.io_r_kb = io_r_kb

        self.cpu_used_user = None  # type: int
        self.cpu_used_sys = None  # type: int
        self.cpu_wait_io = None  # type: int

        self.net_send_packets = None  # type: int
        self.net_recv_packets = None  # type: int
        self.net_send_kb = None  # type: int
        self.net_recv_kb = None  # type: int


# Cluster load over test time
class ClusterLoad(JobReporter):
    """Cluster load during the test"""

    # TODO: units should come from the sensor
    storage_sensors = [
        ('block-io', 'reads_completed', "Read ops", 'iops'),
        ('block-io', 'writes_completed', "Write ops", 'iops'),
        ('block-io', 'sectors_read', "Read kb", 'kb'),
        ('block-io', 'sectors_written', "Write kb", 'kb'),
    ]

    def get_divs(self,
                 suite: SuiteConfig,
                 job: JobConfig,
                 rstorage: ResultStorage) -> Iterator[Tuple[str, str, HTMLBlock]]:
koder aka kdanilov108ac362017-01-19 20:17:16 +02001140 # split nodes on test and other
        storage = rstorage.storage
        nodes = storage.load_list(NodeInfo, "all_nodes")  # type: List[NodeInfo]

        yield Menu1st.per_job, job.summary, HTMLBlock(html.H2(html.center("Cluster load")))
        test_nodes = {node.node_id for node in nodes if 'testnode' in node.roles}
        cluster_nodes = {node.node_id for node in nodes if 'testnode' not in node.roles}

        # convert ms to s
        time_range = (job.reliable_info_range[0] // MS2S, job.reliable_info_range[1] // MS2S)
        range_len = time_range[1] - time_range[0]
        for sensor, metric, sensor_title, units in self.storage_sensors:
            sum_testnode = numpy.zeros((range_len,))
            sum_other = numpy.zeros((range_len,))
            for path, groups in rstorage.iter_sensors(sensor=sensor, metric=metric):
                # TODO: should return sensor units
                data = get_sensor_for_time_range(rstorage,
                                                 groups['node_id'],
                                                 sensor,
                                                 groups['dev'],
                                                 metric, time_range)
                if groups['node_id'] in test_nodes:
                    sum_testnode += data
                else:
                    sum_other += data

            ds = DataSource(suite_id=suite.storage_id,
                            job_id=job.storage_id,
                            node_id="test_nodes",
                            sensor=sensor,
                            dev=AGG_TAG,
                            metric=metric,
                            tag="ts.svg")

            # convert s to ms
            ts = TimeSeries(name="",
                            times=numpy.arange(*time_range) * MS2S,
                            data=sum_testnode,
                            raw=None,
                            units=units,
                            time_units="ms",
                            source=ds)
            fpath = plot_v_over_time(rstorage, ds, sensor_title, sensor_title, ts=ts)  # type: str
            yield Menu1st.per_job, job.summary, HTMLBlock(html.img(fpath))


# Resource consumption report
class ResourceConsumption(Reporter):
    """Resources consumption report, only text"""


# Node load over test time
class NodeLoad(Reporter):
    """Node load during the test"""


# Ceph cluster summary
class CephClusterSummary(Reporter):
    """Ceph cluster summary"""


# TODO: Ceph operation breakout report
# TODO: Resource consumption for different types of tests


# ------------------------------------------ REPORT STAGES -----------------------------------------------------------


class HtmlReportStage(Stage):
    priority = StepOrder.REPORT

    def run(self, ctx: TestRun) -> None:
        rstorage = ResultStorage(ctx.storage)

        job_reporters = [StatInfo(), IOTime(), IOHist(), ClusterLoad()]  # type: List[JobReporter]
        reporters = [IO_QD()]  # type: List[Reporter]

        # job_reporters = [ClusterLoad()]
        # reporters = []

        root_dir = os.path.dirname(os.path.dirname(wally.__file__))
        doc_templ_path = os.path.join(root_dir, "report_templates/index.html")
        report_template = open(doc_templ_path, "rt").read()
        css_file_src = os.path.join(root_dir, "report_templates/main.css")
        css_file = open(css_file_src, "rt").read()

        menu_block = []
        content_block = []
        link_idx = 0

        # matplotlib.rcParams.update(ctx.config.reporting.matplotlib_params.raw())
        # ColorProfile.__dict__.update(ctx.config.reporting.colors.raw())
        # StyleProfile.__dict__.update(ctx.config.reporting.style.raw())

        items = defaultdict(lambda: defaultdict(list))  # type: Dict[str, Dict[str, List[HTMLBlock]]]

        # TODO: filter reporters
        for suite in rstorage.iter_suite(FioTest.name):
            all_jobs = list(rstorage.iter_job(suite))
            all_jobs.sort(key=lambda job: job.params)
            for job in all_jobs:
                for reporter in job_reporters:
                    for block, item, html_block in reporter.get_divs(suite, job, rstorage):
                        items[block][item].append(html_block)
                if DEBUG:
                    break

            for reporter in reporters:
                for block, item, html_block in reporter.get_divs(suite, rstorage):
                    items[block][item].append(html_block)

            if DEBUG:
                break

        for idx_1st, menu_1st in enumerate(sorted(items, key=lambda x: menu_1st_order.index(x))):
            menu_block.append(
                '<a href="#item{}" class="nav-group" data-toggle="collapse" data-parent="#MainMenu">{}</a>'
                .format(idx_1st, menu_1st)
            )
            menu_block.append('<div class="collapse" id="item{}">'.format(idx_1st))
            for menu_2nd in sorted(items[menu_1st]):
                menu_block.append('    <a href="#content{}" class="nav-group-item">{}</a>'
                                  .format(link_idx, menu_2nd))
                content_block.append('<div id="content{}">'.format(link_idx))
                content_block.extend("    " + x.data for x in items[menu_1st][menu_2nd])
                content_block.append('</div>')
                link_idx += 1
            menu_block.append('</div>')
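
        # Resulting markup shape (illustrative): every first-level menu entry
        # becomes a collapsible nav group whose items link to the matching
        # "#contentN" divs collected in content_block, e.g.
        #     <a href="#item0" class="nav-group" ...>Summary</a>
        #     <div class="collapse" id="item0">
        #         <a href="#content0" class="nav-group-item">IO & Lat vs QD</a>
        #     </div>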

        report = report_template.replace("{{{menu}}}", ("\n" + " " * 16).join(menu_block))
        report = report.replace("{{{content}}}", ("\n" + " " * 16).join(content_block))
        report_path = rstorage.put_report(report, "index.html")
        rstorage.put_report(css_file, "main.css")
        logger.info("Report is stored into %r", report_path)


class ConsoleReportStage(Stage):

    priority = StepOrder.REPORT

    def run(self, ctx: TestRun) -> None:
        # TODO(koder): load data from storage
        raise NotImplementedError("...")