blob: 6729584580164849bfdb679d5c22a0df21a6381d [file] [log] [blame]
kdanylov aka koder3a9e5db2017-05-09 20:00:44 +03001import logging
2from io import BytesIO
3from functools import wraps
4from typing import Tuple, cast, List, Callable, Optional, Any
5
6import numpy
7import scipy.stats
8import matplotlib.axis
9import matplotlib.style
10from matplotlib.ticker import FuncFormatter
11from matplotlib.figure import Figure
12import matplotlib.pyplot as plt
13
14# to make seaborn styles available
15import warnings
16with warnings.catch_warnings():
17 warnings.simplefilter("ignore")
18 import seaborn
19
20from cephlib.plot import process_heatmap_data, hmap_from_2d, do_plot_hmap_with_histo
21
22from .hlstorage import ResultStorage
23from .utils import unit_conversion_coef
24from .statistic import moving_average, moving_dev, hist_outliers_perc, find_ouliers_ts, approximate_curve
25from .result_classes import StatProps, DataSource, TimeSeries, NormStatProps
26from .report_profiles import StyleProfile, ColorProfile
27from .resources import IOSummary
28
29
30logger = logging.getLogger("wally")
31
32
33# -------------- PLOT HELPERS FUNCTIONS ------------------------------------------------------------------------------
34
def get_emb_image(fig: Figure, file_format: str, **opts) -> bytes:
    """Render ``fig`` into memory and return the encoded image bytes.

    Parameters:
        fig: object providing ``savefig(file, format=..., **opts)``.
        file_format: output format name handed to ``savefig``.
        **opts: extra keyword arguments forwarded to ``savefig`` unchanged.

    Returns:
        For every format except ``'svg'`` the bytes exactly as written by
        ``savefig``. For ``'svg'`` the document prolog is stripped so the
        result can be embedded into another document: everything up to and
        including the "Created with matplotlib" comment is removed.
    """
    bio = BytesIO()
    fig.savefig(bio, format=file_format, **opts)
    raw = bio.getvalue()

    if file_format != 'svg':
        return raw

    svg_text = raw.decode("utf8")

    # The comment text differs between matplotlib releases (http vs https link,
    # as far as we know), so try both instead of crashing with IndexError when
    # the historical one is absent.
    known_markers = (
        "<!-- Created with matplotlib (http://matplotlib.org/) -->",
        "<!-- Created with matplotlib (https://matplotlib.org/) -->",
    )
    for marker in known_markers:
        _, found, tail = svg_text.partition(marker)
        if found:
            return tail.encode("utf8")

    # No known marker: start at the root <svg> element when there is one,
    # otherwise hand back the whole document rather than failing.
    tag_pos = svg_text.find("<svg")
    return svg_text[max(tag_pos, 0):].encode("utf8")
44
45
class PlotParams:
    """Plain container handed to every plot function.

    Holds the figure, the axes object (may be ``None`` when the caller did not
    create one), the plot title and the style/color profiles in use.
    """

    def __init__(self, fig: Figure, ax: Any, title: str,
                 style: StyleProfile, colors: ColorProfile) -> None:
        # drawing targets
        self.fig, self.ax = fig, ax
        # caption shown on the figure
        self.title = title
        # appearance settings
        self.style, self.colors = style, colors
54
55
def provide_plot(noaxis: bool = False,
                 eng: bool = False,
                 no_legend: bool = False,
                 long_plot: bool = True,
                 grid: Any = None,
                 style_name: str = 'default',
                 noadjust: bool = False) -> Callable[..., Callable[..., str]]:
    """Decorator factory turning a drawing function into a cached plot producer.

    The decorated function receives a ``PlotParams`` as first argument followed
    by the caller's extra positional/keyword arguments. The resulting wrapper has
    the signature ``(storage, style, colors, path, title, *args, **kwargs)`` and
    returns whatever ``storage.check_plot_file`` / ``storage.put_plot_file``
    return for the plot.

    Parameters:
        noaxis: when true no axes are created and ``PlotParams.ax`` is ``None``;
            ``xlabel``/``ylabel`` keyword arguments are then passed through to
            the drawing function instead of being consumed here.
        eng, no_legend, noadjust: forwarded to ``apply_style``.
        long_plot: selects ``style.figsize_long`` instead of ``style.figsize``.
        grid: if truthy, passed as ``axis=`` to ``ax.grid`` (only with axes).
        style_name: ``'default'`` uses ``style.default_style``, ``'ioqd'`` uses
            ``style.io_chart_style``.
    """
    def closure1(func: Callable[..., None]) -> Callable[..., str]:
        @wraps(func)
        def closure2(storage: ResultStorage,
                     style: StyleProfile,
                     colors: ColorProfile,
                     path: DataSource,
                     title: Optional[str],
                     *args, **kwargs) -> str:
            fpath = storage.check_plot_file(path)
            if fpath:
                # plot already exists in storage, nothing to draw
                return fpath

            assert style_name in ('default', 'ioqd')
            mlstyle = style.default_style if style_name == 'default' else style.io_chart_style
            with matplotlib.style.context(mlstyle):
                # image format is taken from the last dot-separated part of the tag
                file_format = path.tag.split(".")[-1]
                fig = plt.figure(figsize=style.figsize_long if long_plot else style.figsize)

                # pyplot keeps every figure alive until it is closed explicitly,
                # so release it even when drawing or saving fails
                try:
                    if not noaxis:
                        xlabel = kwargs.pop('xlabel', None)
                        ylabel = kwargs.pop('ylabel', None)
                        ax = fig.add_subplot(111)

                        if xlabel is not None:
                            ax.set_xlabel(xlabel)

                        if ylabel is not None:
                            ax.set_ylabel(ylabel)

                        if grid:
                            ax.grid(axis=grid)
                    else:
                        ax = None

                    if title:
                        fig.suptitle(title, fontsize=style.title_font_size)

                    pp = PlotParams(fig, ax, title, style, colors)
                    func(pp, *args, **kwargs)
                    apply_style(pp, eng=eng, no_legend=no_legend, noadjust=noadjust)

                    fpath = storage.put_plot_file(get_emb_image(fig, file_format=file_format, dpi=style.dpi), path)
                    logger.debug("Plot %s saved to %r", path, fpath)
                finally:
                    plt.close(fig)
            return fpath
        return closure2
    return closure1
109
110
def apply_style(pp: PlotParams, eng: bool = True, no_legend: bool = False, noadjust: bool = False) -> None:
    """Apply legend placement and layout settings to an already drawn figure.

    A legend is added to every axes of ``pp.fig`` when ``no_legend`` is false
    and either ``eng`` is false or ``pp.style.legend_for_eng`` is set. Unless
    ``noadjust`` is true, the right subplot border is moved to
    ``subplot_adjust_r`` (legend shown) or ``subplot_adjust_r_no_legend``
    (legend hidden). Both values are read from the active profile ``pp.style``
    so that per-profile overrides are honoured. Finally tight layout is enabled
    when ``pp.style.tide_layout`` is set.
    """
    style = pp.style
    show_legend = (style.legend_for_eng or not eng) and not no_legend

    if show_legend:
        if not noadjust:
            # leave room on the right side for the legend box
            pp.fig.subplots_adjust(right=style.subplot_adjust_r)

        # legend is anchored outside of the plotting area, to the right of it
        legend_location = "center left"
        legend_bbox_to_anchor = (1.03, 0.81)
        for ax in pp.fig.axes:
            ax.legend(loc=legend_location, bbox_to_anchor=legend_bbox_to_anchor)
    elif not noadjust:
        pp.fig.subplots_adjust(right=style.subplot_adjust_r_no_legend)

    if style.tide_layout:
        pp.fig.set_tight_layout(True)
126
127
128# -------------- PLOT FUNCTIONS --------------------------------------------------------------------------------------
129
130
@provide_plot(eng=True)
def plot_hist(pp: PlotParams, units: str, prop: StatProps) -> None:
    """Draw a probability histogram from ``prop`` and, for normal stats, the expected curve.

    Bar heights are ``prop.bins_populations`` normalised to sum to one; every
    bar uses the width of the first bin. When ``prop`` is a ``NormStatProps``
    the probabilities expected from a normal distribution with the same
    average and deviation are overlaid. ``units`` is used as the X axis label.
    """
    normed_bins = prop.bins_populations / prop.bins_populations.sum()
    bar_width = prop.bins_edges[1] - prop.bins_edges[0]
    pp.ax.bar(prop.bins_edges, normed_bins, color=pp.colors.box_color, width=bar_width, label="Real data")

    pp.ax.set(xlabel=units, ylabel="Value probability")

    if isinstance(prop, NormStatProps):
        nprop = cast(NormStatProps, prop)
        stats = scipy.stats.norm(nprop.average, nprop.deviation)

        # evaluate the distribution on a grid ten times denser than the bins
        new_edges, step = numpy.linspace(prop.bins_edges[0], prop.bins_edges[-1],
                                         len(prop.bins_edges) * 10, retstep=True)

        # Probability mass of each fine interval, rescaled to the mass of a
        # bar-wide interval so the curve is comparable with the bar heights.
        # The ratio equals the previously hard-coded 11 only for 10 bin edges.
        scale = bar_width / step if step else 1.0
        ypoints = numpy.diff(stats.cdf(new_edges)) * scale
        xpoints = (new_edges[1:] + new_edges[:-1]) / 2

        pp.ax.plot(xpoints, ypoints, color=pp.colors.primary_color, label="Expected from\nnormal\ndistribution")

    pp.ax.set_xlim(left=prop.bins_edges[0])
    if prop.log_bins:
        pp.ax.set_xscale('log')
156
157
@provide_plot(grid='y')
def plot_simple_over_time(pp: PlotParams, tss: List[Tuple[str, numpy.ndarray]], average: bool = False) -> None:
    """Plot several named series against their sample index on one axes.

    With ``average`` set, each series is replaced by its moving average over
    ``pp.style.avg_range`` points and, if ``pp.style.approx_average_no_points``
    is enabled, additionally passed through ``approximate_curve``. The X range
    is padded by 5 on both sides of the longest plotted series.
    """
    longest = 0
    for label, series in tss:
        if average:
            series = moving_average(series, pp.style.avg_range)
            if pp.style.approx_average_no_points:
                # sample indexes serve both as source and as target X positions
                xs = numpy.arange(len(series))
                series = approximate_curve(cast(List[int], xs),
                                           series,
                                           cast(List[int], xs),
                                           pp.style.curve_approx_level)
        pp.ax.plot(series, label=label)
        longest = max(longest, len(series))

    pp.ax.set_xlim(-5, longest + 5)
174
175
@provide_plot(no_legend=True, grid='x', noadjust=True)
def plot_simple_bars(pp: PlotParams,
                     names: List[str],
                     values: List[float],
                     errs: Optional[List[float]] = None,
                     x_formatter: Optional[Callable[[float, float], str]] = None,
                     one_point_zero_line: bool = True) -> None:
    """Draw one horizontal bar per name, optionally with error ranges.

    Parameters:
        names: Y tick labels, one per bar.
        values: bar lengths, in the same order as ``names``.
        errs: optional per-bar error sizes passed as ``xerr``; may be omitted.
        x_formatter: optional ``(value, position) -> str`` callable used to
            format X tick labels.
        one_point_zero_line: draw a dashed red vertical line at ``x == 1.0``.
    """
    ind = numpy.arange(len(names))
    width = 0.35
    pp.ax.barh(ind, values, width, xerr=errs)

    pp.ax.set_yticks(ind)
    pp.ax.set_yticklabels(names)

    # right border: the furthest bar end (including its error range) plus 10%.
    # errs is optional, so it must not be zipped unconditionally.
    if errs is None:
        bar_ends = list(values)
    else:
        bar_ends = [val + err for val, err in zip(values, errs)]

    if bar_ends:
        pp.ax.set_xlim(0, max(bar_ends) * 1.1)

    if one_point_zero_line:
        pp.ax.axvline(x=1.0, color='r', linestyle='--', linewidth=1, alpha=0.5)

    if x_formatter:
        pp.ax.xaxis.set_major_formatter(FuncFormatter(x_formatter))

    # leave room for the bar names on the left side
    pp.fig.subplots_adjust(left=0.2)
199
200
@provide_plot(no_legend=True, long_plot=True, noaxis=True)
def plot_hmap_from_2d(pp: PlotParams, data2d: numpy.ndarray, xlabel: str, ylabel: str,
                      bins: numpy.ndarray = None) -> None:
    """Plot ``data2d`` as a heatmap accompanied by a histogram of its values.

    The data is converted with ``hmap_from_2d`` and ``process_heatmap_data``
    (``bins`` is forwarded to the latter), then drawn on ``pp.fig`` by
    ``do_plot_hmap_with_histo``; axis labels are set on the returned axes.
    """
    flat_vals, val_ranges = hmap_from_2d(data2d)
    heatmap, hm_bins = process_heatmap_data(flat_vals, bin_ranges=val_ranges, bins=bins)

    # population of every heatmap bin over the whole data set
    populations, _edges = numpy.histogram(flat_vals, hm_bins)

    hm_ax, _extra = do_plot_hmap_with_histo(pp.fig,
                                            heatmap,
                                            populations,
                                            hm_bins,
                                            cmap=pp.colors.hmap_cmap,
                                            cbar=pp.style.heatmap_colorbar,
                                            histo_grid=pp.style.histo_grid)
    hm_ax.set(ylabel=ylabel, xlabel=xlabel)
216
217
@provide_plot(eng=True, grid='y')
def plot_v_over_time(pp: PlotParams, units: str, ts: TimeSeries,
                     plot_avg_dev: bool = True, plot_points: bool = True) -> None:
    """Plot a time series over time with optional raw points and average/deviation lines.

    Parameters:
        units: text used for the Y axis label.
        ts: series to plot; ``ts.times`` is shifted to start at zero and
            converted from ``ts.time_units`` to seconds.
        plot_avg_dev: draw the moving average and the deviation band around it
            (skipped with a warning when there are too few points).
        plot_points: draw the individual data points and the visible outliers.
    """
    style, colors = pp.style, pp.colors
    plus_minus = "\xb1"

    # time axis: seconds elapsed since the first sample
    start = min(ts.times)
    to_sec = float(unit_conversion_coef(ts.time_units, 's'))
    time_points = numpy.array([(stamp - start) * to_sec for stamp in ts.times])

    # two outlier selections over the same data: the first separates regular
    # points from outliers, the second marks outliers that are not drawn at all
    # (both are presumably boolean masks - they are combined with logical ops)
    outlier_mask = find_ouliers_ts(ts.data, cut_range=style.outliers_q_nd)
    hidden_mask = find_ouliers_ts(ts.data, cut_range=style.outliers_hide_q_nd)
    regular_mask = numpy.logical_not(outlier_mask)
    shown_outlier_mask = outlier_mask & numpy.logical_not(hidden_mask)

    data = ts.data[regular_mask]
    data_times = time_points[regular_mask]
    outliers = ts.data[shown_outlier_mask]
    outliers_times = time_points[shown_outlier_mask]

    if plot_points:
        # points are faded when average lines are drawn over them
        alpha = colors.noise_alpha if plot_avg_dev else 1.0
        pp.ax.plot(data_times, data, style.point_shape, color=colors.primary_color, alpha=alpha, label="Data")
        pp.ax.plot(outliers_times, outliers, style.err_point_shape, color=colors.err_color, label="Outliers")

    clamp_bottom = False

    if plot_avg_dev:
        if len(data) < style.avg_range * 2:
            logger.warning("Array %r to small to plot average over %s points", pp.title, style.avg_range)
        else:
            avg_vals = moving_average(data, style.avg_range)
            dev_vals = moving_dev(data, style.avg_range)
            avg_times = moving_average(data_times, style.avg_range)

            if plot_points:
                smooth = style.approx_average
            else:
                smooth = style.approx_average_no_points

            if smooth:
                avg_vals = approximate_curve(avg_times, avg_vals, avg_times, style.curve_approx_level)
                dev_vals = approximate_curve(avg_times, dev_vals, avg_times, style.curve_approx_level)

            pp.ax.plot(avg_times, avg_vals, c=colors.suppl_color1, label="Average")

            dev_x = style.dev_range_x
            band = dev_vals * dev_x
            lower = avg_vals - band
            upper = avg_vals + band

            # show the multiplier as an integer when it is (almost) a whole number
            shown_x = int(dev_x) if (dev_x - int(dev_x)) < 0.01 else dev_x
            pp.ax.plot(avg_times, lower, c=colors.suppl_color2,
                       label="{}{}*stdev".format(plus_minus, shown_x))
            pp.ax.plot(avg_times, upper, c=colors.suppl_color2)
            clamp_bottom = lower.min() < 0

    pp.ax.set_xlim(-5, max(time_points) + 5)
    pp.ax.set_xlabel("Time, seconds from test begin")

    if plot_avg_dev:
        ylabel = "{}. Average and {}stddev over {} points".format(units, plus_minus, style.avg_range)
    else:
        ylabel = units
    pp.ax.set_ylabel(ylabel)

    # do not extend the Y axis below zero just because the lower band went negative
    if clamp_bottom:
        pp.ax.set_ylim(bottom=0)
281
282
@provide_plot(eng=True, no_legend=True, grid='y', noadjust=True)
def plot_lat_over_time(pp: PlotParams, ts: TimeSeries) -> None:
    """Plot the distribution of histogram-based ``ts`` data per time window.

    The time range is cut into ``pp.style.lat_samples`` windows. For every
    window the histogram rows are summed and expanded back into sample values
    taken from ``ts.histo_bins``, which are then drawn either as violins or
    as boxes, depending on ``pp.style.violin_instead_of_box``.
    """
    style = pp.style
    use_violin = style.violin_instead_of_box

    times = ts.times - min(ts.times)
    step = len(times) / style.lat_samples

    # window borders: one time value per sample slot plus the very last one.
    # NOTE(review): these time values are used below as row indexes into
    # ts.data, which presumably relies on one integer-valued row per time unit.
    edges = [times[int(slot * step + 0.5)] for slot in range(style.lat_samples)]
    edges.append(times[-1])

    agg_data = []
    positions = []
    labels = []

    for begin, end in zip(edges[:-1], edges[1:]):
        window_hist = ts.data[begin:end].sum(axis=0)

        if use_violin:
            # cut outliers
            lo, hi = hist_outliers_perc(window_hist, style.outliers_lat)
            window_hist = window_hist[lo:hi]
            bin_values = ts.histo_bins[lo:hi]

            # never generate more than ~violin_point_count samples per window
            shrink = min(style.violin_point_count / sum(window_hist), 1)
        else:
            bin_values = ts.histo_bins
            shrink = 1

        samples = numpy.empty(shape=[numpy.sum(window_hist)], dtype='float32')
        filled = 0

        (populated,) = window_hist.nonzero()
        for pos in populated:
            repeat = int(window_hist[pos] * shrink + 0.5)
            if repeat:
                # every occurrence counted in the bin becomes one sample value
                samples[filled: filled + repeat] = bin_values[pos]
                filled += repeat

        agg_data.append(samples[:filled])
        positions.append((end + begin) / 2)
        labels.append(str((end + begin) // 2))

    if use_violin:
        patches = pp.ax.violinplot(agg_data, positions=positions, showmeans=True, showmedians=True, widths=step / 2)
        patches['cmeans'].set_color("blue")
        patches['cmedians'].set_color("green")
        if style.legend_for_eng:
            pp.ax.legend([patches['cmeans'], patches['cmedians']], ["mean", "median"],
                         loc="center left", bbox_to_anchor=(1.03, 0.81))
    else:
        pp.ax.boxplot(agg_data, 0, '', positions=positions, labels=labels, widths=step / 4)

    pp.ax.set_xlim(min(times), max(times))
    pp.ax.set_xlabel("Time, seconds from test begin, sampled for ~{} seconds".format(int(step)))
    pp.fig.subplots_adjust(right=style.subplot_adjust_r)
340
341
@provide_plot(eng=True, no_legend=True, noaxis=True, long_plot=True)
def plot_histo_heatmap(pp: PlotParams, ts: TimeSeries, ylabel: str, xlabel: str = "time, s") -> None:
    """Plot histogram-based time series data as a heatmap with a summary histogram.

    ``ts.data`` must be two-dimensional (one histogram per row). Outlier bins
    are folded into the nearest kept bin, rows are averaged down to roughly
    ``pp.style.hm_x_slots`` columns of the picture and bins are merged into
    ``pp.style.hm_hist_bins_count`` equally wide ranges before plotting.
    """
    style = pp.style
    rows_shape = ts.data.shape

    # only histogram-based ts can be plotted
    assert len(rows_shape) == 2

    # Outlier range is detected once for the whole series, as load is expected
    # to be stable during one job
    lo, hi = hist_outliers_perc(ts.data.sum(axis=0),
                                bounds_perc=style.outliers_lat,
                                min_bins_left=style.hm_hist_bins_count)

    # fold everything outside of [lo, hi) into the closest kept bin
    kept = ts.data[:, lo:hi].copy()
    if lo > 0:
        kept[:, 0] += ts.data[:, :lo].sum(axis=1)

    if hi < rows_shape[1]:
        kept[:, -1] += ts.data[:, hi:].sum(axis=1)

    bins_vals = ts.histo_bins[lo:hi]

    # ---- rebin over X axis: average groups of rows to get ~hm_x_slots rows
    rows_per_slot = float(len(kept)) / style.hm_x_slots
    if rows_per_slot >= 2:
        borders = [int(pos) for pos in numpy.round(numpy.arange(0, len(kept) + 1, rows_per_slot))]
        assert len(borders) > 1
        data = numpy.empty([len(borders) - 1, kept.shape[1]], dtype=numpy.float32)
        for row_no, (first, last) in enumerate(zip(borders[:-1], borders[1:])):
            data[row_no] = kept[first:last, :].sum(axis=0) / (last - first)
    else:
        data = kept

    # ---- rebin over Y axis
    # rebin_histogram is not used here, as the same bins must be applied to many arrays
    slots = style.hm_hist_bins_count
    step = (bins_vals[-1] - bins_vals[0]) / slots
    new_bins_edges = numpy.arange(slots) * step + bins_vals[0]

    # index of the heatmap bin every original bin is merged into
    bin_mapping = numpy.clip(numpy.searchsorted(new_bins_edges, bins_vals) - 1, 0, len(new_bins_edges) - 1)

    # map original bins to heatmap bins, row by row
    merged_rows = []
    for row in data:
        merged = [0] * slots
        for target, amount in zip(bin_mapping, row):
            merged[target] += amount
        merged_rows.append(merged)
    ncmap = numpy.array(merged_rows)

    histo = ncmap.sum(axis=0).reshape((-1,))
    hm_ax, _extra = do_plot_hmap_with_histo(pp.fig, ncmap, histo, new_bins_edges,
                                            cmap=pp.colors.hmap_cmap,
                                            cbar=style.heatmap_colorbar, avg_labels=True)
    hm_ax.set(ylabel=ylabel, xlabel=xlabel)
399
400
@provide_plot(eng=False, no_legend=True, grid='y', style_name='ioqd', noadjust=True)
def io_chart(pp: PlotParams,
             legend: str,
             iosums: List[IOSummary],
             iops_log_spine: bool = False,
             lat_log_spine: bool = False) -> None:
    """Draw the combined throughput/latency chart for a series of IO summaries.

    One bar per entry of ``iosums`` shows the average of ``iosum.bw`` (as IOPS
    when the block size is below ``pp.style.large_blocks``, as MiBps otherwise)
    with deviation and confidence error bars. Median and 95th percentile
    latency are drawn as lines against a second Y axis on the right, and an
    optional third Y axis on the left shows the alternative throughput unit.

    Parameters:
        legend: label of the throughput bars in the legend box.
        iosums: non-empty list of summaries; block size, units and confidence
            level are taken from the first element.
        iops_log_spine: use log scale for the throughput axes.
        lat_log_spine: use log scale for the latency axis.
    """
    # -------------- MAGIC VALUES ---------------------
    # IOPS bar width
    width = 0.2

    # offset from center of bar to deviation/confidence range indicator
    err_x_offset = 0.03

    # extra space on top and bottom, comparing to maximal tight layout
    extra_y_space = 0.05

    # additional spine for BW/IOPS on left side of plot
    extra_io_spine_x_offset = -0.1

    # extra space on left and right sides
    extra_x_space = 0.5

    # legend location settings
    legend_location = "center left"
    legend_bbox_to_anchor = (1.1, 0.81)

    # -------------- END OF MAGIC VALUES ---------------------

    style = pp.style
    colors = pp.colors
    first = iosums[0]

    block_size = first.block_size
    xpos = numpy.arange(1, len(iosums) + 1, dtype='uint')

    coef_mb = float(unit_conversion_coef(first.bw.units, "MiBps"))
    coef_iops = float(unit_conversion_coef(first.bw.units, "KiBps")) / block_size

    iops_primary = block_size < style.large_blocks

    coef = coef_iops if iops_primary else coef_mb
    pp.ax.set_ylabel("IOPS" if iops_primary else "BW (MiBps)")

    vals = [iosum.bw.average * coef for iosum in iosums]

    # set correct y limits for primary IO spine
    min_io = min(iosum.bw.average - iosum.bw.deviation * style.dev_range_x for iosum in iosums)
    max_io = max(iosum.bw.average + iosum.bw.deviation * style.dev_range_x for iosum in iosums)
    border = (max_io - min_io) * extra_y_space
    io_lims = (min_io - border, max_io + border)

    pp.ax.set_ylim(io_lims[0] * coef, io_lims[1] * coef)
    pp.ax.bar(xpos - width / 2, vals, width=width, color=colors.box_color, label=legend)

    # plot deviation and confidence error ranges; the last created artists
    # are reused as legend handles
    err1_legend = err2_legend = None
    for pos, iosum in zip(xpos, iosums):
        dev_bar_pos = pos - err_x_offset
        err1_legend = pp.ax.errorbar(dev_bar_pos,
                                     iosum.bw.average * coef,
                                     iosum.bw.deviation * style.dev_range_x * coef,
                                     alpha=colors.subinfo_alpha,
                                     color=colors.suppl_color1)

        conf_bar_pos = pos + err_x_offset
        err2_legend = pp.ax.errorbar(conf_bar_pos,
                                     iosum.bw.average * coef,
                                     iosum.bw.confidence * coef,
                                     alpha=colors.subinfo_alpha,
                                     color=colors.suppl_color2)

    handles1, labels1 = pp.ax.get_legend_handles_labels()

    handles1 += [err1_legend, err2_legend]
    labels1 += ["{}% dev".format(style.dev_perc),
                "{}% conf".format(int(100 * first.bw.confidence_level))]

    # extra y spine for latency on right side
    ax2 = pp.ax.twinx()

    # plot median and 95 perc latency
    lat_coef_ms = float(unit_conversion_coef(first.lat.units, "ms"))
    ax2.plot(xpos, [iosum.lat.perc_50 * lat_coef_ms for iosum in iosums], label="lat med")
    ax2.plot(xpos, [iosum.lat.perc_95 * lat_coef_ms for iosum in iosums], label="lat 95%")

    for grid_line in ax2.get_ygridlines():
        grid_line.set_linestyle(":")

    # extra y spine for BW/IOPS on left side, showing the non-primary unit
    if style.extra_io_spine:
        ax3 = pp.ax.twinx()
        if iops_log_spine:
            ax3.set_yscale('log')

        ax3.set_ylabel("BW (MiBps)" if iops_primary else "IOPS")
        secondary_coef = coef_mb if iops_primary else coef_iops
        ax3.set_ylim(io_lims[0] * secondary_coef, io_lims[1] * secondary_coef)
        ax3.spines["left"].set_position(("axes", extra_io_spine_x_offset))
        ax3.spines["left"].set_visible(True)
        ax3.yaxis.set_label_position('left')
        ax3.yaxis.set_ticks_position('left')

    ax2.set_ylabel("Latency (ms)")

    # legend box
    handles2, labels2 = ax2.get_legend_handles_labels()
    pp.ax.legend(handles1 + handles2, labels1 + labels2, loc=legend_location, bbox_to_anchor=legend_bbox_to_anchor)

    # limit and label x spine
    pp.ax.set_xlim(extra_x_space, len(iosums) + extra_x_space)
    pp.ax.set_xticks(xpos)
    pp.ax.set_xticklabels(["{0}*{1}={2}".format(iosum.qd, iosum.nodes_count, iosum.qd * iosum.nodes_count)
                           for iosum in iosums],
                          rotation=30 if len(iosums) > 9 else 0)
    pp.ax.set_xlabel("IO queue depth * test node count = total parallel requests")

    # apply log scales for Y spines, if set
    if iops_log_spine:
        pp.ax.set_yscale('log')

    if lat_log_spine:
        ax2.set_yscale('log')

    # override some styles; the right border comes from the active profile so
    # that per-profile overrides are honoured
    pp.fig.set_size_inches(*style.qd_chart_inches)
    pp.fig.subplots_adjust(right=style.subplot_adjust_r)

    if style.extra_io_spine:
        ax3.grid(False)
529