Move common storage, plotting and statistics code to cephlib
diff --git a/wally/plot.py b/wally/plot.py
index 6729584..857a594 100644
--- a/wally/plot.py
+++ b/wally/plot.py
@@ -1,15 +1,7 @@
import logging
-from io import BytesIO
-from functools import wraps
-from typing import Tuple, cast, List, Callable, Optional, Any
+from typing import List
import numpy
-import scipy.stats
-import matplotlib.axis
-import matplotlib.style
-from matplotlib.ticker import FuncFormatter
-from matplotlib.figure import Figure
-import matplotlib.pyplot as plt
# to make seaborn styles available
import warnings
@@ -17,387 +9,16 @@
warnings.simplefilter("ignore")
import seaborn
-from cephlib.plot import process_heatmap_data, hmap_from_2d, do_plot_hmap_with_histo
+from cephlib.units import unit_conversion_coef_f
+from cephlib.plot import PlotParams, provide_plot
-from .hlstorage import ResultStorage
-from .utils import unit_conversion_coef
-from .statistic import moving_average, moving_dev, hist_outliers_perc, find_ouliers_ts, approximate_curve
-from .result_classes import StatProps, DataSource, TimeSeries, NormStatProps
-from .report_profiles import StyleProfile, ColorProfile
+from .report_profiles import StyleProfile
from .resources import IOSummary
logger = logging.getLogger("wally")
-# -------------- PLOT HELPERS FUNCTIONS ------------------------------------------------------------------------------
-
-def get_emb_image(fig: Figure, file_format: str, **opts) -> bytes:
- bio = BytesIO()
- if file_format == 'svg':
- fig.savefig(bio, format='svg', **opts)
- img_start = "<!-- Created with matplotlib (http://matplotlib.org/) -->"
- return bio.getvalue().decode("utf8").split(img_start, 1)[1].encode("utf8")
- else:
- fig.savefig(bio, format=file_format, **opts)
- return bio.getvalue()
-
-
-class PlotParams:
- def __init__(self, fig: Figure, ax: Any, title: str,
- style: StyleProfile, colors: ColorProfile) -> None:
- self.fig = fig
- self.ax = ax
- self.style = style
- self.colors = colors
- self.title = title
-
-
-def provide_plot(noaxis: bool = False,
- eng: bool = False,
- no_legend: bool = False,
- long_plot: bool = True,
- grid: Any = None,
- style_name: str = 'default',
- noadjust: bool = False) -> Callable[..., Callable[..., str]]:
- def closure1(func: Callable[..., None]) -> Callable[..., str]:
- @wraps(func)
- def closure2(storage: ResultStorage,
- style: StyleProfile,
- colors: ColorProfile,
- path: DataSource,
- title: Optional[str],
- *args, **kwargs) -> str:
- fpath = storage.check_plot_file(path)
- if not fpath:
-
- assert style_name in ('default', 'ioqd')
- mlstyle = style.default_style if style_name == 'default' else style.io_chart_style
- with matplotlib.style.context(mlstyle):
- file_format = path.tag.split(".")[-1]
- fig = plt.figure(figsize=style.figsize_long if long_plot else style.figsize)
-
- if not noaxis:
- xlabel = kwargs.pop('xlabel', None)
- ylabel = kwargs.pop('ylabel', None)
- ax = fig.add_subplot(111)
-
- if xlabel is not None:
- ax.set_xlabel(xlabel)
-
- if ylabel is not None:
- ax.set_ylabel(ylabel)
-
- if grid:
- ax.grid(axis=grid)
- else:
- ax = None
-
- if title:
- fig.suptitle(title, fontsize=style.title_font_size)
-
- pp = PlotParams(fig, ax, title, style, colors)
- func(pp, *args, **kwargs)
- apply_style(pp, eng=eng, no_legend=no_legend, noadjust=noadjust)
-
- fpath = storage.put_plot_file(get_emb_image(fig, file_format=file_format, dpi=style.dpi), path)
- logger.debug("Plot %s saved to %r", path, fpath)
- plt.close(fig)
- return fpath
- return closure2
- return closure1
-
-
-def apply_style(pp: PlotParams, eng: bool = True, no_legend: bool = False, noadjust: bool = False) -> None:
-
- if (pp.style.legend_for_eng or not eng) and not no_legend:
- if not noadjust:
- pp.fig.subplots_adjust(right=StyleProfile.subplot_adjust_r)
- legend_location = "center left"
- legend_bbox_to_anchor = (1.03, 0.81)
-
- for ax in pp.fig.axes:
- ax.legend(loc=legend_location, bbox_to_anchor=legend_bbox_to_anchor)
- elif not noadjust:
- pp.fig.subplots_adjust(right=StyleProfile.subplot_adjust_r_no_legend)
-
- if pp.style.tide_layout:
- pp.fig.set_tight_layout(True)
-
-
-# -------------- PLOT FUNCTIONS --------------------------------------------------------------------------------------
-
-
-@provide_plot(eng=True)
-def plot_hist(pp: PlotParams, units: str, prop: StatProps) -> None:
-
- normed_bins = prop.bins_populations / prop.bins_populations.sum()
- bar_width = prop.bins_edges[1] - prop.bins_edges[0]
- pp.ax.bar(prop.bins_edges, normed_bins, color=pp.colors.box_color, width=bar_width, label="Real data")
-
- pp.ax.set(xlabel=units, ylabel="Value probability")
-
- if isinstance(prop, NormStatProps):
- nprop = cast(NormStatProps, prop)
- stats = scipy.stats.norm(nprop.average, nprop.deviation)
-
- new_edges, step = numpy.linspace(prop.bins_edges[0], prop.bins_edges[-1],
- len(prop.bins_edges) * 10, retstep=True)
-
- ypoints = stats.cdf(new_edges) * 11
- ypoints = [nextpt - prevpt for (nextpt, prevpt) in zip(ypoints[1:], ypoints[:-1])]
- xpoints = (new_edges[1:] + new_edges[:-1]) / 2
-
- pp.ax.plot(xpoints, ypoints, color=pp.colors.primary_color, label="Expected from\nnormal\ndistribution")
-
- pp.ax.set_xlim(left=prop.bins_edges[0])
- if prop.log_bins:
- pp.ax.set_xscale('log')
-
-
-@provide_plot(grid='y')
-def plot_simple_over_time(pp: PlotParams, tss: List[Tuple[str, numpy.ndarray]], average: bool = False) -> None:
- max_len = 0
- for name, arr in tss:
- if average:
- avg_vals = moving_average(arr, pp.style.avg_range)
- if pp.style.approx_average_no_points:
- time_points = numpy.arange(len(avg_vals))
- avg_vals = approximate_curve(cast(List[int], time_points),
- avg_vals,
- cast(List[int], time_points),
- pp.style.curve_approx_level)
- arr = avg_vals
- pp.ax.plot(arr, label=name)
- max_len = max(max_len, len(arr))
- pp.ax.set_xlim(-5, max_len + 5)
-
-
-@provide_plot(no_legend=True, grid='x', noadjust=True)
-def plot_simple_bars(pp: PlotParams,
- names: List[str],
- values: List[float],
- errs: List[float] = None,
- x_formatter: Callable[[float, float], str] = None,
- one_point_zero_line: bool = True) -> None:
-
- ind = numpy.arange(len(names))
- width = 0.35
- pp.ax.barh(ind, values, width, xerr=errs)
-
- pp.ax.set_yticks(ind)
- pp.ax.set_yticklabels(names)
- pp.ax.set_xlim(0, max(val + err for val, err in zip(values, errs)) * 1.1)
-
- if one_point_zero_line:
- pp.ax.axvline(x=1.0, color='r', linestyle='--', linewidth=1, alpha=0.5)
-
- if x_formatter:
- pp.ax.xaxis.set_major_formatter(FuncFormatter(x_formatter))
-
- pp.fig.subplots_adjust(left=0.2)
-
-
-@provide_plot(no_legend=True, long_plot=True, noaxis=True)
-def plot_hmap_from_2d(pp: PlotParams, data2d: numpy.ndarray, xlabel: str, ylabel: str,
- bins: numpy.ndarray = None) -> None:
- ioq1d, ranges = hmap_from_2d(data2d)
- heatmap, bins = process_heatmap_data(ioq1d, bin_ranges=ranges, bins=bins)
- bins_populations, _ = numpy.histogram(ioq1d, bins)
-
- ax, _ = do_plot_hmap_with_histo(pp.fig,
- heatmap,
- bins_populations,
- bins,
- cmap=pp.colors.hmap_cmap,
- cbar=pp.style.heatmap_colorbar,
- histo_grid=pp.style.histo_grid)
- ax.set(ylabel=ylabel, xlabel=xlabel)
-
-
-@provide_plot(eng=True, grid='y')
-def plot_v_over_time(pp: PlotParams, units: str, ts: TimeSeries,
- plot_avg_dev: bool = True, plot_points: bool = True) -> None:
-
- min_time = min(ts.times)
-
- # convert time to ms
- coef = float(unit_conversion_coef(ts.time_units, 's'))
- time_points = numpy.array([(val_time - min_time) * coef for val_time in ts.times])
-
- outliers_idxs = find_ouliers_ts(ts.data, cut_range=pp.style.outliers_q_nd)
- outliers_4q_idxs = find_ouliers_ts(ts.data, cut_range=pp.style.outliers_hide_q_nd)
- normal_idxs = numpy.logical_not(outliers_idxs)
- outliers_idxs = outliers_idxs & numpy.logical_not(outliers_4q_idxs)
- # hidden_outliers_count = numpy.count_nonzero(outliers_4q_idxs)
-
- data = ts.data[normal_idxs]
- data_times = time_points[normal_idxs]
- outliers = ts.data[outliers_idxs]
- outliers_times = time_points[outliers_idxs]
-
- if plot_points:
- alpha = pp.colors.noise_alpha if plot_avg_dev else 1.0
- pp.ax.plot(data_times, data, pp.style.point_shape, color=pp.colors.primary_color, alpha=alpha, label="Data")
- pp.ax.plot(outliers_times, outliers, pp.style.err_point_shape, color=pp.colors.err_color, label="Outliers")
-
- has_negative_dev = False
- plus_minus = "\xb1"
-
- if plot_avg_dev and len(data) < pp.style.avg_range * 2:
- logger.warning("Array %r to small to plot average over %s points", pp.title, pp.style.avg_range)
- elif plot_avg_dev:
- avg_vals = moving_average(data, pp.style.avg_range)
- dev_vals = moving_dev(data, pp.style.avg_range)
- avg_times = moving_average(data_times, pp.style.avg_range)
-
- if (plot_points and pp.style.approx_average) or (not plot_points and pp.style.approx_average_no_points):
- avg_vals = approximate_curve(avg_times, avg_vals, avg_times, pp.style.curve_approx_level)
- dev_vals = approximate_curve(avg_times, dev_vals, avg_times, pp.style.curve_approx_level)
-
- pp.ax.plot(avg_times, avg_vals, c=pp.colors.suppl_color1, label="Average")
-
- low_vals_dev = avg_vals - dev_vals * pp.style.dev_range_x
- hight_vals_dev = avg_vals + dev_vals * pp.style.dev_range_x
- if (pp.style.dev_range_x - int(pp.style.dev_range_x)) < 0.01:
- pp.ax.plot(avg_times, low_vals_dev, c=pp.colors.suppl_color2,
- label="{}{}*stdev".format(plus_minus, int(pp.style.dev_range_x)))
- else:
- pp.ax.plot(avg_times, low_vals_dev, c=pp.colors.suppl_color2,
- label="{}{}*stdev".format(plus_minus, pp.style.dev_range_x))
- pp.ax.plot(avg_times, hight_vals_dev, c=pp.colors.suppl_color2)
- has_negative_dev = low_vals_dev.min() < 0
-
- pp.ax.set_xlim(-5, max(time_points) + 5)
- pp.ax.set_xlabel("Time, seconds from test begin")
-
- if plot_avg_dev:
- pp.ax.set_ylabel("{}. Average and {}stddev over {} points".format(units, plus_minus, pp.style.avg_range))
- else:
- pp.ax.set_ylabel(units)
-
- if has_negative_dev:
- pp.ax.set_ylim(bottom=0)
-
-
-@provide_plot(eng=True, no_legend=True, grid='y', noadjust=True)
-def plot_lat_over_time(pp: PlotParams, ts: TimeSeries) -> None:
- times = ts.times - min(ts.times)
- step = len(times) / pp.style.lat_samples
- points = [times[int(i * step + 0.5)] for i in range(pp.style.lat_samples)]
- points.append(times[-1])
- bounds = list(zip(points[:-1], points[1:]))
- agg_data = []
- positions = []
- labels = []
-
- for begin, end in bounds:
- agg_hist = ts.data[begin:end].sum(axis=0)
-
- if pp.style.violin_instead_of_box:
- # cut outliers
- idx1, idx2 = hist_outliers_perc(agg_hist, pp.style.outliers_lat)
- agg_hist = agg_hist[idx1:idx2]
- curr_bins_vals = ts.histo_bins[idx1:idx2]
-
- correct_coef = pp.style.violin_point_count / sum(agg_hist)
- if correct_coef > 1:
- correct_coef = 1
- else:
- curr_bins_vals = ts.histo_bins
- correct_coef = 1
-
- vals = numpy.empty(shape=[numpy.sum(agg_hist)], dtype='float32')
- cidx = 0
-
- non_zero, = agg_hist.nonzero()
- for pos in non_zero:
- count = int(agg_hist[pos] * correct_coef + 0.5)
-
- if count != 0:
- vals[cidx: cidx + count] = curr_bins_vals[pos]
- cidx += count
-
- agg_data.append(vals[:cidx])
- positions.append((end + begin) / 2)
- labels.append(str((end + begin) // 2))
-
- if pp.style.violin_instead_of_box:
- patches = pp.ax.violinplot(agg_data, positions=positions, showmeans=True, showmedians=True, widths=step / 2)
- patches['cmeans'].set_color("blue")
- patches['cmedians'].set_color("green")
- if pp.style.legend_for_eng:
- legend_location = "center left"
- legend_bbox_to_anchor = (1.03, 0.81)
- pp.ax.legend([patches['cmeans'], patches['cmedians']], ["mean", "median"],
- loc=legend_location, bbox_to_anchor=legend_bbox_to_anchor)
- else:
- pp.ax.boxplot(agg_data, 0, '', positions=positions, labels=labels, widths=step / 4)
-
- pp.ax.set_xlim(min(times), max(times))
- pp.ax.set_xlabel("Time, seconds from test begin, sampled for ~{} seconds".format(int(step)))
- pp.fig.subplots_adjust(right=pp.style.subplot_adjust_r)
-
-
-@provide_plot(eng=True, no_legend=True, noaxis=True, long_plot=True)
-def plot_histo_heatmap(pp: PlotParams, ts: TimeSeries, ylabel: str, xlabel: str = "time, s") -> None:
-
- # only histogram-based ts can be plotted
- assert len(ts.data.shape) == 2
-
- # Find global outliers. As load is expected to be stable during one job
- # outliers range can be detected globally
- total_hist = ts.data.sum(axis=0)
- idx1, idx2 = hist_outliers_perc(total_hist,
- bounds_perc=pp.style.outliers_lat,
- min_bins_left=pp.style.hm_hist_bins_count)
-
- # merge outliers with most close non-outliers cell
- orig_data = ts.data[:, idx1:idx2].copy()
- if idx1 > 0:
- orig_data[:, 0] += ts.data[:, :idx1].sum(axis=1)
-
- if idx2 < ts.data.shape[1]:
- orig_data[:, -1] += ts.data[:, idx2:].sum(axis=1)
-
- bins_vals = ts.histo_bins[idx1:idx2]
-
- # rebin over X axis
- # aggregate some lines in ts.data to plot ~style.hm_x_slots x bins
- agg_idx = float(len(orig_data)) / pp.style.hm_x_slots
- if agg_idx >= 2:
- idxs = list(map(int, numpy.round(numpy.arange(0, len(orig_data) + 1, agg_idx))))
- assert len(idxs) > 1
- data = numpy.empty([len(idxs) - 1, orig_data.shape[1]], dtype=numpy.float32) # type: List[numpy.ndarray]
- for idx, (sidx, eidx) in enumerate(zip(idxs[:-1], idxs[1:])):
- data[idx] = orig_data[sidx:eidx,:].sum(axis=0) / (eidx - sidx)
- else:
- data = orig_data
-
- # rebin over Y axis
- # =================
-
- # don't using rebin_histogram here, as we need apply same bins for many arrays
- step = (bins_vals[-1] - bins_vals[0]) / pp.style.hm_hist_bins_count
- new_bins_edges = numpy.arange(pp.style.hm_hist_bins_count) * step + bins_vals[0]
- bin_mapping = numpy.clip(numpy.searchsorted(new_bins_edges, bins_vals) - 1, 0, len(new_bins_edges) - 1)
-
- # map origin bins ranges to heatmap bins, iterate over rows
- cmap = []
- for line in data:
- curr_bins = [0] * pp.style.hm_hist_bins_count
- for idx, count in zip(bin_mapping, line):
- curr_bins[idx] += count
- cmap.append(curr_bins)
- ncmap = numpy.array(cmap)
-
- histo = ncmap.sum(axis=0).reshape((-1,))
- ax, _ = do_plot_hmap_with_histo(pp.fig, ncmap, histo, new_bins_edges,
- cmap=pp.colors.hmap_cmap,
- cbar=pp.style.heatmap_colorbar, avg_labels=True)
- ax.set(ylabel=ylabel, xlabel=xlabel)
-
-
@provide_plot(eng=False, no_legend=True, grid='y', style_name='ioqd', noadjust=True)
def io_chart(pp: PlotParams,
legend: str,
@@ -430,8 +51,8 @@
block_size = iosums[0].block_size
xpos = numpy.arange(1, len(iosums) + 1, dtype='uint')
- coef_mb = float(unit_conversion_coef(iosums[0].bw.units, "MiBps"))
- coef_iops = float(unit_conversion_coef(iosums[0].bw.units, "KiBps")) / block_size
+ coef_mb = unit_conversion_coef_f(iosums[0].bw.units, "MiBps")
+ coef_iops = unit_conversion_coef_f(iosums[0].bw.units, "KiBps") / block_size
iops_primary = block_size < pp.style.large_blocks
@@ -476,7 +97,7 @@
ax2 = pp.ax.twinx()
# plot median and 95 perc latency
- lat_coef_ms = float(unit_conversion_coef(iosums[0].lat.units, "ms"))
+ lat_coef_ms = unit_conversion_coef_f(iosums[0].lat.units, "ms")
ax2.plot(xpos, [iosum.lat.perc_50 * lat_coef_ms for iosum in iosums], label="lat med")
ax2.plot(xpos, [iosum.lat.perc_95 * lat_coef_ms for iosum in iosums], label="lat 95%")