import os
import abc
import logging
from io import BytesIO
from functools import wraps
from typing import Dict, Any, Iterator, Tuple, cast, List, Callable, Set, Optional
from collections import defaultdict

import numpy
import scipy.stats
import matplotlib.pyplot as plt

import wally

from . import html
from .stage import Stage, StepOrder
from .test_run_class import TestRun
from .hlstorage import ResultStorage
from .node_interfaces import NodeInfo
from .utils import b2ssize, b2ssize_10, STORAGE_ROLES
from .statistic import (calc_norm_stat_props, calc_histo_stat_props, moving_average, moving_dev,
                        hist_outliers_perc, ts_hist_outliers_perc, find_ouliers_ts, approximate_curve,
                        rebin_histogram)
from .result_classes import (StatProps, DataSource, TimeSeries, NormStatProps, HistoStatProps, SuiteConfig,
                             IResultStorage)
from .suits.io.fio_hist import get_lat_vals, expected_lat_bins
from .suits.io.fio import FioTest, FioJobConfig
from .suits.io.fio_job import FioJobParams
from .suits.job import JobConfig


logger = logging.getLogger("wally")


# ---------------- CONSTS ---------------------------------------------------------------------------------------------


DEBUG = False
LARGE_BLOCKS = 256
MiB2KiB = 1024
MS2S = 1000


# ---------------- PROFILES ------------------------------------------------------------------------------------------


# These are default values; the real values are loaded from the config

class ColorProfile:
    primary_color = 'b'
    suppl_color1 = 'teal'
    suppl_color2 = 'magenta'
    suppl_color3 = 'orange'
    box_color = 'y'
    err_color = 'red'

    noise_alpha = 0.3
    subinfo_alpha = 0.7

    imshow_colormap = None  # type: str


class StyleProfile:
    grid = True
    tide_layout = True
    hist_boxes = 10
    hist_lat_boxes = 25
    hm_hist_bins_count = 25
    min_points_for_dev = 5

    dev_range_x = 2.0
    dev_perc = 95

    point_shape = 'o'
    err_point_shape = '*'

    avg_range = 20
    approx_average = True

    curve_approx_level = 6
    curve_approx_points = 100
    assert avg_range >= min_points_for_dev

    # figure size in inches
    figsize = (10, 6)

    extra_io_spine = True

    legend_for_eng = True
    heatmap_interpolation = '1d'
    heatmap_interpolation_points = 300
    outliers_q_nd = 3.0
    outliers_hide_q_nd = 4.0
    outliers_lat = (0.01, 0.995)

    violin_instead_of_box = True
    violin_point_count = 30000

    heatmap_colorbar = False

    min_iops_vs_qd_jobs = 3

    units = {
        'bw': ("MiBps", MiB2KiB, "bandwidth"),
        'iops': ("IOPS", 1, "iops"),
        'lat': ("ms", 1, "latency")
    }
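
    # Illustrative note (not part of the original code): each units entry is
    # (unit name, divider, human name). Raw 'bw' samples are stored in KiBps,
    # so a value of 51200 is shown as 51200 / MiB2KiB = 50 MiBps.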


# ---------------- STRUCTS -------------------------------------------------------------------------------------------


# TODO: needs to be revised; should use StatProps fields instead
class StoragePerfSummary:
    def __init__(self, name: str) -> None:
        self.direct_iops_r_max = 0  # type: int
        self.direct_iops_w_max = 0  # type: int

        # 64KiB blocks are used instead of 4KiB to fill the caches faster
        self.direct_iops_w64_max = 0  # type: int

        self.rws4k_10ms = 0  # type: int
        self.rws4k_30ms = 0  # type: int
        self.rws4k_100ms = 0  # type: int
        self.bw_write_max = 0  # type: int
        self.bw_read_max = 0  # type: int

        self.bw = None  # type: float
        self.iops = None  # type: float
        self.lat = None  # type: float
        self.lat_50 = None  # type: float
        self.lat_95 = None  # type: float


class IOSummary:
    def __init__(self,
                 qd: int,
                 block_size: int,
                 nodes_count: int,
                 bw: NormStatProps,
                 lat: HistoStatProps) -> None:

        self.qd = qd
        self.nodes_count = nodes_count
        self.block_size = block_size

        self.bw = bw
        self.lat = lat


# -------------- AGGREGATION AND STAT FUNCTIONS ----------------------------------------------------------------------

def make_iosum(rstorage: ResultStorage, suite: SuiteConfig, job: FioJobConfig) -> IOSummary:
    lat = get_aggregated(rstorage, suite, job, "lat")
    bins_edges = numpy.array(get_lat_vals(lat.data.shape[1]), dtype='float32') / 1000
    io = get_aggregated(rstorage, suite, job, "bw")

    return IOSummary(job.qd,
                     nodes_count=len(suite.nodes_ids),
                     block_size=job.bsize,
                     lat=calc_histo_stat_props(lat, bins_edges, StyleProfile.hist_boxes),
                     bw=calc_norm_stat_props(io, StyleProfile.hist_boxes))

#
# def iter_io_results(rstorage: ResultStorage,
#                     qds: List[int] = None,
#                     op_types: List[str] = None,
#                     sync_types: List[str] = None,
#                     block_sizes: List[int] = None) -> Iterator[Tuple[TestSuiteConfig, FioJobConfig]]:
#
#     for suite in rstorage.iter_suite(FioTest.name):
#         for job in rstorage.iter_job(suite):
#             fjob = cast(FioJobConfig, job)
#             assert int(fjob.vals['numjobs']) == 1
#
#             if sync_types is not None and fjob.sync_mode in sync_types:
#                 continue
#
#             if block_sizes is not None and fjob.bsize not in block_sizes:
#                 continue
#
#             if op_types is not None and fjob.op_type not in op_types:
#                 continue
#
#             if qds is not None and fjob.qd not in qds:
#                 continue
#
#             yield suite, fjob


AGG_TAG = 'ALL'


def get_aggregated(rstorage: ResultStorage, suite: SuiteConfig, job: FioJobConfig, metric: str) -> TimeSeries:
    tss = list(rstorage.iter_ts(suite, job, sensor=metric))
    ds = DataSource(suite_id=suite.storage_id,
                    job_id=job.storage_id,
                    node_id=AGG_TAG,
                    sensor='fio',
                    dev=AGG_TAG,
                    metric=metric,
                    tag='csv')

    agg_ts = TimeSeries(metric,
                        raw=None,
                        source=ds,
                        data=numpy.zeros(tss[0].data.shape, dtype=tss[0].data.dtype),
                        times=tss[0].times.copy(),
                        units=tss[0].units)

    for ts in tss:
        if metric == 'lat' and (len(ts.data.shape) != 2 or ts.data.shape[1] != expected_lat_bins):
            logger.error("Sensor %s.%s on node %s has "
                         "shape=%s. Can only process sensors with shape=[X, %s].",
                         ts.source.dev, ts.source.sensor, ts.source.node_id,
                         ts.data.shape, expected_lat_bins)
            continue

        if metric != 'lat' and len(ts.data.shape) != 1:
            logger.error("Sensor %s.%s on node %s has "
                         "shape=%s. Can only process 1D sensors.",
                         ts.source.dev, ts.source.sensor, ts.source.node_id, ts.data.shape)
            continue

        # TODO: match times on different ts
        agg_ts.data += ts.data

    return agg_ts


def is_sensor_numarray(sensor: str, metric: str) -> bool:
    """Returns True if the sensor provides a one-dimensional array of numeric values, one number per measurement."""
    return True


LEVEL_SENSORS = {("block-io", "io_queue"),
                 ("system-cpu", "procs_blocked"),
                 ("system-cpu", "procs_queue")}


def is_level_sensor(sensor: str, metric: str) -> bool:
    """Returns True if the sensor measures a level of some kind, e.g. queue depth."""
    return (sensor, metric) in LEVEL_SENSORS


def is_delta_sensor(sensor: str, metric: str) -> bool:
    """Returns True if the sensor provides deltas of a cumulative value, e.g. IO completed in the given period."""
    return not is_level_sensor(sensor, metric)
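
# Example (illustrative): ("block-io", "io_queue") is a level sensor - each
# sample is the queue depth at that moment - while ("block-io",
# "writes_completed") is a delta sensor - each sample counts the writes
# finished since the previous sample.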


def get_sensor_for_time_range(storage: IResultStorage,
                              node_id: str,
                              sensor: str,
                              dev: str,
                              metric: str,
                              time_range: Tuple[int, int]) -> numpy.array:
    """Return sensor values for the given node and period as an array of per-second estimates.

    Raises an error if the requested range is not fully covered by the data in storage.
    First it finds the range of sensor results which fully covers the requested range,
    then redistributes the collected values into one-second cells."""

    ds = DataSource(node_id=node_id, sensor=sensor, dev=dev, metric=metric)
    sensor_data = storage.load_sensor(ds)
    assert sensor_data.time_units == 'us'

    # collected_at is an array of pairs (collection_started_at, collection_finished_at);
    # extract the start time from each pair
    collection_start_at = sensor_data.times[::2]  # type: numpy.array

    MICRO = 1000000

    # convert seconds to us
    begin = time_range[0] * MICRO
    end = time_range[1] * MICRO

    if begin < collection_start_at[0] or end > collection_start_at[-1] or end <= begin:
        raise AssertionError(("Incorrect data for get_sensor - time_range={!r}, collected_at=[{}, ..., {}], "
                              "sensor = {}_{}.{}.{}").format(time_range,
                                                             sensor_data.times[0] // MICRO,
                                                             sensor_data.times[-1] // MICRO,
                                                             node_id, sensor, dev, metric))

    pos1, pos2 = numpy.searchsorted(collection_start_at, (begin, end))

    # start time of the current chunk of real data
    edge_it = iter(collection_start_at[pos1 - 1: pos2 + 1])

    # current real data value
    val_it = iter(sensor_data.data[pos1 - 1: pos2 + 1])

    # result array, cumulative value per second
    result = numpy.zeros((end - begin) // MICRO)
    idx = 0
    curr_summ = 0

    # end of the current time slot
    results_cell_ends = begin + MICRO

    # hack to unify looping
    real_data_end = next(edge_it)
    while results_cell_ends <= end:
        real_data_start = real_data_end
        real_data_end = next(edge_it)
        real_val_left = next(val_it)

        # real data "speed" for the interval [real_data_start, real_data_end]
        real_val_ps = float(real_val_left) / (real_data_end - real_data_start)

        while real_data_end >= results_cell_ends and results_cell_ends <= end:
            # the part of the current real value which fits into the current result cell
            curr_real_chunk = int((results_cell_ends - real_data_start) * real_val_ps)

            # keep the rest of the real data for the next result cell
            real_val_left -= curr_real_chunk
            result[idx] = curr_summ + curr_real_chunk
            idx += 1
            curr_summ = 0

            # adjust the real data start time
            real_data_start = results_cell_ends
            results_cell_ends += MICRO

        # don't lose any real data
        curr_summ += real_val_left

    return result
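
# Worked example (illustrative, not from the original code): with collection
# start times at 0s, 2s and 4s and interval deltas [10, 20], the first interval
# is redistributed at 5 units/s and the second at 10 units/s, so a call with
# time_range=(0, 4) yields the per-second estimate [5, 5, 10, 10].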


# -------------- PLOT HELPER FUNCTIONS -------------------------------------------------------------------------------

def get_emb_data_svg(plt: Any, format: str = 'svg') -> bytes:
    bio = BytesIO()
    if format in ('png', 'jpg'):
        plt.savefig(bio, format=format)
        return bio.getvalue()
    elif format == 'svg':
        plt.savefig(bio, format='svg')
        img_start = "<!-- Created with matplotlib (http://matplotlib.org/) -->"
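        # Splitting on the header comment drops matplotlib's XML prolog and
        # DOCTYPE, so the returned bytes start at the <svg> element and can be
        # inlined directly into an HTML page.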
        return bio.getvalue().decode("utf8").split(img_start, 1)[1].encode("utf8")


def provide_plot(func: Callable[..., None]) -> Callable[..., str]:
    @wraps(func)
    def closure1(storage: ResultStorage,
                 path: DataSource,
                 *args, **kwargs) -> str:
        fpath = storage.check_plot_file(path)
        if not fpath:
            format = path.tag.split(".")[-1]

            plt.figure(figsize=StyleProfile.figsize)
            plt.subplots_adjust(right=0.66)

            func(*args, **kwargs)
            fpath = storage.put_plot_file(get_emb_data_svg(plt, format=format), path)
            logger.debug("Plot %s saved to %r", path, fpath)
            plt.clf()
            plt.close('all')
        return fpath
    return closure1
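
# Usage note (illustrative, not part of the original code): a function wrapped
# with @provide_plot gains two leading parameters - the ResultStorage and the
# target DataSource - and returns the path of the stored image, e.g.
#
#     fpath = plot_hist(rstorage, ds, "Latency distribution", "ms", props)
#
# The plot body only runs when the image is not already cached in storage.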


def apply_style(style: StyleProfile, eng: bool = True, no_legend: bool = False) -> None:
    if style.grid:
        plt.grid(True)

    if (style.legend_for_eng or not eng) and not no_legend:
        legend_location = "center left"
        legend_bbox_to_anchor = (1.03, 0.81)
        plt.legend(loc=legend_location, bbox_to_anchor=legend_bbox_to_anchor)


# -------------- PLOT FUNCTIONS --------------------------------------------------------------------------------------


@provide_plot
def plot_hist(title: str, units: str,
              prop: StatProps,
              colors: Any = ColorProfile,
              style: Any = StyleProfile) -> None:

383 # TODO: unit should came from ts
    normed_bins = prop.bins_populations / prop.bins_populations.sum()
    bar_width = prop.bins_edges[1] - prop.bins_edges[0]
    plt.bar(prop.bins_edges, normed_bins, color=colors.box_color, width=bar_width, label="Real data")

    plt.xlabel(units)
    plt.ylabel("Value probability")
    plt.title(title)

    dist_plotted = False
    if isinstance(prop, NormStatProps):
        nprop = cast(NormStatProps, prop)
        stats = scipy.stats.norm(nprop.average, nprop.deviation)

        new_edges, step = numpy.linspace(prop.bins_edges[0], prop.bins_edges[-1],
                                         len(prop.bins_edges) * 10, retstep=True)

        ypoints = stats.cdf(new_edges) * 11
        ypoints = ypoints[1:] - ypoints[:-1]
        xpoints = (new_edges[1:] + new_edges[:-1]) / 2

        plt.plot(xpoints, ypoints, color=colors.primary_color, label="Expected from\nnormal\ndistribution")
        dist_plotted = True

    plt.gca().set_xlim(left=prop.bins_edges[0])
    if prop.log_bins:
        plt.xscale('log')

    apply_style(style, eng=True, no_legend=not dist_plotted)


@provide_plot
def plot_v_over_time(title: str, units: str,
                     ts: TimeSeries,
                     plot_avg_dev: bool = True,
                     colors: Any = ColorProfile, style: Any = StyleProfile) -> None:

    min_time = min(ts.times)

    # /1000 is the us to ms conversion
    time_points = numpy.array([(val_time - min_time) / 1000 for val_time in ts.times])

    outliers_idxs = find_ouliers_ts(ts.data, cut_range=style.outliers_q_nd)
    outliers_4q_idxs = find_ouliers_ts(ts.data, cut_range=style.outliers_hide_q_nd)
    normal_idxs = numpy.logical_not(outliers_idxs)
    outliers_idxs = outliers_idxs & numpy.logical_not(outliers_4q_idxs)
    hidden_outliers_count = numpy.count_nonzero(outliers_4q_idxs)

    data = ts.data[normal_idxs]
    data_times = time_points[normal_idxs]
    outliers = ts.data[outliers_idxs]
    outliers_times = time_points[outliers_idxs]

    alpha = colors.noise_alpha if plot_avg_dev else 1.0
    plt.plot(data_times, data, style.point_shape,
             color=colors.primary_color, alpha=alpha, label="Data")
    plt.plot(outliers_times, outliers, style.err_point_shape,
             color=colors.err_color, label="Outliers")

    has_negative_dev = False
    plus_minus = "\xb1"

    if plot_avg_dev and len(data) < style.avg_range * 2:
        logger.warning("Array %r is too small to plot an average over %s points", title, style.avg_range)
    elif plot_avg_dev:
        avg_vals = moving_average(data, style.avg_range)
        dev_vals = moving_dev(data, style.avg_range)
        avg_times = moving_average(data_times, style.avg_range)

        if style.approx_average:
            avg_vals = approximate_curve(avg_times, avg_vals, avg_times, style.curve_approx_level)
            dev_vals = approximate_curve(avg_times, dev_vals, avg_times, style.curve_approx_level)

        plt.plot(avg_times, avg_vals, c=colors.suppl_color1, label="Average")

        low_vals_dev = avg_vals - dev_vals * style.dev_range_x
        high_vals_dev = avg_vals + dev_vals * style.dev_range_x
        if style.dev_range_x - int(style.dev_range_x) < 0.01:
            plt.plot(avg_times, low_vals_dev, c=colors.suppl_color2,
                     label="{}{}*stdev".format(plus_minus, int(style.dev_range_x)))
        else:
            plt.plot(avg_times, low_vals_dev, c=colors.suppl_color2,
                     label="{}{}*stdev".format(plus_minus, style.dev_range_x))
        plt.plot(avg_times, high_vals_dev, c=colors.suppl_color2)
        has_negative_dev = low_vals_dev.min() < 0

    plt.xlim(-5, max(time_points) + 5)
    plt.xlabel("Time, seconds from test begin")
    plt.ylabel("{}. Average and {}stddev over {} points".format(units, plus_minus, style.avg_range))
    plt.title(title)

    if has_negative_dev:
        plt.gca().set_ylim(bottom=0)

    apply_style(style, eng=True)


@provide_plot
def plot_lat_over_time(title: str, ts: TimeSeries, bins_vals: List[int], samples: int = 5,
                       colors: Any = ColorProfile,
                       style: Any = StyleProfile) -> None:

    min_time = min(ts.times)
    times = [int(tm - min_time + 500) // 1000 for tm in ts.times]
    ts_len = len(times)
    step = ts_len / samples
    points = [times[int(i * step + 0.5)] for i in range(samples)]
    points.append(times[-1])
    bounds = list(zip(points[:-1], points[1:]))
    agg_data = []
    positions = []
    labels = []

    for begin, end in bounds:
        agg_hist = ts.data[begin:end].sum(axis=0)

        if style.violin_instead_of_box:
            # cut outliers
            idx1, idx2 = hist_outliers_perc(agg_hist, style.outliers_lat)
            agg_hist = agg_hist[idx1:idx2]
            curr_bins_vals = bins_vals[idx1:idx2]

            correct_coef = style.violin_point_count / sum(agg_hist)
            if correct_coef > 1:
                correct_coef = 1
        else:
            curr_bins_vals = bins_vals
            correct_coef = 1

        vals = numpy.empty(shape=(numpy.sum(agg_hist),), dtype='float32')
        cidx = 0

        non_zero, = agg_hist.nonzero()
        for pos in non_zero:
            count = int(agg_hist[pos] * correct_coef + 0.5)

            if count != 0:
                vals[cidx: cidx + count] = curr_bins_vals[pos]
                cidx += count

        agg_data.append(vals[:cidx])
        positions.append((end + begin) / 2)
        labels.append(str((end + begin) // 2))

    if style.violin_instead_of_box:
        patches = plt.violinplot(agg_data,
                                 positions=positions,
                                 showmeans=True,
                                 showmedians=True,
                                 widths=step / 2)

        patches['cmeans'].set_color("blue")
        patches['cmedians'].set_color("green")
        if style.legend_for_eng:
            legend_location = "center left"
            legend_bbox_to_anchor = (1.03, 0.81)
            plt.legend([patches['cmeans'], patches['cmedians']], ["mean", "median"],
                       loc=legend_location, bbox_to_anchor=legend_bbox_to_anchor)
    else:
        plt.boxplot(agg_data, 0, '', positions=positions, labels=labels, widths=step / 4)

    plt.xlim(min(times), max(times))
    plt.xlabel("Time, seconds from test begin, sampled for ~{} seconds".format(int(step)))
    plt.ylabel("Latency, ms")
    plt.title(title)
    apply_style(style, eng=True, no_legend=True)


@provide_plot
def plot_heatmap(title: str,
                 ts: TimeSeries,
                 bins_vals: List[int],
                 colors: Any = ColorProfile,
                 style: Any = StyleProfile) -> None:

    assert len(ts.data.shape) == 2
    assert ts.data.shape[1] == len(bins_vals)

    total_hist = ts.data.sum(axis=0)

    # idx1, idx2 = hist_outliers_perc(total_hist, style.outliers_lat)
    idx1, idx2 = ts_hist_outliers_perc(ts.data, bounds_perc=style.outliers_lat)

    # don't cut too many bins
    min_bins_left = style.hm_hist_bins_count
    if idx2 - idx1 < min_bins_left:
        missed = (min_bins_left - (idx2 - idx1)) // 2
        idx2 = min(len(total_hist), idx2 + missed)
        idx1 = max(0, idx1 - missed)

    data = ts.data[:, idx1:idx2]
    bins_vals = bins_vals[idx1:idx2]

    # not using rebin_histogram here, as we need to apply the same bins to many arrays
    step = (bins_vals[-1] - bins_vals[0]) / style.hm_hist_bins_count
    new_bins_edges = numpy.arange(style.hm_hist_bins_count) * step + bins_vals[0]
    bin_mapping = numpy.clip(numpy.searchsorted(new_bins_edges, bins_vals) - 1, 0, len(new_bins_edges) - 1)
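
    # Illustrative example (not from the original code): with bins_vals
    # [1, 2, 3, 4] and new_bins_edges [1, 3], searchsorted gives [0, 1, 1, 2],
    # which after -1 and clipping becomes [0, 0, 0, 1]: counts from the bins
    # valued 1-3 land in heatmap bin 0 and the bin valued 4 lands in bin 1.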

    # map original bin ranges to heatmap bins, iterating over rows
    cmap = []
    for line in data:
        curr_bins = [0] * style.hm_hist_bins_count
        for idx, count in zip(bin_mapping, line):
            curr_bins[idx] += count
        cmap.append(curr_bins)
    ncmap = numpy.array(cmap)

    xmin = 0
    xmax = (ts.times[-1] - ts.times[0]) / 1000 + 1
    ymin = new_bins_edges[0]
    ymax = new_bins_edges[-1]

    fig, ax = plt.subplots(figsize=style.figsize)

    if style.heatmap_interpolation == '1d':
        interpolation = 'none'
        res = []
        for column in ncmap:
            new_x = numpy.linspace(0, len(column), style.heatmap_interpolation_points)
            old_x = numpy.arange(len(column)) + 0.5
            new_vals = numpy.interp(new_x, old_x, column)
            res.append(new_vals)
        ncmap = numpy.array(res)
    else:
        interpolation = style.heatmap_interpolation

    ax.imshow(ncmap[:, ::-1].T,
              interpolation=interpolation,
              extent=(xmin, xmax, ymin, ymax),
              cmap=colors.imshow_colormap)

    ax.set_aspect((xmax - xmin) / (ymax - ymin) * (6 / 9))
    ax.set_ylabel("Latency, ms")
    ax.set_xlabel("Test time, s")

    plt.title(title)

@provide_plot
def io_chart(title: str,
             legend: str,
             iosums: List[IOSummary],
             iops_log_spine: bool = False,
             lat_log_spine: bool = False,
             colors: Any = ColorProfile,
             style: Any = StyleProfile) -> None:

    # -------------- MAGIC VALUES ---------------------
    # IOPS bar width
    width = 0.35

    # offset from the center of a bar to its deviation/confidence range indicator
    err_x_offset = 0.05

    # extra space on top and bottom, compared to the maximal tight layout
    extra_y_space = 0.05

    # additional spine for BW/IOPS on the left side of the plot
    extra_io_spine_x_offset = -0.1

    # extra space on the left and right sides
    extra_x_space = 0.5

    # legend location settings
    legend_location = "center left"
    legend_bbox_to_anchor = (1.1, 0.81)

    # plot box size adjustment (only the plot, not spines and legend)
    plot_box_adjust = {'right': 0.66}
    # -------------- END OF MAGIC VALUES ---------------------

    block_size = iosums[0].block_size
    lc = len(iosums)
    xt = list(range(1, lc + 1))

    # x coordinates of the middle of the bars
    xpos = [i - width / 2 for i in xt]

    # import matplotlib.gridspec as gridspec
    # gs = gridspec.GridSpec(1, 3, width_ratios=[1, 4, 1])
    # p1 = plt.subplot(gs[1])

    fig, p1 = plt.subplots(figsize=StyleProfile.figsize)

    # plot IOPS/BW bars
    if block_size >= LARGE_BLOCKS:
        iops_primary = False
        coef = MiB2KiB
        p1.set_ylabel("BW (MiBps)")
    else:
        iops_primary = True
        coef = block_size
        p1.set_ylabel("IOPS")

    p1.bar(xpos, [iosum.bw.average / coef for iosum in iosums], width=width, color=colors.box_color, label=legend)

    # set correct y limits for the primary IO spine
    min_io = min(iosum.bw.average - iosum.bw.deviation * style.dev_range_x for iosum in iosums)
    max_io = max(iosum.bw.average + iosum.bw.deviation * style.dev_range_x for iosum in iosums)
    border = (max_io - min_io) * extra_y_space
    io_lims = (min_io - border, max_io + border)

    p1.set_ylim(io_lims[0] / coef, io_lims[-1] / coef)

    # plot deviation and confidence error ranges
    err1_legend = err2_legend = None
    for pos, iosum in zip(xpos, iosums):
        err1_legend = p1.errorbar(pos + width / 2 - err_x_offset,
                                  iosum.bw.average / coef,
                                  iosum.bw.deviation * style.dev_range_x / coef,
                                  alpha=colors.subinfo_alpha,
                                  color=colors.suppl_color1)  # 'magenta'
        err2_legend = p1.errorbar(pos + width / 2 + err_x_offset,
                                  iosum.bw.average / coef,
                                  iosum.bw.confidence / coef,
                                  alpha=colors.subinfo_alpha,
                                  color=colors.suppl_color2)  # 'teal'

    if style.grid:
        p1.grid(True)

    handles1, labels1 = p1.get_legend_handles_labels()

    handles1 += [err1_legend, err2_legend]
    labels1 += ["{}% dev".format(style.dev_perc),
                "{}% conf".format(int(100 * iosums[0].bw.confidence_level))]

    # extra y spine for latency on the right side
    p2 = p1.twinx()

    # plot median and 95 percentile latency
    p2.plot(xt, [iosum.lat.perc_50 for iosum in iosums], label="lat med")
    p2.plot(xt, [iosum.lat.perc_95 for iosum in iosums], label="lat 95%")

    # limit and label the x spine
    plt.xlim(extra_x_space, lc + extra_x_space)
    plt.xticks(xt, ["{0} * {1}".format(iosum.qd, iosum.nodes_count) for iosum in iosums])
    p1.set_xlabel("QD * Test node count")

    # apply log scales to the y spines, if set
    if iops_log_spine:
        p1.set_yscale('log')

    if lat_log_spine:
        p2.set_yscale('log')

    # extra y spine for BW/IOPS on the left side
    if style.extra_io_spine:
        p3 = p1.twinx()
        if iops_log_spine:
            p3.set_yscale('log')

        if iops_primary:
            p3.set_ylabel("BW (MiBps)")
            p3.set_ylim(io_lims[0] / MiB2KiB, io_lims[1] / MiB2KiB)
        else:
            p3.set_ylabel("IOPS")
            p3.set_ylim(io_lims[0] / block_size, io_lims[1] / block_size)

        p3.spines["left"].set_position(("axes", extra_io_spine_x_offset))
        p3.spines["left"].set_visible(True)
        p3.yaxis.set_label_position('left')
        p3.yaxis.set_ticks_position('left')

    p2.set_ylabel("Latency (ms)")

    plt.title(title)

    # legend box
    handles2, labels2 = p2.get_legend_handles_labels()
    plt.legend(handles1 + handles2, labels1 + labels2,
               loc=legend_location,
               bbox_to_anchor=legend_bbox_to_anchor)

    # adjust the central box size to fit the legend
    plt.subplots_adjust(**plot_box_adjust)
    apply_style(style, eng=False, no_legend=True)


# -------------------- REPORT HELPERS --------------------------------------------------------------------------------


class HTMLBlock:
    data = None  # type: str
    js_links = []  # type: List[str]
    css_links = []  # type: List[str]
    order_attr = None  # type: Any

    def __init__(self, data: str, order_attr: Any = None) -> None:
        self.data = data
        self.order_attr = order_attr

    def __eq__(self, o: object) -> bool:
        return o.order_attr == self.order_attr  # type: ignore

    def __lt__(self, o: object) -> bool:
        return o.order_attr > self.order_attr  # type: ignore


class Table:
    def __init__(self, header: List[str]) -> None:
        self.header = header
        self.data = []

    def add_line(self, values: List[str]) -> None:
        self.data.append(values)

    def html(self):
        return html.table("", self.header, self.data)


class Menu1st:
    engineering = "Engineering"
    summary = "Summary"
    per_job = "Per Job"


class Menu2ndEng:
    iops_time = "IOPS(time)"
    hist = "IOPS/lat overall histogram"
    lat_time = "Lat(time)"


class Menu2ndSumm:
    io_lat_qd = "IO & Lat vs QD"


menu_1st_order = [Menu1st.summary, Menu1st.engineering, Menu1st.per_job]


# -------------------- REPORTS ---------------------------------------------------------------------------------------


class Reporter(metaclass=abc.ABCMeta):
    suite_types = set()  # type: Set[str]

    @abc.abstractmethod
    def get_divs(self, suite: SuiteConfig, storage: ResultStorage) -> Iterator[Tuple[str, str, HTMLBlock]]:
        pass


class JobReporter(metaclass=abc.ABCMeta):
    suite_types = set()  # type: Set[str]

    @abc.abstractmethod
    def get_divs(self,
                 suite: SuiteConfig,
                 job: JobConfig,
                 storage: ResultStorage) -> Iterator[Tuple[str, str, HTMLBlock]]:
        pass


# Main performance report
class PerformanceSummary(Reporter):
    """Aggregated summary for the storage"""


# Main performance report
class IO_QD(Reporter):
    """Creates a graph which shows how IOPS and latency depend on QD"""
    suite_types = {'fio'}

    def get_divs(self, suite: SuiteConfig, rstorage: ResultStorage) -> Iterator[Tuple[str, str, HTMLBlock]]:
        ts_map = defaultdict(list)  # type: Dict[FioJobParams, List[Tuple[SuiteConfig, FioJobConfig]]]
        str_summary = {}  # type: Dict[FioJobParams, Tuple[str, str]]
        for job in rstorage.iter_job(suite):
            fjob = cast(FioJobConfig, job)
            fjob_no_qd = cast(FioJobParams, fjob.params.copy(qd=None))
            str_summary[fjob_no_qd] = (fjob_no_qd.summary, fjob_no_qd.long_summary)
            ts_map[fjob_no_qd].append((suite, fjob))

        for tpl, suites_jobs in ts_map.items():
            if len(suites_jobs) > StyleProfile.min_iops_vs_qd_jobs:
                iosums = [make_iosum(rstorage, suite, job) for suite, job in suites_jobs]
                iosums.sort(key=lambda x: x.qd)
                summary, summary_long = str_summary[tpl]
                ds = DataSource(suite_id=suite.storage_id,
                                job_id=summary,
                                node_id=AGG_TAG,
                                sensor="fio",
                                dev=AGG_TAG,
                                metric="io_over_qd",
                                tag="svg")

                title = "IOPS, BW, Lat vs. QD.\n" + summary_long
                fpath = io_chart(rstorage, ds, title=title, legend="IOPS/BW", iosums=iosums)  # type: str
                yield Menu1st.summary, Menu2ndSumm.io_lat_qd, HTMLBlock(html.img(fpath))


# Linearization report
class IOPS_Bsize(Reporter):
    """Creates graphs which show how IOPS and latency depend on block size"""


def summ_sensors(rstorage: ResultStorage,
                 nodes: List[str],
                 sensor: str,
                 metric: str,
                 time_range: Tuple[int, int]) -> Optional[numpy.array]:
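    """Sum per-second sensor arrays elementwise across every matching device on
    all listed nodes; return a cluster-wide per-second total, or None if no
    sensor matched."""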

    res = None  # type: Optional[numpy.array]
    for node_id in nodes:
        for _, groups in rstorage.iter_sensors(node_id=node_id, sensor=sensor, metric=metric):
            data = get_sensor_for_time_range(rstorage,
                                             node_id=node_id,
                                             sensor=sensor,
                                             dev=groups['dev'],
                                             metric=metric,
                                             time_range=time_range)
            if res is None:
                res = data
            else:
                res += data
    return res


# IOPS/latency distribution
class StatInfo(JobReporter):
    """Statistic info for job results"""
    suite_types = {'fio'}

    def get_divs(self, suite: SuiteConfig, job: JobConfig,
                 rstorage: ResultStorage) -> Iterator[Tuple[str, str, HTMLBlock]]:

        fjob = cast(FioJobConfig, job)
        io_sum = make_iosum(rstorage, suite, fjob)

        summary_data = [
            ["Summary", job.params.long_summary],
        ]

        res = html.H2(html.center("Test summary"))
        res += html.table("Test info", None, summary_data)
        stat_data_headers = ["Name", "Average ~ Dev", "Conf interval", "Median", "Mode", "Kurt / Skew", "95%", "99%"]

        KB = 1024
        bw_data = ["Bandwidth",
                   "{}Bps ~ {}Bps".format(b2ssize(io_sum.bw.average * KB), b2ssize(io_sum.bw.deviation * KB)),
                   b2ssize(io_sum.bw.confidence * KB) + "Bps",
                   b2ssize(io_sum.bw.perc_50 * KB) + "Bps",
                   "-",
                   "{:.2f} / {:.2f}".format(io_sum.bw.kurt, io_sum.bw.skew),
                   b2ssize(io_sum.bw.perc_5 * KB) + "Bps",
                   b2ssize(io_sum.bw.perc_1 * KB) + "Bps"]

        iops_data = ["IOPS",
                     "{}IOPS ~ {}IOPS".format(b2ssize_10(io_sum.bw.average / fjob.bsize),
                                              b2ssize_10(io_sum.bw.deviation / fjob.bsize)),
                     b2ssize_10(io_sum.bw.confidence / fjob.bsize) + "IOPS",
                     b2ssize_10(io_sum.bw.perc_50 / fjob.bsize) + "IOPS",
                     "-",
                     "{:.2f} / {:.2f}".format(io_sum.bw.kurt, io_sum.bw.skew),
                     b2ssize_10(io_sum.bw.perc_5 / fjob.bsize) + "IOPS",
                     b2ssize_10(io_sum.bw.perc_1 / fjob.bsize) + "IOPS"]

        # latency percentiles are in ms - the bin edges are scaled in make_iosum
        lat_data = ["Latency",
                    "-",
                    "-",
                    b2ssize_10(io_sum.lat.perc_50 / MS2S) + "s",
                    "-",
                    "-",
                    b2ssize_10(io_sum.lat.perc_95 / MS2S) + "s",
                    b2ssize_10(io_sum.lat.perc_99 / MS2S) + "s"]

        # sensor usage
        stat_data = [iops_data, bw_data, lat_data]
        res += html.table("Load stats info", stat_data_headers, stat_data)

        resource_headers = ["Resource", "Usage count", "Proportional to work done"]

        io_transferred = io_sum.bw.data.sum() * KB
        resource_data = [
            ["IO made", b2ssize_10(io_transferred / KB / fjob.bsize) + "OP", "-"],
            ["Data transferred", b2ssize(io_transferred) + "B", "-"]
        ]


        storage = rstorage.storage
        nodes = storage.load_list(NodeInfo, 'all_nodes')  # type: List[NodeInfo]

        storage_nodes = [node.node_id for node in nodes if node.roles.intersection(STORAGE_ROLES)]
        test_nodes = [node.node_id for node in nodes if "testnode" in node.roles]

        trange = [job.reliable_info_range[0] / 1000, job.reliable_info_range[1] / 1000]
        ops_done = io_transferred / fjob.bsize / KB

        all_metrics = [
            ("Test nodes net send", 'net-io', 'send_bytes', b2ssize, test_nodes, "B", io_transferred),
            ("Test nodes net recv", 'net-io', 'recv_bytes', b2ssize, test_nodes, "B", io_transferred),

            ("Test nodes disk write", 'block-io', 'sectors_written', b2ssize, test_nodes, "B", io_transferred),
            ("Test nodes disk read", 'block-io', 'sectors_read', b2ssize, test_nodes, "B", io_transferred),
            ("Test nodes writes", 'block-io', 'writes_completed', b2ssize_10, test_nodes, "OP", ops_done),
            ("Test nodes reads", 'block-io', 'reads_completed', b2ssize_10, test_nodes, "OP", ops_done),

            ("Storage nodes net send", 'net-io', 'send_bytes', b2ssize, storage_nodes, "B", io_transferred),
            ("Storage nodes net recv", 'net-io', 'recv_bytes', b2ssize, storage_nodes, "B", io_transferred),

            ("Storage nodes disk write", 'block-io', 'sectors_written', b2ssize, storage_nodes, "B", io_transferred),
            ("Storage nodes disk read", 'block-io', 'sectors_read', b2ssize, storage_nodes, "B", io_transferred),
            ("Storage nodes writes", 'block-io', 'writes_completed', b2ssize_10, storage_nodes, "OP", ops_done),
            ("Storage nodes reads", 'block-io', 'reads_completed', b2ssize_10, storage_nodes, "OP", ops_done),
        ]

        all_agg = {}

        for descr, sensor, metric, ffunc, nodes, units, denom in all_metrics:
            if not nodes:
                continue

            res_arr = summ_sensors(rstorage, nodes=nodes, sensor=sensor, metric=metric, time_range=trange)
            if res_arr is None:
                continue

            agg = res_arr.sum()
            resource_data.append([descr, ffunc(agg) + units, "{:.1f}".format(agg / denom)])
            all_agg[descr] = agg


        cums = [
            ("Test nodes writes", "Test nodes reads", "Total test ops", b2ssize_10, "OP", ops_done),
            ("Storage nodes writes", "Storage nodes reads", "Total storage ops", b2ssize_10, "OP", ops_done),
            ("Storage nodes disk write", "Storage nodes disk read", "Total storage IO size", b2ssize,
             "B", io_transferred),
            ("Test nodes disk write", "Test nodes disk read", "Total test nodes IO size", b2ssize, "B", io_transferred),
        ]

        for name1, name2, descr, ffunc, units, denom in cums:
            if name1 in all_agg and name2 in all_agg:
                agg = all_agg[name1] + all_agg[name2]
                resource_data.append([descr, ffunc(agg) + units, "{:.1f}".format(agg / denom)])

        res += html.table("Resources usage", resource_headers, resource_data)

        yield Menu1st.per_job, job.summary, HTMLBlock(res)


# IOPS/latency distribution
class IOHist(JobReporter):
    """IOPS/latency distribution histogram"""
    suite_types = {'fio'}

    def get_divs(self,
                 suite: SuiteConfig,
                 job: JobConfig,
                 rstorage: ResultStorage) -> Iterator[Tuple[str, str, HTMLBlock]]:

        fjob = cast(FioJobConfig, job)

        yield Menu1st.per_job, fjob.summary, HTMLBlock(html.H2(html.center("Load histograms")))

        agg_lat = get_aggregated(rstorage, suite, fjob, "lat")
        bins_edges = numpy.array(get_lat_vals(agg_lat.data.shape[1]), dtype='float32') / 1000  # convert us to ms
        lat_stat_prop = calc_histo_stat_props(agg_lat, bins_edges, bins_count=StyleProfile.hist_lat_boxes)

        long_summary = cast(FioJobParams, fjob.params).long_summary

        title = "Latency distribution"
        units = "ms"

        fpath = plot_hist(rstorage, agg_lat.source(tag='hist.svg'), title, units, lat_stat_prop)  # type: str
        yield Menu1st.per_job, fjob.summary, HTMLBlock(html.img(fpath))

        agg_io = get_aggregated(rstorage, suite, fjob, "bw")

        if fjob.bsize >= LARGE_BLOCKS:
            title = "BW distribution"
            units = "MiBps"
            agg_io.data //= MiB2KiB
        else:
            title = "IOPS distribution"
            agg_io.data //= fjob.bsize
            units = "IOPS"

        io_stat_prop = calc_norm_stat_props(agg_io, bins_count=StyleProfile.hist_boxes)
        fpath = plot_hist(rstorage, agg_io.source(tag='hist.svg'), title, units, io_stat_prop)  # type: str
        yield Menu1st.per_job, fjob.summary, HTMLBlock(html.img(fpath))


# IOPS/latency over test time for each job
class IOTime(JobReporter):
    """IOPS/latency during the test"""
    suite_types = {'fio'}

    def get_divs(self,
                 suite: SuiteConfig,
                 job: JobConfig,
                 rstorage: ResultStorage) -> Iterator[Tuple[str, str, HTMLBlock]]:

        fjob = cast(FioJobConfig, job)

        yield Menu1st.per_job, fjob.summary, HTMLBlock(html.H2(html.center("Load over time")))

        agg_io = get_aggregated(rstorage, suite, fjob, "bw")
        if fjob.bsize >= LARGE_BLOCKS:
            title = "Bandwidth"
            units = "MiBps"
            agg_io.data //= MiB2KiB
        else:
            title = "IOPS"
            agg_io.data //= fjob.bsize
            units = "IOPS"

        fpath = plot_v_over_time(rstorage, agg_io.source(tag='ts.svg'), title, units, agg_io)  # type: str
        yield Menu1st.per_job, fjob.summary, HTMLBlock(html.img(fpath))

        agg_lat = get_aggregated(rstorage, suite, fjob, "lat")
        bins_edges = numpy.array(get_lat_vals(agg_lat.data.shape[1]), dtype='float32') / 1000
        title = "Latency"

        fpath = plot_lat_over_time(rstorage, agg_lat.source(tag='ts.svg'), title, agg_lat, bins_edges)  # type: str
        yield Menu1st.per_job, fjob.summary, HTMLBlock(html.img(fpath))

        title = "Latency heatmap"
        fpath = plot_heatmap(rstorage, agg_lat.source(tag='hmap.png'), title, agg_lat, bins_edges)  # type: str

        yield Menu1st.per_job, fjob.summary, HTMLBlock(html.img(fpath))


class ResourceUsage:
    def __init__(self, io_r_ops: int, io_w_ops: int, io_r_kb: int, io_w_kb: int) -> None:
        self.io_w_ops = io_w_ops
        self.io_r_ops = io_r_ops
        self.io_w_kb = io_w_kb
        self.io_r_kb = io_r_kb

        self.cpu_used_user = None  # type: int
        self.cpu_used_sys = None  # type: int
        self.cpu_wait_io = None  # type: int

        self.net_send_packets = None  # type: int
        self.net_recv_packets = None  # type: int
        self.net_send_kb = None  # type: int
        self.net_recv_kb = None  # type: int


# Cluster load over test time
class ClusterLoad(JobReporter):
    """Cluster load during the test"""

    # TODO: units should come from the sensor
    storage_sensors = [
        ('block-io', 'reads_completed', "Read ops", 'iops'),
        ('block-io', 'writes_completed', "Write ops", 'iops'),
        ('block-io', 'sectors_read', "Read kb", 'kb'),
        ('block-io', 'sectors_written', "Write kb", 'kb'),
    ]

    def get_divs(self,
                 suite: SuiteConfig,
                 job: JobConfig,
                 rstorage: ResultStorage) -> Iterator[Tuple[str, str, HTMLBlock]]:
koder aka kdanilov108ac362017-01-19 20:17:16 +02001140 # split nodes on test and other
        storage = rstorage.storage
        nodes = storage.load_list(NodeInfo, "all_nodes")  # type: List[NodeInfo]

        yield Menu1st.per_job, job.summary, HTMLBlock(html.H2(html.center("Cluster load")))
        test_nodes = {node.node_id for node in nodes if 'testnode' in node.roles}
        cluster_nodes = {node.node_id for node in nodes if 'testnode' not in node.roles}

        # convert ms to s
        time_range = (job.reliable_info_range[0] // MS2S, job.reliable_info_range[1] // MS2S)
        range_len = time_range[1] - time_range[0]
        for sensor, metric, sensor_title, units in self.storage_sensors:
            sum_testnode = numpy.zeros((range_len,))
            sum_other = numpy.zeros((range_len,))
            for path, groups in rstorage.iter_sensors(sensor=sensor, metric=metric):
                # TODO: should return sensor units
                data = get_sensor_for_time_range(rstorage,
                                                 groups['node_id'],
                                                 sensor,
                                                 groups['dev'],
                                                 metric, time_range)
                if groups['node_id'] in test_nodes:
                    sum_testnode += data
                else:
                    sum_other += data

            ds = DataSource(suite_id=suite.storage_id,
                            job_id=job.storage_id,
                            node_id="test_nodes",
                            sensor=sensor,
                            dev=AGG_TAG,
                            metric=metric,
                            tag="ts.svg")

            # convert s to ms
            ts = TimeSeries(name="",
                            times=numpy.arange(*time_range) * MS2S,
                            data=sum_testnode,
                            raw=None,
                            units=units,
                            time_units="ms",
                            source=ds)
            fpath = plot_v_over_time(rstorage, ds, sensor_title, sensor_title, ts=ts)  # type: str
            yield Menu1st.per_job, job.summary, HTMLBlock(html.img(fpath))


# Resource consumption report
class ResourceConsumption(Reporter):
    """Resources consumption report, only text"""


# Node load over test time
class NodeLoad(Reporter):
    """Node load during the test"""


# Ceph cluster summary
class CephClusterSummary(Reporter):
    """Ceph cluster summary"""


# TODO: Ceph operation breakout report
# TODO: Resource consumption for different types of tests


# ------------------------------------------ REPORT STAGES -----------------------------------------------------------


class HtmlReportStage(Stage):
    priority = StepOrder.REPORT

    def run(self, ctx: TestRun) -> None:
        rstorage = ResultStorage(ctx.storage)

        job_reporters = [StatInfo(), IOTime(), IOHist(), ClusterLoad()]  # type: List[JobReporter]
        reporters = [IO_QD()]  # type: List[Reporter]

        # job_reporters = [ClusterLoad()]
        # reporters = []

        root_dir = os.path.dirname(os.path.dirname(wally.__file__))
        doc_templ_path = os.path.join(root_dir, "report_templates/index.html")
        report_template = open(doc_templ_path, "rt").read()
        css_file_src = os.path.join(root_dir, "report_templates/main.css")
        css_file = open(css_file_src, "rt").read()

        menu_block = []
        content_block = []
        link_idx = 0

        # matplotlib.rcParams.update(ctx.config.reporting.matplotlib_params.raw())
        # ColorProfile.__dict__.update(ctx.config.reporting.colors.raw())
        # StyleProfile.__dict__.update(ctx.config.reporting.style.raw())

        items = defaultdict(lambda: defaultdict(list))  # type: Dict[str, Dict[str, List[HTMLBlock]]]

        # TODO: filter reporters
        for suite in rstorage.iter_suite(FioTest.name):
            all_jobs = list(rstorage.iter_job(suite))
            all_jobs.sort(key=lambda job: job.params)
            for job in all_jobs:
                for reporter in job_reporters:
                    for block, item, html_block in reporter.get_divs(suite, job, rstorage):
                        items[block][item].append(html_block)
                if DEBUG:
                    break

            for reporter in reporters:
                for block, item, html_block in reporter.get_divs(suite, rstorage):
                    items[block][item].append(html_block)

            if DEBUG:
                break

        for idx_1st, menu_1st in enumerate(sorted(items, key=lambda x: menu_1st_order.index(x))):
            menu_block.append(
                '<a href="#item{}" class="nav-group" data-toggle="collapse" data-parent="#MainMenu">{}</a>'
                .format(idx_1st, menu_1st)
            )
            menu_block.append('<div class="collapse" id="item{}">'.format(idx_1st))
            for menu_2nd in sorted(items[menu_1st]):
                menu_block.append('    <a href="#content{}" class="nav-group-item">{}</a>'
                                  .format(link_idx, menu_2nd))
                content_block.append('<div id="content{}">'.format(link_idx))
                content_block.extend("    " + x.data for x in items[menu_1st][menu_2nd])
                content_block.append('</div>')
                link_idx += 1
            menu_block.append('</div>')
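
        # Resulting markup shape (illustrative): every first-level menu entry
        # becomes a collapsible nav group whose items link to the matching
        # "#contentN" divs collected in content_block, e.g.
        #     <a href="#item0" class="nav-group" ...>Summary</a>
        #     <div class="collapse" id="item0">
        #         <a href="#content0" class="nav-group-item">IO & Lat vs QD</a>
        #     </div>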

        report = report_template.replace("{{{menu}}}", ("\n" + " " * 16).join(menu_block))
        report = report.replace("{{{content}}}", ("\n" + " " * 16).join(content_block))
        report_path = rstorage.put_report(report, "index.html")
        rstorage.put_report(css_file, "main.css")
        logger.info("Report is stored into %r", report_path)


class ConsoleReportStage(Stage):

    priority = StepOrder.REPORT

    def run(self, ctx: TestRun) -> None:
        # TODO(koder): load data from storage
        raise NotImplementedError("...")