blob: eeffefc0c10b5c60a59dbcfb20677465e6cb3b55 [file] [log] [blame]
import os
import math
import bisect
import logging
try:
import matplotlib.pyplot as plt
except ImportError:
plt = None
import wally
from wally import charts
from wally.utils import parse_creds, ssize_to_b
from wally.statistic import round_3_digit, round_deviation
from wally.suits.io.results_loader import process_disk_info
from wally.meta_info import total_lab_info, collect_lab_data
# Module-wide logger for the report-generation subsystem.
logger = logging.getLogger("wally.report")
class DiskInfo(object):
    """Mutable holder for aggregated disk performance numbers.

    Every field starts at zero; get_disk_info() fills them in.
    """

    def __init__(self):
        # Peak direct 4k random read/write IOPS.
        self.direct_iops_r_max = 0
        self.direct_iops_w_max = 0

        # Peak 16m direct random read/write bandwidth.
        self.bw_read_max = 0
        self.bw_write_max = 0

        # Sync 4k random-write IOPS achievable under the given
        # latency ceilings (10/30/100 ms).
        self.rws4k_10ms = 0
        self.rws4k_30ms = 0
        self.rws4k_100ms = 0
# Registry of report generators: (required_field_prefixes, name, func).
report_funcs = []


def report(name, required_fields):
    """Decorator factory registering a report generator.

    *required_fields* is a comma-separated list of result-name prefixes
    that must be present for the generator to be applicable.
    The decorated function is returned unchanged.
    """
    def closure(func):
        entry = (required_fields.split(","), name, func)
        report_funcs.append(entry)
        return func
    return closure
def linearity_report(processed_results, path, lab_info):
    """Plot IO time versus block size for 'linearity_test_rrd' results.

    NOTE(review): work-in-progress code — it shows the plot
    interactively via plt.show() and never writes to *path*.

    processed_results -- mapping of test name -> result object with
        .name, .iops, .dev and .raw['blocksize'] attributes.
    """
    # Human-readable legend names keyed by initials, e.g.
    # 'rrd' -> 'rand read direct'.
    names = {}
    for tp1 in ('rand', 'seq'):
        for oper in ('read', 'write'):
            for sync in ('sync', 'direct', 'async'):
                sq = (tp1, oper, sync)
                name = "{0} {1} {2}".format(*sq)
                names["".join(word[0] for word in sq)] = name

    colors = ['red', 'green', 'blue', 'cyan',
              'magenta', 'black', 'yellow', 'burlywood']
    markers = ['*', '^', 'x', 'o', '+', '.']
    color = 0
    marker = 0

    plot_data = {}
    name_pref = 'linearity_test_rrd'

    for res in processed_results.values():
        if res.name.startswith(name_pref):
            # Mean IO time in usec and its upper 3-sigma bound.
            iotime = 1000000. / res.iops
            iotime_max = iotime * (1 + res.dev * 3)
            bsize = ssize_to_b(res.raw['blocksize'])
            plot_data[bsize] = (iotime, iotime_max)

    # Subtract the smallest-block IO time to isolate the
    # size-dependent component.
    min_sz = min(plot_data)
    min_iotime, _ = plot_data.pop(min_sz)

    x = []
    y = []
    e = []

    for k, (v, vmax) in sorted(plot_data.items()):
        y.append(v - min_iotime)
        x.append(k)
        e.append(y[-1] - (vmax - min_iotime))

    # BUG fix: `print e` is Python2-only syntax; `print(e)` produces the
    # same output for a single object on both interpreters.
    print(e)

    tp = 'rrd'
    plt.errorbar(x, y, e, linestyle='None', label=names[tp],
                 color=colors[color], ecolor="black",
                 marker=markers[marker])
    plt.yscale('log')
    plt.xscale('log')
    plt.show()
# matplotlib is optional (guarded import above sets plt to None when it
# is missing); only register the linearity report when it is available.
if plt:
    linearity_report = report('linearity', 'linearity_test')(linearity_report)
def render_hdd_html(dest, info, lab_description):
    """Fill the HDD HTML template with *info* fields and write to *dest*.

    info -- DiskInfo-like object; its attributes are rewritten in place:
        None becomes '-', everything else goes through round_3_digit().
    lab_description -- substituted for {lab_info} in the template.
    """
    very_root_dir = os.path.dirname(os.path.dirname(wally.__file__))
    templ_dir = os.path.join(very_root_dir, 'report_templates')
    templ_file = os.path.join(templ_dir, "report_hdd.html")

    # BUG fix: use context managers so the file handles are closed
    # deterministically (the originals leaked until GC).
    with open(templ_file, 'r') as fd:
        templ = fd.read()

    # BUG fix: snapshot items() before mutating — mutating a dict while
    # iterating its live view raises RuntimeError on Python 3.
    for name, val in list(info.__dict__.items()):
        if not name.startswith('__'):
            if val is None:
                info.__dict__[name] = '-'
            else:
                info.__dict__[name] = round_3_digit(val)

    data = info.__dict__.copy()
    for k, v in data.items():
        if v is None:
            data[k] = "-"

    report = templ.format(lab_info=lab_description, **data)

    with open(dest, 'w') as fd:
        fd.write(report)
def render_ceph_html(dest, info, lab_description):
    """Fill the Ceph HTML template with *info* fields and write to *dest*.

    info -- DiskInfo-like object; numeric attributes are rounded in
        place via round_3_digit(); None values are rendered as '-'.
    lab_description -- substituted for {lab_info} in the template.
    """
    very_root_dir = os.path.dirname(os.path.dirname(wally.__file__))
    templ_dir = os.path.join(very_root_dir, 'report_templates')
    templ_file = os.path.join(templ_dir, "report_ceph.html")

    # BUG fix: close the template file deterministically.
    with open(templ_file, 'r') as fd:
        templ = fd.read()

    # BUG fix: `long` exists only on Python 2; build the numeric-type
    # tuple at run time so the module also imports cleanly on Python 3
    # (where int already covers arbitrary precision).
    try:
        num_types = (int, long, float)  # noqa: F821
    except NameError:
        num_types = (int, float)

    # Round numeric fields for display (snapshot items() first to stay
    # safe while mutating the instance on Python 3).
    for name, val in list(info.__dict__.items()):
        if not name.startswith('__') and isinstance(val, num_types):
            setattr(info, name, round_3_digit(val))

    data = info.__dict__.copy()
    for k, v in data.items():
        if v is None:
            data[k] = "-"

    report = templ.format(lab_info=lab_description, **data)

    with open(dest, 'w') as fd:
        fd.write(report)
def io_chart(title, concurence, latv, iops_or_bw, iops_or_bw_dev,
             legend, fname):
    """Render a vertical-bar performance chart and return it as a string.

    concurence -- client count per data point (x axis).
    latv -- latencies in usec; plotted as a msec line.
    iops_or_bw / iops_or_bw_dev -- bar heights and their deviations.
    legend -- label for the bar series; fname -- output file name.
    """
    bar_data = iops_or_bw
    bar_dev = iops_or_bw_dev
    legend = [legend]

    # Throughput normalized per client.
    iops_or_bw_per_vm = [value / clients
                         for value, clients in zip(bar_data, concurence)]

    # Top and bottom of the deviation band around each bar.
    bar_dev_top = [value + dev for value, dev in zip(bar_data, bar_dev)]
    bar_dev_bottom = [value - dev for value, dev in zip(bar_data, bar_dev)]

    # usec -> msec for the latency line.
    latv = [lat / 1000 for lat in latv]

    ch = charts.render_vertical_bar(title, legend, [bar_data], [bar_dev_top],
                                    [bar_dev_bottom], file_name=fname,
                                    scale_x=concurence, label_x="clients",
                                    label_y=legend[0],
                                    lines=[
                                        (latv, "msec", "rr", "lat"),
                                        (iops_or_bw_per_vm, None, None,
                                         legend[0] + " per client")
                                    ])
    return str(ch)
def make_hdd_plots(processed_results, path):
    """Render the standard pair of HDD charts (4k random read / write)."""
    hdd_charts = [
        ('hdd_test_rrd4k', 'rand_read_4k', 'Random read 4k direct IOPS'),
        ('hdd_test_rws4k', 'rand_write_4k', 'Random write 4k sync IOPS'),
    ]
    make_plots(processed_results, path, hdd_charts)
def make_ceph_plots(processed_results, path):
    """Render the standard set of Ceph charts (4k IOPS and 16m MiBps)."""
    ceph_charts = [
        ('ceph_test_rrd4k', 'rand_read_4k', 'Random read 4k direct IOPS'),
        ('ceph_test_rws4k', 'rand_write_4k', 'Random write 4k sync IOPS'),
        ('ceph_test_rrd16m', 'rand_read_16m', 'Random read 16m direct MiBps'),
        ('ceph_test_rwd16m', 'rand_write_16m',
         'Random write 16m direct MiBps'),
    ]
    make_plots(processed_results, path, ceph_charts)
def make_plots(processed_results, path, plots):
    """Render one chart per (name_prefix, file_name, title) triple.

    processed_results -- mapping of test name -> result object with
        .name, .raw, .lat, .iops, .bw, .dev and .meta attributes.
    Raises ValueError when no results match a prefix.
    """
    for name_pref, fname, desc in plots:
        chart_data = [res for res in processed_results.values()
                      if res.name.startswith(name_pref)]

        if not chart_data:
            # Fixed typo in the message (was "Can't found any date").
            raise ValueError("Can't find any data for " + name_pref)

        # Large-block results (> 16 KiB) are charted as bandwidth,
        # small-block ones as IOPS.
        use_bw = ssize_to_b(chart_data[0].raw['blocksize']) > 16 * 1024

        chart_data.sort(key=lambda x: x.raw['concurence'])

        lat = [x.lat for x in chart_data]

        # BUG fix: the original read `x` here, relying on the loop
        # variable leaking out of a list comprehension (NameError on
        # Python 3). Take the node count from the first result instead;
        # presumably testnodes_count is identical across one test run.
        vm_count = chart_data[0].meta['testnodes_count']
        concurence = [x.raw['concurence'] * vm_count for x in chart_data]

        if use_bw:
            data = [x.bw for x in chart_data]
            data_dev = [x.bw * x.dev for x in chart_data]
            name = "BW"
        else:
            data = [x.iops for x in chart_data]
            data_dev = [x.iops * x.dev for x in chart_data]
            name = "IOPS"

        io_chart(desc, concurence, lat, data, data_dev, name, fname)
def find_max_where(processed_results, sync_mode, blocksize, rw, iops=True):
    """Return [best_value, its_deviation] over matching measurements.

    Scans processed_results.values() for measurements whose raw
    sync_mode / blocksize / rw equal the given ones and picks the one
    with the highest .iops (or .bw when *iops* is False).
    Returns [0, 0] when nothing matches.
    """
    attr = 'iops' if iops else 'bw'
    best_val = 0
    best_dev = 0

    for measurement in processed_results.values():
        raw = measurement.raw
        if raw['sync_mode'] != sync_mode:
            continue
        if raw['blocksize'] != blocksize:
            continue
        if raw['rw'] != rw:
            continue

        candidate = getattr(measurement, attr)
        if candidate > best_val:
            best_val = candidate
            best_dev = measurement.dev

    return [best_val, best_dev]
def get_disk_info(processed_results):
    """Summarize measurements into a display-ready DiskInfo.

    Returns a DiskInfo whose *_max fields are (rounded_value,
    deviation_percent) tuples and whose rws4k_NNms fields hold the
    interpolated sync 4k random-write IOPS achievable under that
    latency, or None when no measurement reached it.
    """
    di = DiskInfo()
    rws4k_iops_lat_th = []

    di.direct_iops_w_max = find_max_where(processed_results,
                                          'd', '4k', 'randwrite')
    di.direct_iops_r_max = find_max_where(processed_results,
                                          'd', '4k', 'randread')

    di.bw_write_max = find_max_where(processed_results,
                                     'd', '16m', 'randwrite', False)
    di.bw_read_max = find_max_where(processed_results,
                                    'd', '16m', 'randread', False)

    # Collect (iops, latency, concurrence) for sync 4k random writes.
    for res in processed_results.values():
        if res.raw['sync_mode'] == 's' and res.raw['blocksize'] == '4k':
            if res.raw['rw'] != 'randwrite':
                continue
            rws4k_iops_lat_th.append((res.iops, res.lat,
                                      res.raw['concurence']))

    di.bw_write_max[0] /= 1000
    di.bw_read_max[0] /= 1000

    # Sort by concurrence. NOTE: the original used a Python2-only
    # tuple-parameter lambda here; index the tuple explicitly instead.
    rws4k_iops_lat_th.sort(key=lambda item: item[2])

    latv = [lat for _, lat, _ in rws4k_iops_lat_th]

    for tlatv_ms in [10, 30, 100]:
        tlat = tlatv_ms * 1000
        pos = bisect.bisect_left(latv, tlat)
        if 0 == pos:
            # Even the lightest load exceeds the target latency.
            iops3 = 0
        elif pos == len(latv):
            # BUG fix: the original assigned latv[-1] (a latency value)
            # as the IOPS figure here. Every measurement stays under the
            # target latency, so report the IOPS of the most loaded one.
            iops3 = rws4k_iops_lat_th[-1][0]
        else:
            # Linearly interpolate concurrence at the target latency,
            # then IOPS at that concurrence, between the two
            # measurements straddling the target.
            lat1 = latv[pos - 1]
            lat2 = latv[pos]

            th1 = rws4k_iops_lat_th[pos - 1][2]
            th2 = rws4k_iops_lat_th[pos][2]

            iops1 = rws4k_iops_lat_th[pos - 1][0]
            iops2 = rws4k_iops_lat_th[pos][0]

            th_lat_coef = (th2 - th1) / (lat2 - lat1)
            th3 = th_lat_coef * (tlat - lat1) + th1

            th_iops_coef = (iops2 - iops1) / (th2 - th1)
            iops3 = th_iops_coef * (th3 - th1) + iops1

        setattr(di, 'rws4k_{}ms'.format(tlatv_ms), int(iops3))

    hdi = DiskInfo()

    def pp(x):
        """(value, relative_dev) -> (rounded value, 3-sigma dev in %)."""
        med, dev = round_deviation((x[0], x[1] * x[0]))
        # 3 sigma in %
        dev = int(float(dev) / med * 100)
        return (med, dev)

    hdi.direct_iops_r_max = pp(di.direct_iops_r_max)
    hdi.direct_iops_w_max = pp(di.direct_iops_w_max)
    hdi.bw_write_max = pp(di.bw_write_max)
    hdi.bw_read_max = pp(di.bw_read_max)

    # Zero means "latency target never met" -> render as unknown.
    hdi.rws4k_10ms = di.rws4k_10ms if 0 != di.rws4k_10ms else None
    hdi.rws4k_30ms = di.rws4k_30ms if 0 != di.rws4k_30ms else None
    hdi.rws4k_100ms = di.rws4k_100ms if 0 != di.rws4k_100ms else None

    return hdi
@report('HDD', 'hdd_test_rrd4k,hdd_test_rws4k')
def make_hdd_report(processed_results, path, lab_info):
    """Generate the full HDD report: charts plus rendered HTML summary."""
    make_hdd_plots(processed_results, path)
    disk_info = get_disk_info(processed_results)
    render_hdd_html(path, disk_info, lab_info)
@report('Ceph', 'ceph_test')
def make_ceph_report(processed_results, path, lab_info):
    """Generate the full Ceph report: charts plus rendered HTML summary."""
    make_ceph_plots(processed_results, path)
    disk_info = get_disk_info(processed_results)
    render_ceph_html(path, disk_info, lab_info)
def make_io_report(results, path, lab_url=None, creds=None):
    """Generate the first applicable HTML report for *results*.

    results -- raw test results, fed through process_disk_info().
    path -- destination path template, '{}'-formatted with the report
        name.
    lab_url / creds -- currently unused: lab-data collection is
        disabled, so placeholder values are substituted instead.

    Exceptions are caught, printed and logged rather than propagated.
    """
    lab_info = None

    # NOTE(review): lab-data collection via collect_lab_data() /
    # total_lab_info() is intentionally disabled here; re-enable once
    # the lab endpoint is reliable.
    if lab_info is None:
        lab_info = {
            "total_disk": "None",
            "total_memory": "None",
            "nodes_count": "None",
            "processor_count": "None"
        }

    try:
        processed_results = process_disk_info(results)
        res_fields = sorted(processed_results.keys())

        for fields, name, func in report_funcs:
            # A generator is applicable only when every required field
            # prefix matches at least one result name (binary search in
            # the sorted name list).
            for field in fields:
                pos = bisect.bisect_left(res_fields, field)

                if pos == len(res_fields):
                    break

                if not res_fields[pos].startswith(field):
                    break
            else:
                hpath = path.format(name)
                # Fixed typo in the log message (was "Generatins").
                logger.debug("Generating report " + name + " into " + hpath)
                func(processed_results, hpath, lab_info)
                # Only the first applicable generator runs.
                break
        else:
            logger.warning("No report generator found for this load")
    except Exception as exc:
        import traceback
        traceback.print_exc()
        logger.error("Failed to generate html report:" + str(exc))