Rework report generation: inline SVG charts, new linearity and lat_vs_iops reports
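
Charts are now rendered with matplotlib and embedded into the HTML
reports as inline SVG instead of being written to separate image files.
PerfInfo keeps the raw per-node bw/iops/lat series together with the
test summary and sync mode, the linearity report is rewritten on top of
matplotlib with least-squares and two-point linear fits, and a
lat_vs_iops report is added.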
diff --git a/wally/report.py b/wally/report.py
index f46352d..04577a9 100644
--- a/wally/report.py
+++ b/wally/report.py
@@ -2,16 +2,19 @@
import bisect
import logging
import collections
+from cStringIO import StringIO
try:
+ import numpy
+ import scipy.optimize
import matplotlib.pyplot as plt
except ImportError:
plt = None
import wally
-from wally import charts
from wally.utils import ssize2b
from wally.statistic import round_3_digit, data_property
+from wally.suits.io.fio_task_parser import get_test_sync_mode
logger = logging.getLogger("wally.report")
@@ -31,35 +34,43 @@
report_funcs = []
+class Attrmapper(object):
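+ """Read-only view that exposes the keys of *dct* as attributes."""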
+ def __init__(self, dct):
+ self.__dct = dct
+
+ def __getattr__(self, name):
+ try:
+ return self.__dct[name]
+ except KeyError:
+ raise AttributeError(name)
+
+
class PerfInfo(object):
- def __init__(self, name, intervals, params, testnodes_count):
+ def __init__(self, name, summary, intervals, params, testnodes_count):
self.name = name
self.bw = None
self.iops = None
self.lat = None
+
+ self.raw_bw = []
+ self.raw_iops = []
+ self.raw_lat = []
+
self.params = params
self.intervals = intervals
self.testnodes_count = testnodes_count
+ self.summary = summary
+ self.p = Attrmapper(self.params.vals)
-
-def split_and_add(data, block_size):
- assert len(data) % block_size == 0
- res = [0] * block_size
-
- for idx, val in enumerate(data):
- res[idx % block_size] += val
-
- return res
+ self.sync_mode = get_test_sync_mode(self.params)
+ self.concurence = self.params.vals.get('numjobs', 1)
def group_by_name(test_data):
name_map = collections.defaultdict(lambda: [])
- for block in test_data:
- for data in block['res']:
- data = data.copy()
- data['__meta__'] = block['__meta__']
- name_map[data['name']].append(data)
+ for data in test_data:
+ name_map[(data.config.name, data.summary())].append(data)
return name_map
@@ -67,37 +78,27 @@
def process_disk_info(test_data):
name_map = group_by_name(test_data)
data = {}
- for name, results in name_map.items():
- testnodes_count_set = set(dt['__meta__']['testnodes_count']
- for dt in results)
+ for (name, summary), results in name_map.items():
+ testnodes_count_set = set(dt.vm_count for dt in results)
assert len(testnodes_count_set) == 1
testnodes_count, = testnodes_count_set
assert len(results) % testnodes_count == 0
- block_count = len(results) // testnodes_count
- intervals = [result['run_interval'] for result in results]
+ intervals = [result.run_interval for result in results]
+ p = results[0].config
+ pinfo = PerfInfo(p.name, summary, intervals,
+ p, testnodes_count)
- p = results[0]['params'].copy()
- rt = p.pop('ramp_time', 0)
+ pinfo.raw_bw = [result.results['bw'] for result in results]
+ pinfo.raw_iops = [result.results['iops'] for result in results]
+ pinfo.raw_lat = [result.results['lat'] for result in results]
- for result in results[1:]:
- tp = result['params'].copy()
- tp.pop('ramp_time', None)
- assert tp == p
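+ # Sum bw/iops across test nodes sample-by-sample; latency lists are
+ # concatenated rather than summed, since latencies can't be added.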
+ pinfo.bw = data_property(map(sum, zip(*pinfo.raw_bw)))
+ pinfo.iops = data_property(map(sum, zip(*pinfo.raw_iops)))
+ pinfo.lat = data_property(sum(pinfo.raw_lat, []))
- p['ramp_time'] = rt
- pinfo = PerfInfo(name, intervals, p, testnodes_count)
-
- bw = [result['results']['bw'] for result in results]
- iops = [result['results']['iops'] for result in results]
- lat = [result['results']['lat'] for result in results]
-
- pinfo.bw = data_property(split_and_add(bw, block_count))
- pinfo.iops = data_property(split_and_add(iops, block_count))
- pinfo.lat = data_property(lat)
-
- data[name] = pinfo
+ data[(p.name, summary)] = pinfo
return data
@@ -108,70 +109,138 @@
return closure
-def linearity_report(processed_results, path, lab_info):
- names = {}
- for tp1 in ('rand', 'seq'):
- for oper in ('read', 'write'):
- for sync in ('sync', 'direct', 'async'):
- sq = (tp1, oper, sync)
- name = "{0} {1} {2}".format(*sq)
- names["".join(word[0] for word in sq)] = name
+def get_test_lcheck_params(pinfo):
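+ """Return a short human-readable load description, e.g. 'direct randread'."""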
+ res = [{
+ 's': 'sync',
+ 'd': 'direct',
+ 'a': 'async',
+ 'x': 'sync direct'
+ }[pinfo.sync_mode]]
- colors = ['red', 'green', 'blue', 'cyan',
- 'magenta', 'black', 'yellow', 'burlywood']
- markers = ['*', '^', 'x', 'o', '+', '.']
- color = 0
- marker = 0
+ res.append(pinfo.p.rw)
- plot_data = {}
-
- name_pref = 'linearity_test_rrd'
-
- for res in processed_results.values():
- if res.name.startswith(name_pref):
- iotime = 1000000. / res.iops
- iotime_max = iotime * (1 + res.dev * 3)
- bsize = ssize2b(res.raw['blocksize'])
- plot_data[bsize] = (iotime, iotime_max)
-
- min_sz = min(plot_data)
- min_iotime, _ = plot_data.pop(min_sz)
-
- x = []
- y = []
- e = []
-
- for k, (v, vmax) in sorted(plot_data.items()):
- y.append(v - min_iotime)
- x.append(k)
- e.append(y[-1] - (vmax - min_iotime))
-
- tp = 'rrd'
- plt.errorbar(x, y, e, linestyle='None', label=names[tp],
- color=colors[color], ecolor="black",
- marker=markers[marker])
- plt.yscale('log')
- plt.xscale('log')
- # plt.show()
-
- # ynew = approximate_line(ax, ay, ax, True)
- # plt.plot(ax, ynew, color=colors[color])
- # color += 1
- # marker += 1
- # plt.legend(loc=2)
- # plt.title("Linearity test by %i dots" % (len(vals)))
+ return " ".join(res)
-if plt:
- linearity_report = report('linearity', 'linearity_test')(linearity_report)
+def get_emb_data_svg(plt):
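+ """Render the current matplotlib figure to SVG and return the markup that
+ follows matplotlib's header comment, ready for inline embedding in HTML."""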
+ sio = StringIO()
+ plt.savefig(sio, format='svg')
+ img_start = "<!-- Created with matplotlib (http://matplotlib.org/) -->"
+ return sio.getvalue().split(img_start, 1)[1]
-def render_all_html(dest, info, lab_description, img_ext, templ_name):
+def get_template(templ_name):
very_root_dir = os.path.dirname(os.path.dirname(wally.__file__))
templ_dir = os.path.join(very_root_dir, 'report_templates')
templ_file = os.path.join(templ_dir, templ_name)
- templ = open(templ_file, 'r').read()
+ return open(templ_file, 'r').read()
+
+@report('linearity', 'linearity_test')
+def linearity_report(processed_results, path, lab_info):
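+ """Plot IO time and IOPS against block size for linearity_test runs and
+ render both charts into the linearity HTML template."""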
+ labels_and_data = []
+
+ vls = processed_results.values()[0].params.vals.copy()
+ del vls['blocksize']
+
+ for res in processed_results.values():
+ if res.name.startswith('linearity_test'):
+ iotimes = [1000. / val for val in res.iops.raw]
+ labels_and_data.append([res.p.blocksize, res.iops.raw, iotimes])
+ cvls = res.params.vals.copy()
+ del cvls['blocksize']
+ assert cvls == vls
+
+ labels_and_data.sort(key=lambda x: ssize2b(x[0]))
+ _, ax1 = plt.subplots()
+
+ labels, data, iotimes = zip(*labels_and_data)
+ plt.boxplot(iotimes)
+
+ if len(labels_and_data) > 2 and ssize2b(labels_and_data[-2][0]) >= 4096:
+ xt = range(1, len(labels) + 1)
+
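+ # Fit a linear cost model io_time(sz) = sz / bw + initial_lat:
+ # transfer time proportional to block size plus a fixed setup cost.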
+ def io_time(sz, bw, initial_lat):
+ return sz / bw + initial_lat
+
+ x = numpy.array(map(ssize2b, labels))
+ y = numpy.array([sum(dt) / len(dt) for dt in iotimes])
+ popt, _ = scipy.optimize.curve_fit(io_time, x, y, p0=(100., 1.))
+
+ y1 = io_time(x, *popt)
+ plt.plot(xt, y1, linestyle='--', label='LS linear approximation')
+
+ for idx, (sz, _, _) in enumerate(labels_and_data):
+ if ssize2b(sz) >= 4096:
+ break
+
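+ # Second estimate from just two points: the first block size >= 4k
+ # and the largest one.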
+ bw = (x[-1] - x[idx]) / (y[-1] - y[idx])
+ lat = y[-1] - x[-1] / bw
+ y2 = io_time(x, bw, lat)
+
+ plt.plot(xt, y2, linestyle='--',
+ label='(4k & max) linear approximation')
+
+ plt.setp(ax1, xticklabels=labels)
+
+ plt.xlabel("Block size")
+ plt.ylabel("IO time, ms")
+
+ plt.legend(loc=0)
+ plt.grid()
+ iotime_plot = get_emb_data_svg(plt)
+
+ _, ax1 = plt.subplots()
+ plt.boxplot(data)
+ plt.setp(ax1, xticklabels=labels)
+
+ plt.xlabel("Block size")
+ plt.ylabel("IOPS")
+ plt.grid()
+
+ iops_plot = get_emb_data_svg(plt)
+
+ res1 = processed_results.values()[0]
+ descr = {
+ 'vm_count': res1.testnodes_count,
+ 'concurence': res1.concurence,
+ 'oper_descr': get_test_lcheck_params(res1).capitalize()
+ }
+
+ params_map = {'iotime_vs_size': iotime_plot,
+ 'iops_vs_size': iops_plot,
+ 'descr': descr}
+
+ with open(path, 'w') as fd:
+ fd.write(get_template('report_linearity.html').format(**params_map))
+
+
+@report('lat_vs_iops', 'lat_vs_iops')
+def lat_vs_iops(processed_results, path, lab_info):
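+ """Plot average latency vs average IOPS with error bars, one series
+ per thread count."""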
+ lat_iops = collections.defaultdict(lambda: [])
+ for res in processed_results.values():
+ if res.name.startswith('lat_vs_iops'):
+ lat_iops[res.concurence].append((res.lat.average / 1000.0,
+ res.lat.deviation / 1000.0,
+ res.iops.average,
+ res.iops.deviation))
+
+ colors = ['red', 'green', 'blue', 'orange', 'magenta', "teal"][::-1]
+ for conc, lat_iops_set in sorted(lat_iops.items()):
+ lat, dev, iops, iops_dev = zip(*lat_iops_set)
+ plt.errorbar(iops, lat, xerr=iops_dev, yerr=dev, fmt='ro',
+ label=str(conc) + " threads",
+ color=colors.pop())
+
+ plt.xlabel("IOPS")
+ plt.ylabel("Latency, ms")
+ plt.grid()
+ plt.legend(loc=0)
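+ # Interactive-only for now: show the chart and stop instead of
+ # embedding it into a report.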
+ plt.show()
+ exit(1)
+
+
+def render_all_html(dest, info, lab_description, images, templ_name):
data = info.__dict__.copy()
for name, val in data.items():
if not name.startswith('__'):
@@ -185,62 +254,25 @@
data['bw_write_max'] = (data['bw_write_max'][0] // 1024,
data['bw_write_max'][1])
- report = templ.format(lab_info=lab_description, img_ext=img_ext,
- **data)
- open(dest, 'w').write(report)
+ images.update(data)
+ report = get_template(templ_name).format(lab_info=lab_description,
+ **images)
-
-def render_hdd_html(dest, info, lab_description, img_ext):
- render_all_html(dest, info, lab_description, img_ext,
- "report_hdd.html")
-
-
-def render_ceph_html(dest, info, lab_description, img_ext):
- render_all_html(dest, info, lab_description, img_ext,
- "report_ceph.html")
+ with open(dest, 'w') as fd:
+ fd.write(report)
def io_chart(title, concurence,
latv, latv_min, latv_max,
- iops_or_bw, iops_or_bw_dev,
- legend, fname):
- bar_data = iops_or_bw
- bar_dev = iops_or_bw_dev
- legend = [legend]
-
- iops_or_bw_per_vm = []
- for iops, conc in zip(iops_or_bw, concurence):
- iops_or_bw_per_vm.append(iops / conc)
-
- bar_dev_bottom = []
- bar_dev_top = []
- for val, err in zip(bar_data, bar_dev):
- bar_dev_top.append(val + err)
- bar_dev_bottom.append(val - err)
-
- charts.render_vertical_bar(title, legend, [bar_data], [bar_dev_top],
- [bar_dev_bottom], file_name=fname,
- scale_x=concurence, label_x="clients",
- label_y=legend[0],
- lines=[
- (latv, "msec", "rr", "lat"),
- # (latv_min, None, None, "lat_min"),
- # (latv_max, None, None, "lat_max"),
- (iops_or_bw_per_vm, None, None,
- legend[0] + " per client")
- ])
-
-
-def io_chart_mpl(title, concurence,
- latv, latv_min, latv_max,
- iops_or_bw, iops_or_bw_err,
- legend, fname, log=False):
+ iops_or_bw, iops_or_bw_err,
+ legend, log=False,
+ boxplots=False):
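+ """Draw the bar/latency chart for one test series and return it as
+ embedded SVG; *concurence* holds (vm_count, threads_per_vm) pairs."""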
points = " MiBps" if legend == 'BW' else ""
lc = len(concurence)
width = 0.35
xt = range(1, lc + 1)
- op_per_vm = [v / c for v, c in zip(iops_or_bw, concurence)]
+ op_per_vm = [v / (vm * th) for v, (vm, th) in zip(iops_or_bw, concurence)]
fig, p1 = plt.subplots()
xpos = [i - width / 2 for i in xt]
@@ -252,7 +284,7 @@
label=legend)
p1.grid(True)
- p1.plot(xt, op_per_vm, '--', label=legend + "/vm", color='black')
+ p1.plot(xt, op_per_vm, '--', label=legend + "/thread", color='black')
handles1, labels1 = p1.get_legend_handles_labels()
p2 = p1.twinx()
@@ -261,8 +293,8 @@
p2.plot(xt, latv_min, label="lat min")
plt.xlim(0.5, lc + 0.5)
- plt.xticks(xt, map(str, concurence))
- p1.set_xlabel("Threads")
+ plt.xticks(xt, ["{0} * {1}".format(vm, th) for (vm, th) in concurence])
+ p1.set_xlabel("VM Count * Thread per VM")
p1.set_ylabel(legend + points)
p2.set_ylabel("Latency ms")
plt.title(title)
@@ -270,39 +302,17 @@
plt.legend(handles1 + handles2, labels1 + labels2,
loc='center left', bbox_to_anchor=(1.1, 0.81))
- # fontsize='small')
if log:
p1.set_yscale('log')
p2.set_yscale('log')
- plt.subplots_adjust(right=0.7)
- # plt.show() # bbox_extra_artists=(leg,), bbox_inches='tight')
- # exit(1)
- plt.savefig(fname, format=fname.split('.')[-1])
+ plt.subplots_adjust(right=0.68)
+
+ return get_emb_data_svg(plt)
-def make_hdd_plots(processed_results, charts_dir):
- plots = [
- ('hdd_test_rrd4k', 'rand_read_4k', 'Random read 4k direct IOPS'),
- ('hdd_test_rws4k', 'rand_write_4k', 'Random write 4k sync IOPS')
- ]
- return make_plots(processed_results, charts_dir, plots)
-
-
-def make_ceph_plots(processed_results, charts_dir):
- plots = [
- ('ceph_test_rrd4k', 'rand_read_4k', 'Random read 4k direct IOPS'),
- ('ceph_test_rws4k', 'rand_write_4k', 'Random write 4k sync IOPS'),
- ('ceph_test_rrd16m', 'rand_read_16m',
- 'Random read 16m direct MiBps'),
- ('ceph_test_rwd16m', 'rand_write_16m',
- 'Random write 16m direct MiBps'),
- ]
- return make_plots(processed_results, charts_dir, plots)
-
-
-def make_plots(processed_results, charts_dir, plots):
- file_ext = None
+def make_plots(processed_results, plots):
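+ """Build the chart for every entry in *plots* and return a
+ {plot_name: svg_markup} dict."""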
+ files = {}
for name_pref, fname, desc in plots:
chart_data = []
@@ -313,9 +323,9 @@
if len(chart_data) == 0:
raise ValueError("Can't found any date for " + name_pref)
- use_bw = ssize2b(chart_data[0].params['blocksize']) > 16 * 1024
+ use_bw = ssize2b(chart_data[0].p.blocksize) > 16 * 1024
- chart_data.sort(key=lambda x: x.params['concurence'])
+ chart_data.sort(key=lambda x: x.concurence)
# if x.lat.average < max_lat]
lat = [x.lat.average / 1000 for x in chart_data]
@@ -323,7 +333,7 @@
lat_max = [x.lat.max / 1000 for x in chart_data]
testnodes_count = x.testnodes_count
- concurence = [x.params['concurence'] * testnodes_count
+ concurence = [(testnodes_count, x.concurence)
for x in chart_data]
if use_bw:
@@ -335,25 +345,24 @@
data_dev = [x.iops.confidence for x in chart_data]
name = "IOPS"
- fname = os.path.join(charts_dir, fname)
- if plt is not None:
- io_chart_mpl(desc, concurence, lat, lat_min, lat_max,
- data, data_dev, name, fname + '.svg')
- file_ext = 'svg'
- else:
- io_chart(desc, concurence, lat, lat_min, lat_max,
- data, data_dev, name, fname + '.png')
- file_ext = 'png'
- return file_ext
+ fc = io_chart(title=desc,
+ concurence=concurence,
+ latv=lat, latv_min=lat_min, latv_max=lat_max,
+ iops_or_bw=data,
+ iops_or_bw_err=data_dev,
+ legend=name)
+ files[fname] = fc
+
+ return files
def find_max_where(processed_results, sync_mode, blocksize, rw, iops=True):
result = None
attr = 'iops' if iops else 'bw'
for measurement in processed_results.values():
- ok = measurement.params['sync_mode'] == sync_mode
- ok = ok and (measurement.params['blocksize'] == blocksize)
- ok = ok and (measurement.params['rw'] == rw)
+ ok = measurement.sync_mode == sync_mode
+ ok = ok and (measurement.p.blocksize == blocksize)
+ ok = ok and (measurement.p.rw == rw)
if ok:
field = getattr(measurement, attr)
@@ -388,12 +397,12 @@
'd', '1m', 'read', False)
for res in processed_results.values():
- if res.params['sync_mode'] == 's' and res.params['blocksize'] == '4k':
- if res.params['rw'] != 'randwrite':
+ if res.sync_mode == 's' and res.p.blocksize == '4k':
+ if res.p.rw != 'randwrite':
continue
rws4k_iops_lat_th.append((res.iops.average,
res.lat.average,
- res.params['concurence']))
+ res.concurence))
rws4k_iops_lat_th.sort(key=lambda (_1, _2, conc): conc)
@@ -438,21 +447,33 @@
return hdi
-@report('HDD', 'hdd_test_rrd4k,hdd_test_rws4k')
-def make_hdd_report(processed_results, path, charts_path, lab_info):
- img_ext = make_hdd_plots(processed_results, charts_path)
+@report('HDD', 'hdd_test')
+def make_hdd_report(processed_results, path, lab_info):
+ plots = [
+ ('hdd_test_rrd4k', 'rand_read_4k', 'Random read 4k direct IOPS'),
+ ('hdd_test_rws4k', 'rand_write_4k', 'Random write 4k sync IOPS')
+ ]
+ images = make_plots(processed_results, plots)
di = get_disk_info(processed_results)
- render_hdd_html(path, di, lab_info, img_ext)
+ render_all_html(path, di, lab_info, images, "report_hdd.html")
@report('Ceph', 'ceph_test')
-def make_ceph_report(processed_results, path, charts_path, lab_info):
- img_ext = make_ceph_plots(processed_results, charts_path)
+def make_ceph_report(processed_results, path, lab_info):
+ plots = [
+ ('ceph_test_rrd4k', 'rand_read_4k', 'Random read 4k direct IOPS'),
+ ('ceph_test_rws4k', 'rand_write_4k', 'Random write 4k sync IOPS'),
+ ('ceph_test_rrd16m', 'rand_read_16m', 'Random read 16m direct MiBps'),
+ ('ceph_test_rwd16m', 'rand_write_16m',
+ 'Random write 16m direct MiBps'),
+ ]
+
+ images = make_plots(processed_results, plots)
di = get_disk_info(processed_results)
- render_ceph_html(path, di, lab_info, img_ext)
+ render_all_html(path, di, lab_info, images, "report_ceph.html")
-def make_io_report(dinfo, results, path, charts_path, lab_info=None):
+def make_io_report(dinfo, results, path, lab_info=None):
lab_info = {
"total_disk": "None",
"total_memory": "None",
@@ -461,7 +482,8 @@
}
try:
- res_fields = sorted(dinfo.keys())
+ res_fields = sorted(v.name for v in dinfo.values())
+
for fields, name, func in report_funcs:
for field in fields:
pos = bisect.bisect_left(res_fields, field)
@@ -474,7 +496,7 @@
else:
hpath = path.format(name)
logger.debug("Generatins report " + name + " into " + hpath)
- func(dinfo, hpath, charts_path, lab_info)
+ func(dinfo, hpath, lab_info)
break
else:
logger.warning("No report generator found for this load")