data.py: report data size, original size and run time in compare table
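
Parse the log block that precedes each results block to recover the
test's wall-clock run time and the original --iosize it was started
with. Show overall data size, original size and average run time as
extra columns in the comparison table, and repeat the column header
every LINES_PER_HEADER rows so long tables stay readable.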
diff --git a/scripts/data.py b/scripts/data.py
index b0c8f27..7424711 100644
--- a/scripts/data.py
+++ b/scripts/data.py
@@ -2,30 +2,100 @@
import sys
import json
+from disk_perf_test_tool.utils import kb_to_ssize
+
splitter_rr = "(?ms)=====+\n"
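+# matches the pair of DEBUG log lines that bracket one test run, capturing
+# the HH:MM:SS start and finish timestamps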
+test_time_rr = r"""
+(?ims)(?P<start_time>[:0-9]{8}) - DEBUG - io-perf-tool - Passing barrier, starting test
+(?P<finish_time>[:0-9]{8}) - DEBUG - io-perf-tool - Done\. Closing connection
+"""
+
+test_time_rr = test_time_rr.strip()
+test_time_rr = test_time_rr.replace('\n', '\\s+').replace(' ', '\\s+')
+test_time_re = re.compile(test_time_rr)
+
+
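+# "HH:MM:SS" -> seconds since midnight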
+def to_sec(val):
+ assert val.count(":") == 2
+ h, m, s = val.split(":")
+ return int(h) * 3600 + int(m) * 60 + int(s)
+
+
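+# seconds -> "MM:SS"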
+def to_min_sec(val):
+ return "{0:2d}:{1:02d}".format(val / 60, val % 60)
+
+
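+# wall-clock duration in seconds of the test run logged in this block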
+def get_test_time(block):
+ time_m = test_time_re.search(block)
+ if time_m is None:
+        raise ValueError("Can't find test time in log block")
+
+ start_time = to_sec(time_m.group('start_time'))
+ finish_time = to_sec(time_m.group('finish_time'))
+ test_time = finish_time - start_time
+
+ if test_time < 0:
+        # test crossed midnight: timestamps really should be logged in UTC
+ test_time += 24 * 60 * 60
+ return test_time
+
+
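+# pulls the --iosize argument out of the "Run test with '...'" log line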
+run_test_params_rr = r"(?ims)Run\s+test\s+with" + \
+ r"\s+'.*?--iosize\s+(?P<size>[^ ]*)"
+run_test_params_re = re.compile(run_test_params_rr)
+
+
+def get_orig_size(block):
+ orig_size = run_test_params_re.search(block)
+ if orig_size is None:
+        print >>sys.stderr, block
+        raise ValueError("Can't find original size")
+ return orig_size.group(1)
+
def get_data_from_output(fname):
results = {}
+ results_meta = {}
fc = open(fname).read()
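+    # the log block preceding a results block describes that test run;
+    # keep it around to extract timing and --iosize information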
+ prev_block = None
for block in re.split(splitter_rr, fc):
block = block.strip()
-        if not block.startswith("[{u'__meta__':"):
-            continue
-        for val in eval(block):
-            meta = val['__meta__']
-            if meta['sync']:
-                meta['sync'] = 's'
-            elif meta['direct_io']:
-                meta['sync'] = 'd'
-            else:
-                meta['sync'] = 'a'
-            key = "{action} {sync} {blocksize}k {concurence}".format(**meta)
-            results.setdefault(key, []).append(val['bw_mean'])
+        if block.startswith("[{u'__meta__':"):
+            for val in eval(block):
+                meta = val['__meta__']
+
+                # encode the io mode as a single letter for the table key
+                if meta['sync']:
+                    meta['sync'] = 's'
+                elif meta['direct_io']:
+                    meta['sync'] = 'd'
+                else:
+                    meta['sync'] = 'a'
+
+                # overall data size == per-worker size times concurrency
+                meta['fsize'] = kb_to_ssize(meta['size'] * meta['concurence'])
+                key = ("{action} {sync} {blocksize}k " +
+                       "{concurence} {fsize}").format(**meta)
+                results.setdefault(key, []).append(val['bw_mean'])
+
+                cmeta = results_meta.setdefault(key, {})
+                cmeta.setdefault('times', []).append(get_test_time(prev_block))
+                cmeta['orig_size'] = get_orig_size(prev_block)
+
+        prev_block = block
processed_res = {}
@@ -34,12 +104,15 @@
med = float(sum(v)) / len(v)
ran = sum(abs(x - med) for x in v) / len(v)
processed_res[k] = (int(med), int(ran))
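+        # collapse the per-run duration list into its integer mean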
+ t = results_meta[k]['times']
+ results_meta[k]['times'] = int(float(sum(t)) / len(t))
- return meta, processed_res
+ return processed_res, results_meta
def ksort(x):
- op, sync, sz, conc = x.split(" ")
+ op, sync, sz, conc, fsize = x.split(" ")
-    return (op, sync, int(sz[:-1]), int(conc))
+    return (op, sync, int(sz[:-1]), int(conc), fsize)
@@ -51,25 +124,38 @@
return json.dumps(row)
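+# repeat the table header after roughly this many data rows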
+LINES_PER_HEADER = 20
+
+
def show_data(*pathes):
- begin = "| {:>10} {:>6} {:>5} {:>3}"
- first_file_templ = " | {:>6} ~ {:>5} {:>2}% {:>5}"
- other_file_templ = " | {:>6} ~ {:>5} {:>2}% {:>5} ---- {:>6}%"
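+    # "begin" holds the per-test key columns; each results file then adds
+    # BW/DEV/IOPS/time columns (files after the first also get a DIFF%)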
+ begin = "| {:>10} {:>6} {:>5} {:>3} {:>5} {:>7}"
+ first_file_templ = " | {:>6} ~ {:>5} {:>2}% {:>5} {:>6}"
+    other_file_templ = " | {:>6} ~ {:>5} {:>2}% {:>5} ---- {:>6}% {:>6}"
line_templ = begin + first_file_templ + \
other_file_templ * (len(pathes) - 1) + " |"
header_ln = line_templ.replace("<", "^").replace(">", "^")
- params = ["Oper", "Sync", "BSZ", "CC", "BW1", "DEV1", "%", "IOPS1"]
+ params = ["Oper", "Sync", "BSZ", "CC", "DSIZE", "OSIZE",
+ "BW1", "DEV1", "%", "IOPS1", "TIME"]
for pos in range(1, len(pathes)):
- params += "BW{0}+DEV{0}+%+IOPS{0}+DIFF %".format(pos).split("+")
+        params += "BW{0}+DEV{0}+%+IOPS{0}+DIFF+TTIME".format(pos).split("+")
header_ln = header_ln.format(*params)
sep = '-' * len(header_ln)
- results = [get_data_from_output(path)[1] for path in pathes]
+ results = []
+ metas = []
+
+ for path in pathes:
+ result, meta = get_data_from_output(path)
+ results.append(result)
+ metas.append(meta)
print sep
print header_ln
@@ -81,32 +167,44 @@
for result in results[1:]:
common_keys &= set(result.keys())
+ lcount = 0
for k in sorted(common_keys, key=ksort):
- tp = k.rsplit(" ", 2)[0]
- op, s, sz, conc = k.split(" ")
+ tp = k.rsplit(" ", 3)[0]
+ op, s, sz, conc, fsize = k.split(" ")
s = {'a': 'async', "s": "sync", "d": "direct"}[s]
if tp != prev_tp and prev_tp is not None:
print sep
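+            # repeat the header now and then so long tables stay readable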
+ if lcount > LINES_PER_HEADER:
+ print header_ln
+ print sep
+ lcount = 0
+
prev_tp = tp
- results0 = results[0]
- m0, d0 = results0[k]
+ m0, d0 = results[0][k]
iops0 = m0 / int(sz[:-1])
perc0 = int(d0 * 100.0 / m0 + 0.5)
- data = [op, s, sz, conc, m0, d0, perc0, iops0]
+ data = [op, s, sz, conc, fsize,
+ metas[0][k]['orig_size'],
+ m0, d0, perc0, iops0,
+ to_min_sec(metas[0][k]['times'])]
- for result in results[1:]:
+ for meta, result in zip(metas[1:], results[1:]):
m, d = result[k]
iops = m / int(sz[:-1])
perc = int(d * 100.0 / m + 0.5)
avg_diff = int(((m - m0) * 100.) / m + 0.5)
- data.extend([m, d, perc, iops, avg_diff])
+
+ dtime = to_min_sec(meta[k]['times'])
+ data.extend([m, d, perc, iops, avg_diff, dtime])
print line_templ.format(*data)
+ lcount += 1
print sep