fio suite: collect per-test log files, retry failed runs, clean up io configs

* suits/io/__init__.py: store the per-metric fio log file map ('files') in
  IOTestResults and restore it on load; prefill test files with fio instead
  of dd; retry a failed fio run (up to 3 attempts on single-node runs) and
  raise StopTestError once retries are exhausted; strip any non-JSON prefix
  from fio output before parsing; collect the lat/iops/bw logs per node and
  return them from do_run.
* defaults.cfg: replace softrandommap with norandommap; add size and the
  write_*_log / log_avg_msec options; per-suite configs drop the duplicated
  options, NUM_ROUNDS and the '_test' suffix in section names.
* cinder_iscsi.cfg: new test profile.
* fio_task_parser.py: drop the local to_bytes() in favor of
  wally.utils.ssize2b; substitute a per-section UNIQ_OFFSET (MAGIC_OFFSET);
  add abbv_name_to_full().
* formatter.py: group console results by test name and concurrency
  (new getconc() helper).
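
For reference, a minimal sketch of how the prefill command is now built
(it mirrors cmd_templ in wally/suits/io/__init__.py; the /dev/vdb path and
the 10240 MiB size are illustrative values, not taken from the patch):

    # Build the fio prefill command the same way the patched prefill code does.
    # The size placeholder is in MiB, matching cmd_templ.format(fname, curr_sz).
    cmd_templ = ("fio --name=xxx --filename={0} --direct=1"
                 " --bs=4m --size={1}m --rw=write")
    use_sudo = True  # assumption for the example; the suite reads self.use_sudo
    if use_sudo:
        cmd_templ = "sudo " + cmd_templ
    print(cmd_templ.format("/dev/vdb", 10240))
    # -> sudo fio --name=xxx --filename=/dev/vdb --direct=1 --bs=4m --size=10240m --rw=write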
diff --git a/wally/suits/io/__init__.py b/wally/suits/io/__init__.py
index 978fa46..e548395 100644
--- a/wally/suits/io/__init__.py
+++ b/wally/suits/io/__init__.py
@@ -33,7 +33,8 @@
             'raw_result': self.raw_result,
             'run_interval': self.run_interval,
             'vm_count': self.vm_count,
-            'test_name': self.test_name
+            'test_name': self.test_name,
+            'files': self.files
         }
 
     @classmethod
@@ -44,7 +45,8 @@
 
         return cls(sec, data['params'], data['results'],
                    data['raw_result'], data['run_interval'],
-                   data['vm_count'], data['test_name'])
+                   data['vm_count'], data['test_name'],
+                   files=data.get('files', {}))
 
 
 def get_slice_parts_offset(test_slice, real_inteval):
@@ -121,8 +123,8 @@
                 # take largest size
                 files[fname] = max(files.get(fname, 0), msz)
 
-        cmd_templ = "dd oflag=direct " + \
-                    "if=/dev/zero of={0} bs={1} count={2}"
+        cmd_templ = "fio --name=xxx --filename={0} --direct=1" + \
+                    " --bs=4m --size={1}m --rw=write"
 
         if self.use_sudo:
             cmd_templ = "sudo " + cmd_templ
@@ -131,10 +133,16 @@
         stime = time.time()
 
         for fname, curr_sz in files.items():
-            cmd = cmd_templ.format(fname, 1024 ** 2, curr_sz)
+            cmd = cmd_templ.format(fname, curr_sz)
             ssize += curr_sz
             self.run_over_ssh(cmd, timeout=curr_sz)
 
+        # if self.use_sudo:
+        #     self.run_over_ssh("echo 3 | sudo tee /proc/sys/vm/drop_caches",
+        #                       timeout=5)
+        # else:
+        #     logger.warning("Can't flush caches as sudo is disabled")
+
         ddtime = time.time() - stime
         if ddtime > 1E-3:
             fill_bw = int(ssize / ddtime)
@@ -225,10 +233,24 @@
                     logger.info("Will run tests: " + ", ".join(msgs))
 
                 nolog = (pos != 0) or not self.is_primary
-                out_err, interval = self.do_run(barrier, fio_cfg_slice, pos,
-                                                nolog=nolog)
+
+                max_retr = 3 if self.total_nodes_count == 1 else 1
+
+                for idx in range(max_retr):
+                    try:
+                        out_err, interval, files = self.do_run(barrier, fio_cfg_slice, pos,
+                                                               nolog=nolog)
+                        break
+                    except Exception as exc:
+                        logger.exception("During fio run")
+                        if idx == max_retr - 1:
+                            raise StopTestError("Fio failed", exc)
+                    logger.info("Sleeping 30s and retrying")
+                    time.sleep(30)
 
                 try:
+                    # HACK: strip any output fio printed before the JSON object
+                    out_err = "{" + out_err.split("{", 1)[1]
                     full_raw_res = json.loads(out_err)
 
                     res = {"bw": [], "iops": [], "lat": [],
@@ -246,27 +268,32 @@
                     first = fio_cfg_slice[0]
                     p1 = first.vals.copy()
                     p1.pop('ramp_time', 0)
+                    p1.pop('offset', 0)
 
                     for nxt in fio_cfg_slice[1:]:
                         assert nxt.name == first.name
                         p2 = nxt.vals
                         p2.pop('_ramp_time', 0)
-
+                        p2.pop('offset', 0)
                         assert p1 == p2
 
+                    tname = os.path.basename(self.config_fname)
+                    if tname.endswith('.cfg'):
+                        tname = tname[:-4]
+
                     tres = IOTestResults(first,
                                          self.config_params, res,
                                          full_raw_res, interval,
-                                         vm_count=self.total_nodes_count)
-                    tres.test_name = os.path.basename(self.config_fname)
-                    if tres.test_name.endswith('.cfg'):
-                        tres.test_name = tres.test_name[:-4]
+                                         test_name=tname,
+                                         vm_count=self.total_nodes_count,
+                                         files=files)
                     self.on_result_cb(tres)
-                except (OSError, StopTestError):
+                except StopTestError:
                     raise
                 except Exception as exc:
-                    msg_templ = "Error during postprocessing results: {0!s}"
-                    raise RuntimeError(msg_templ.format(exc))
+                    msg_templ = "Error during postprocessing results"
+                    logger.exception(msg_templ)
+                    raise StopTestError(msg_templ, exc)
 
         finally:
             barrier.exit()
@@ -379,20 +406,22 @@
             with open(os.path.join(self.log_directory, fname), "w") as fd:
                 fd.write(result)
 
+            files = {}
+
             for fname in log_files:
                 try:
                     fc = read_from_remote(sftp, fname)
                 except:
                     continue
                 sftp.remove(fname)
-
-                loc_fname = "{0}_{1}_{2}".format(pos, fconn_id,
-                                                 fname.split('_')[-1])
+                ftype = fname.split('_')[-1].split(".")[0]
+                loc_fname = "{0}_{1}_{2}.log".format(pos, fconn_id, ftype)
+                files.setdefault(ftype, []).append(loc_fname)
                 loc_path = os.path.join(self.log_directory, loc_fname)
                 with open(loc_path, "w") as fd:
                     fd.write(fc)
 
-        return result, (begin, end)
+        return result, (begin, end), files
 
     @classmethod
     def merge_results(cls, results):
diff --git a/wally/suits/io/ceph.cfg b/wally/suits/io/ceph.cfg
index 26aa65f..4a651e6 100644
--- a/wally/suits/io/ceph.cfg
+++ b/wally/suits/io/ceph.cfg
@@ -3,17 +3,14 @@
 
 NUMJOBS={% 1, 5, 10, 15, 40 %}
 NUMJOBS_SHORT={% 1, 2, 3, 10 %}
-TEST_FILE_SIZE=100G
 
-size={TEST_FILE_SIZE}
 ramp_time=15
 runtime=60
-NUM_ROUNDS=7
 
 # ---------------------------------------------------------------------
 # check different thread count, sync mode. (latency, iops) = func(th_count)
 # ---------------------------------------------------------------------
-[ceph_test_{TEST_SUMM}]
+[ceph_{TEST_SUMM}]
 blocksize=4k
 rw=randwrite
 sync=1
@@ -22,7 +19,7 @@
 # ---------------------------------------------------------------------
 # direct write
 # ---------------------------------------------------------------------
-[ceph_test_{TEST_SUMM}]
+[ceph_{TEST_SUMM}]
 blocksize=4k
 rw=randwrite
 direct=1
@@ -32,7 +29,7 @@
 # check different thread count, direct read mode. (latency, iops) = func(th_count)
 # also check iops for randread
 # ---------------------------------------------------------------------
-[ceph_test_{TEST_SUMM}]
+[ceph_{TEST_SUMM}]
 blocksize=4k
 rw=randread
 direct=1
@@ -42,7 +39,7 @@
 # this is essentially sequential write/read operations
 # we can't use sequential with numjobs > 1 due to caching and block merging
 # ---------------------------------------------------------------------
-[ceph_test_{TEST_SUMM}]
+[ceph_{TEST_SUMM}]
 blocksize=16m
 rw={% randread, randwrite %}
 direct=1
diff --git a/wally/suits/io/check_distribution.cfg b/wally/suits/io/check_distribution.cfg
index 9475cde..7c06813 100644
--- a/wally/suits/io/check_distribution.cfg
+++ b/wally/suits/io/check_distribution.cfg
@@ -1,13 +1,10 @@
 [global]
 include defaults.cfg
-NUM_ROUNDS=301
 
 [distrubution_test_{TEST_SUMM}]
 blocksize=4k
 rw=randwrite
 direct=1
-
+sync=1
 ramp_time=5
 runtime=30
-
-size=200G
diff --git a/wally/suits/io/check_linearity.cfg b/wally/suits/io/check_linearity.cfg
index 6af5fcc..42618d4 100644
--- a/wally/suits/io/check_linearity.cfg
+++ b/wally/suits/io/check_linearity.cfg
@@ -1,9 +1,7 @@
 [global]
-
 include defaults.cfg
-NUM_ROUNDS=7
 
-size={TEST_FILE_SIZE}
+direct=1
 ramp_time=5
 runtime=30
 BLOCK_SIZES={% 512,1k,2k,4k,8k,16k,32k,128k,256k,512k,1m %}
@@ -11,10 +9,9 @@
 # ---------------------------------------------------------------------
 # check read and write linearity. oper_time = func(size)
 # ---------------------------------------------------------------------
-# [linearity_test_{TEST_SUMM}]
-# blocksize={BLOCK_SIZES}
-# rw={% randwrite, randread %}
-# direct=1
+[linearity_test_{TEST_SUMM}]
+blocksize={BLOCK_SIZES}
+rw=randread
 
 # ---------------------------------------------------------------------
 # check sync write linearity. oper_time = func(size)
diff --git a/wally/suits/io/check_th_count.cfg b/wally/suits/io/check_th_count.cfg
index 1607634..745f189 100644
--- a/wally/suits/io/check_th_count.cfg
+++ b/wally/suits/io/check_th_count.cfg
@@ -1,18 +1,10 @@
-[defaults]
+[global]
+include defaults.cfg
 
-# this is critical for correct results in multy-node run
-randrepeat=0
-
-NUM_ROUNDS=7
 ramp_time=5
-buffered=0
-wait_for_previous
-filename={FILENAME}
-iodepth=1
-size=10G
-time_based
 runtime=30
-group_reporting
+direct=1
+
 numjobs={% 1, 2, 5, 10, 15, 20, 25, 30, 35, 40 %}
 
 # ---------------------------------------------------------------------
@@ -31,20 +23,15 @@
 # 1m + read      + direct
 #
 # ---------------------------------------------------------------------
-[concurrence_test_{TEST_SUMM} * {NUM_ROUNDS}]
-blocksize=4k
-rw={% randread %}
-direct=1
-sync=0
-
-[concurrence_test_{TEST_SUMM} * {NUM_ROUNDS}]
+[concurrence_{TEST_SUMM}]
 blocksize=4k
 rw=randwrite
-direct=0
+
+[concurrence_{TEST_SUMM}]
+blocksize=4k
+rw={% randread, randwrite %}
 sync=1
 
-[concurrence_test_{TEST_SUMM} * {NUM_ROUNDS}]
+[concurrence_{TEST_SUMM}]
 blocksize=1m
 rw={% write, read %}
-direct=1
-sync=0
diff --git a/wally/suits/io/cinder_iscsi.cfg b/wally/suits/io/cinder_iscsi.cfg
new file mode 100644
index 0000000..4d19dd9
--- /dev/null
+++ b/wally/suits/io/cinder_iscsi.cfg
@@ -0,0 +1,56 @@
+[global]
+include defaults.cfg
+
+# NUMJOBS={% 1, 5, 10, 15, 20, 30, 40, 80 %}
+
+NUMJOBS={% 1, 3, 5, 10, 20, 40 %}
+
+direct=1
+ramp_time=5
+runtime=30
+
+# ---------------------------------------------------------------------
+# check different thread count, sync mode. (latency, iops) = func(th_count)
+# ---------------------------------------------------------------------
+[cinder_iscsi_{TEST_SUMM}]
+blocksize=4k
+rw=randwrite
+sync=1
+numjobs={NUMJOBS}
+
+# ---------------------------------------------------------------------
+# check different thread count, direct read mode. (latency, iops) = func(th_count)
+# also check iops for randread
+# ---------------------------------------------------------------------
+[cinder_iscsi_{TEST_SUMM}]
+blocksize=4k
+rw=randread
+numjobs={NUMJOBS}
+
+# ---------------------------------------------------------------------
+# check IOPS randwrite.
+# ---------------------------------------------------------------------
+[cinder_iscsi_{TEST_SUMM}]
+blocksize=64k
+rw=randwrite
+ramp_time=180
+runtime=120
+
+# ---------------------------------------------------------------------
+# No reason for th count > 1 in case of sequential operations
+# or they become random
+# ---------------------------------------------------------------------
+[cinder_iscsi_{TEST_SUMM}]
+blocksize=1m
+rw={% read,write %}
+offset={UNIQ_OFFSET}
+ramp_time=90
+runtime=30
+
+# [cinder_iscsi_{TEST_SUMM}]
+# blocksize=64m
+# rw={% randread,randwrite %}
+# direct=1
+# ramp_time=30
+# runtime=30
+#
diff --git a/wally/suits/io/defaults.cfg b/wally/suits/io/defaults.cfg
index 51a8145..9aff22c 100644
--- a/wally/suits/io/defaults.cfg
+++ b/wally/suits/io/defaults.cfg
@@ -1,7 +1,9 @@
 buffered=0
 group_reporting=1
 iodepth=1
-softrandommap=1
+
+norandommap=1
+
 thread=1
 time_based=1
 wait_for_previous=1
@@ -11,4 +13,11 @@
 
 filename={FILENAME}
 
+size={TEST_FILE_SIZE}
+
+write_lat_log=fio_log
+write_iops_log=fio_log
+write_bw_log=fio_log
+log_avg_msec=500
+
 
diff --git a/wally/suits/io/fio_task_parser.py b/wally/suits/io/fio_task_parser.py
index 52c4bb3..aca0254 100644
--- a/wally/suits/io/fio_task_parser.py
+++ b/wally/suits/io/fio_task_parser.py
@@ -7,7 +7,7 @@
 from collections import OrderedDict, namedtuple
 
 
-from wally.utils import sec_to_str
+from wally.utils import sec_to_str, ssize2b
 
 
 SECTION = 0
@@ -50,20 +50,6 @@
         return res
 
 
-def to_bytes(sz):
-    sz = sz.lower()
-    try:
-        return int(sz)
-    except ValueError:
-        if sz[-1] == 'm':
-            return (1024 ** 2) * int(sz[:-1])
-        if sz[-1] == 'k':
-            return 1024 * int(sz[:-1])
-        if sz[-1] == 'g':
-            return (1024 ** 3) * int(sz[:-1])
-        raise
-
-
 class ParseError(ValueError):
     def __init__(self, msg, fname, lineno, line_cont=""):
         ValueError.__init__(self, msg)
@@ -265,11 +251,30 @@
             elif val.name in processed_vals:
                 val = processed_vals[val.name]
         processed_vals[name] = val
+
     sec = sec.copy()
     sec.vals = processed_vals
     return sec
 
 
+MAGIC_OFFSET = 0.1885
+
+
+def abbv_name_to_full(name):
+    assert len(name) == 3
+
+    smode = {
+        'a': 'async',
+        's': 'sync',
+        'd': 'direct',
+        'x': 'sync direct'
+    }
+    off_mode = {'s': 'sequential', 'r': 'random'}
+    oper = {'r': 'read', 'w': 'write'}
+    return smode[name[2]] + " " + \
+        off_mode[name[0]] + " " + oper[name[1]]
+
+
 def finall_process(sec, counter=[0]):
     sec = sec.copy()
 
@@ -279,6 +284,16 @@
 
     sec.vals['unified_rw_reporting'] = '1'
 
+    sz = ssize2b(sec.vals['size'])
+    offset = sz * ((MAGIC_OFFSET * counter[0]) % 1.0)
+    offset = int(offset) // 1024 ** 2
+    new_vars = {'UNIQ_OFFSET': str(offset) + "m"}
+
+    for name, val in sec.vals.items():
+        if isinstance(val, Var):
+            if val.name in new_vars:
+                sec.vals[name] = new_vars[val.name]
+
     params = sec.vals.copy()
     params['UNIQ'] = 'UN{0}'.format(counter[0])
     params['COUNTER'] = str(counter[0])
diff --git a/wally/suits/io/formatter.py b/wally/suits/io/formatter.py
index 84b0a13..59691b2 100644
--- a/wally/suits/io/formatter.py
+++ b/wally/suits/io/formatter.py
@@ -2,22 +2,27 @@
 
 from wally.utils import ssize2b
 from wally.statistic import round_3_digit
-from .fio_task_parser import get_test_summary, get_test_sync_mode
+from .fio_task_parser import get_test_sync_mode
+
+
+def getconc(data):
+    th_count = data.params.vals.get('numjobs')
+
+    if th_count is None:
+        th_count = data.params.vals.get('concurence', 1)
+    return th_count
 
 
 def key_func(data):
     p = data.params.vals
 
-    th_count = data.params.vals.get('numjobs')
+    th_count = getconc(data)
 
-    if th_count is None:
-        th_count = data.params.vals.get('concurence', 1)
-
-    return (p['rw'],
+    return (data.name.rsplit("_", 1)[0],
+            p['rw'],
             get_test_sync_mode(data.params),
             ssize2b(p['blocksize']),
-            int(th_count) * data.testnodes_count,
-            data.name)
+            int(th_count) * data.testnodes_count)
 
 
 def format_results_for_console(dinfo):
@@ -36,8 +41,7 @@
               "Cnf\n95%", "Dev%", "iops\nper vm", "KiBps\nper vm", "lat\nms"]
 
     for data in items:
-
-        curr_k = key_func(data)[:3]
+        curr_k = key_func(data)[:4]
 
         if prev_k is not None:
             if prev_k != curr_k:
diff --git a/wally/suits/io/hdd.cfg b/wally/suits/io/hdd.cfg
index 0eb85a6..6d3107a 100644
--- a/wally/suits/io/hdd.cfg
+++ b/wally/suits/io/hdd.cfg
@@ -5,18 +5,14 @@
 
 NUMJOBS={% 1, 3, 5, 10, 20, 40 %}
 
-write_lat_log=fio_log
-write_iops_log=fio_log
-log_avg_msec=500
-
-size={TEST_FILE_SIZE}
 ramp_time=5
 runtime=30
+direct=1
 
 # ---------------------------------------------------------------------
 # check different thread count, sync mode. (latency, iops) = func(th_count)
 # ---------------------------------------------------------------------
-[hdd_test_{TEST_SUMM}]
+[hdd_{TEST_SUMM}]
 blocksize=4k
 rw=randwrite
 sync=1
@@ -26,25 +22,22 @@
 # check different thread count, direct read mode. (latency, iops) = func(th_count)
 # also check iops for randread
 # ---------------------------------------------------------------------
-[hdd_test_{TEST_SUMM}]
+[hdd_{TEST_SUMM}]
 blocksize=4k
 rw=randread
-direct=1
 numjobs={NUMJOBS}
 
 # ---------------------------------------------------------------------
 # check IOPS randwrite.
 # ---------------------------------------------------------------------
-[hdd_test_{TEST_SUMM}]
+[hdd_{TEST_SUMM}]
 blocksize=4k
 rw=randwrite
-direct=1
 
 # ---------------------------------------------------------------------
 # No reason for th count > 1 in case of sequantial operations
-# They became random
+# or they become random
 # ---------------------------------------------------------------------
-[hdd_test_{TEST_SUMM}]
+[hdd_{TEST_SUMM}]
 blocksize=1m
 rw={% read, write %}
-direct=1
diff --git a/wally/suits/io/lat_vs_iops.cfg b/wally/suits/io/lat_vs_iops.cfg
index dbafcbb..16e73e2 100644
--- a/wally/suits/io/lat_vs_iops.cfg
+++ b/wally/suits/io/lat_vs_iops.cfg
@@ -1,15 +1,13 @@
 [global]
 include defaults.cfg
 
-TEST_FILE_SIZE=100G
-size={TEST_FILE_SIZE}
-
 ramp_time=5
 runtime=30
 
 blocksize=4k
 rw=randwrite
 sync=1
+direct=1
 
 # ---------------------------------------------------------------------
 # latency as function from IOPS
diff --git a/wally/suits/io/long_test.cfg b/wally/suits/io/long_test.cfg
index fd420d8..a304b8b 100644
--- a/wally/suits/io/long_test.cfg
+++ b/wally/suits/io/long_test.cfg
@@ -1,31 +1,22 @@
-[defaults]
-
-# this is critical for correct results in multy-node run
-randrepeat=0
+[global]
+include defaults.cfg
 
 # 24h test
 NUM_ROUNDS1=270
 NUM_ROUNDS2=261
 
-buffered=0
-wait_for_previous
-filename={FILENAME}
-iodepth=1
-size=50G
-time_based
-runtime=300
+direct=1
+blocksize=128k
+rw=randwrite
 
 # ---------------------------------------------------------------------
 # check read and write linearity. oper_time = func(size)
 # ---------------------------------------------------------------------
-[24h_test * {NUM_ROUNDS1}]
-blocksize=128k
-rw=randwrite
-direct=1
+[24h_test]
 runtime=30
+NUM_ROUND={NUM_ROUNDS1}
 
-[24h_test * {NUM_ROUNDS2}]
-blocksize=128k
-rw=randwrite
-direct=1
+[24h_test]
+runtime=300
+NUM_ROUND={NUM_ROUNDS2}
 
diff --git a/wally/suits/io/rrd.cfg b/wally/suits/io/rrd.cfg
index 8eaffea..78e1f0e 100644
--- a/wally/suits/io/rrd.cfg
+++ b/wally/suits/io/rrd.cfg
@@ -5,10 +5,6 @@
 ramp_time=5
 runtime=40
 
-write_lat_log=fio_log
-write_iops_log=fio_log
-log_avg_msec=500
-
 # ---------------------------------------------------------------------
 [rws_{TEST_SUMM}]
 blocksize=4k
diff --git a/wally/suits/io/verify.cfg b/wally/suits/io/verify.cfg
index b3162eb..92c78e5 100644
--- a/wally/suits/io/verify.cfg
+++ b/wally/suits/io/verify.cfg
@@ -5,10 +5,6 @@
 ramp_time=5
 runtime=10
 
-write_lat_log=fio_log
-write_iops_log=fio_log
-log_avg_msec=500
-
 # ---------------------------------------------------------------------
 [verify_{TEST_SUMM}]
 blocksize=4k
diff --git a/wally/suits/io/vm_count_ec2.cfg b/wally/suits/io/vm_count_ec2.cfg
index c6fc56c..3efcf00 100644
--- a/wally/suits/io/vm_count_ec2.cfg
+++ b/wally/suits/io/vm_count_ec2.cfg
@@ -1,37 +1,26 @@
-[defaults]
-buffered=0
-wait_for_previous=1
-filename={FILENAME}
-iodepth=1
-size=10G
+[global]
+include defaults.cfg
 
-# this is critical for correct results in multy-node run
-randrepeat=0
-
-time_based=1
 ramp_time=5
 runtime=30
 
-group_reporting=1
 BW_LIMIT=60m
 IOPS_LIMIT=100
 
+direct=1
 NUMJOBS=1
-NUM_ROUNDS=7
 
 # ---------------------------------------------------------------------
 # check different thread count. (latency, bw) = func(th_count)
 # ---------------------------------------------------------------------
-[vm_count_{TEST_SUMM} * {NUM_ROUNDS}]
-blocksize=16m
+[vmcount_{TEST_SUMM}]
+blocksize=4m
 rw={% randwrite, randread %}
-direct=1
 numjobs={NUMJOBS}
 rate={BW_LIMIT}
 
-[vm_count_{TEST_SUMM} * {NUM_ROUNDS}]
+[vmcount_{TEST_SUMM}]
 blocksize=4k
 rw={% randwrite,randread %}
-direct=1
 numjobs={NUMJOBS}
 rate_iops={IOPS_LIMIT}