fixes: report template labels and rounding, average latency reporting,
fio cycle ordering (numjobs innermost), test config reordering, and
prefill-check error handling
diff --git a/report_templates/report_cinder_iscsi.html b/report_templates/report_cinder_iscsi.html
index 364ea52..7bd1f5b 100644
--- a/report_templates/report_cinder_iscsi.html
+++ b/report_templates/report_cinder_iscsi.html
@@ -27,8 +27,8 @@
                         <td><div align="right">{direct_iops_r_max[0]} ~ {direct_iops_r_max[1]}%</div></td>
                     </tr>
                     <tr>
-                        <td>Write 64KiB</td>
-                        <td><div align="right">{direct_iops_w64_max[0]} ~ {direct_iops_w64_max[1]}%</div></td>
+                        <td>Write 4KiB</td>
+                        <td><div align="right">{direct_iops_w_max[0]} ~ {direct_iops_w_max[1]}%</div></td>
                     </tr>
                 </table>
             </td><td>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</td><td>
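Note: the placeholders above use plain Python str.format field syntax and are
filled from the (median, confidence %) tuples produced by pp() in
wally/report.py (see below). A minimal sketch of the substitution, with
hypothetical values:

    # str.format resolves "{direct_iops_w_max[0]}" by indexing into the tuple
    row = '{direct_iops_w_max[0]} ~ {direct_iops_w_max[1]}%'
    print(row.format(direct_iops_w_max=(2450, 5)))  # -> "2450 ~ 5%"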
diff --git a/wally/discover/discover.py b/wally/discover/discover.py
index 2f41562..957ecbe 100644
--- a/wally/discover/discover.py
+++ b/wally/discover/discover.py
@@ -89,7 +89,7 @@
                 with open(fuel_openrc_fname, "w") as fd:
                     fd.write(openrc_templ.format(**ctx.fuel_openstack_creds))
                     msg = "Openrc for cluster {0} saves into {1}"
-                    logger.debug(msg.format(env_name, fuel_openrc_fname))
+                    logger.info(msg.format(env_name, fuel_openrc_fname))
             nodes_to_run.extend(nodes)
 
         elif cluster == "ceph":
diff --git a/wally/report.py b/wally/report.py
index cb871b3..39950a4 100644
--- a/wally/report.py
+++ b/wally/report.py
@@ -505,8 +505,11 @@
             break
 
     if di.bw_write_max is None:
-        di.bw_write_max = find_max_where(processed_results,
-                                         'd', '1m', 'write', False)
+        for sz in ('1m', '2m', '4m', '8m'):
+            di.bw_write_max = find_max_where(processed_results,
+                                             'd', sz, 'write', False)
+            if di.bw_write_max is not None:
+                break
 
     for sz in ('16m', '64m'):
         di.bw_read_max = find_max_where(processed_results,
@@ -532,14 +535,13 @@
 
     latv = [lat for _, lat, _ in rws4k_iops_lat_th]
 
-    for tlatv_ms in [10, 30, 100]:
-        tlat = tlatv_ms * 1000
+    for tlat in [10, 30, 100]:
         pos = bisect.bisect_left(latv, tlat)
         if 0 == pos:
-            setattr(di, 'rws4k_{}ms'.format(tlatv_ms), 0)
+            setattr(di, 'rws4k_{}ms'.format(tlat), 0)
         elif pos == len(latv):
             iops3, _, _ = rws4k_iops_lat_th[-1]
-            setattr(di, 'rws4k_{}ms'.format(tlatv_ms), ">=" + str(iops3))
+            setattr(di, 'rws4k_{}ms'.format(tlat), ">=" + str(iops3))
         else:
             lat1 = latv[pos - 1]
             lat2 = latv[pos]
@@ -552,14 +554,14 @@
 
             th_iops_coef = (iops2 - iops1) / (th2 - th1)
             iops3 = th_iops_coef * (th3 - th1) + iops1
-            setattr(di, 'rws4k_{}ms'.format(tlatv_ms), int(iops3))
+            setattr(di, 'rws4k_{}ms'.format(tlat), int(iops3))
 
     hdi = DiskInfo()
 
     def pp(x):
         med, conf = x.rounded_average_conf()
         conf_perc = int(float(conf) / med * 100)
-        return (med, conf_perc)
+        return (round_3_digit(med), conf_perc)
 
     hdi.direct_iops_r_max = pp(di.direct_iops_r_max)
 
@@ -610,6 +612,7 @@
         ]
         images = make_plots(perf_infos, plots)
     di = get_disk_info(perf_infos)
+
     return render_all_html(comment, di, lab_info, images, "report_cinder_iscsi.html")
 
 
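Note: the interpolation block above answers "how many IOPS fit into a latency
budget of tlat ms": bisect locates the two measured points around the target
latency, a thread count th3 is interpolated linearly between them, and IOPS is
then interpolated at th3. A standalone sketch of the same two-step
interpolation, with hypothetical sample data:

    import bisect

    # (iops, latency_ms, thread_count) tuples, sorted by latency
    points = [(1200, 5, 1), (2100, 18, 5), (2600, 55, 10)]
    latv = [lat for _, lat, _ in points]

    def iops_at_latency(tlat):
        pos = bisect.bisect_left(latv, tlat)
        if pos == 0:
            return 0                       # even one thread exceeds the budget
        if pos == len(latv):
            return ">=%d" % points[-1][0]  # budget never reached: lower bound
        iops1, lat1, th1 = points[pos - 1]
        iops2, lat2, th2 = points[pos]
        # thread count at which latency crosses tlat, assuming linearity
        th3 = th1 + (th2 - th1) * (tlat - lat1) / float(lat2 - lat1)
        # IOPS interpolated at that thread count
        return int(iops1 + (iops2 - iops1) * (th3 - th1) / float(th2 - th1))

    for tlat in (10, 30, 100):
        print(tlat, iops_at_latency(tlat))  # 10 -> 1546, 100 -> '>=2600'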
diff --git a/wally/run_test.py b/wally/run_test.py
index 0e8f6e9..4484d45 100755
--- a/wally/run_test.py
+++ b/wally/run_test.py
@@ -438,7 +438,11 @@
 
     for group in cfg['tests']:
 
-        assert len(group.items()) == 1
+        if len(group.items()) != 1:
+            msg = "Items in tests section should have len == 1"
+            logger.error(msg)
+            raise utils.StopTestError(msg)
+
         key, config = group.items()[0]
 
         if 'start_test_nodes' == key:
@@ -619,6 +623,9 @@
     descr = "Disk io performance test suite"
     parser = argparse.ArgumentParser(prog='wally', description=descr)
 
+    # subparsers = parser.add_subparsers()
+    # test_parser = subparsers.add_parser('test', help='run tests')
+
     parser.add_argument("-l", dest='extra_logs',
                         action='store_true', default=False,
                         help="print some extra log info")
@@ -730,8 +737,8 @@
 
     opts = parse_args(argv)
 
-    # x = load_data_from_path("/var/wally_results/silky_virgen")
-    # y = load_data_from_path("/var/wally_results/cibarial_jacob")
+    # x = load_data_from_path("/var/wally_results/uncorroborant_dinah")
+    # y = load_data_from_path("/var/wally_results/nonmelting_jamal")
     # print(IOPerfTest.format_diff_for_console([x['io'], y['io']]))
     # exit(1)
 
@@ -746,7 +753,8 @@
         save_run_params()
 
     if cfg_dict.get('logging', {}).get("extra_logs", False) or opts.extra_logs:
-        level = logging.DEBUG
+        # level = logging.DEBUG
+        level = logging.INFO
     else:
         level = logging.WARNING
 
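Note: the assert-to-StopTestError change above assumes each entry of the
'tests' list is a one-key mapping of test kind to its config. A minimal sketch
of the shape being validated (config contents are hypothetical):

    cfg = {'tests': [{'start_test_nodes': {'count': 2}},
                     {'io': {'cfg': 'ceph'}}]}

    for group in cfg['tests']:
        items = list(group.items())
        if len(items) != 1:
            raise ValueError("Each group in 'tests' must have exactly one key")
        key, config = items[0]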
diff --git a/wally/suits/io/ceph.cfg b/wally/suits/io/ceph.cfg
index 330b1bd..48c1ec6 100644
--- a/wally/suits/io/ceph.cfg
+++ b/wally/suits/io/ceph.cfg
@@ -1,11 +1,11 @@
 [global]
 include defaults.cfg
 
-NUMJOBS_R={% 1, 5, 10, 15, 25, 40 %}
-NUMJOBS_W={% 1, 3, 5, 7, 10%}
-NUMJOBS_SEQ_OPS={% 1, 2, 3, 10 %}
+NUMJOBS_R={% 1, 5, 10, 15, 25, 40, 80 %}
+NUMJOBS_W={% 1, 5, 10, 15, 25 %}
+NUMJOBS_SEQ_OPS={% 1, 3, 10 %}
 
-ramp_time=15
+ramp_time=60
 runtime=240
 
 # ---------------------------------------------------------------------
diff --git a/wally/suits/io/cinder_iscsi.cfg b/wally/suits/io/cinder_iscsi.cfg
index fd67090..01439b3 100644
--- a/wally/suits/io/cinder_iscsi.cfg
+++ b/wally/suits/io/cinder_iscsi.cfg
@@ -1,11 +1,9 @@
 [global]
 include defaults.cfg
-
-NUMJOBS={% 1, 5, 10, 15, 25, 40 %}
-
-ramp_time=15
-runtime=240
+ramp_time=30
+runtime=120
 direct=1
+NUMJOBS={% 1, 5, 10, 15, 25, 40 %}
 
 # ---------------------------------------------------------------------
 # check different thread count, sync mode. (latency, iops) = func(th_count)
@@ -26,25 +24,26 @@
 numjobs={NUMJOBS}
 
 # ---------------------------------------------------------------------
-# check IOPS randwrite.
+# Reads are always sync; with large requests latency approaches that of linear write
+# ......
 # ---------------------------------------------------------------------
 [cinder_iscsi_{TEST_SUMM}]
-blocksize=4k
-rw=randwrite
+blocksize=64m
+rw=randread
 
 # ---------------------------------------------------------------------
 # No reason for th count > 1 in case of sequential operations,
 # or they become random
 # ---------------------------------------------------------------------
 [cinder_iscsi_{TEST_SUMM}]
-blocksize=16m
-rw={% randread,randwrite %}
-numjobs={% 1, 2, 3, 10 %}
+blocksize=8m
+rw=write
 
-# [cinder_iscsi_{TEST_SUMM}]
-# blocksize=1m
-# rw={% read,write %}
-# offset={UNIQ_OFFSET}
-# ramp_time=90
-# runtime=30
-# 
+# ---------------------------------------------------------------------
+# check IOPS randwrite. This test MUST BE THE LAST ONE
+# 240 seconds of ramp time are needed to clean caches
+# ---------------------------------------------------------------------
+[cinder_iscsi_{TEST_SUMM}]
+ramp_time=240
+blocksize=4k
+rw=randwrite
diff --git a/wally/suits/io/fio.py b/wally/suits/io/fio.py
index a57faff..f9f1d1b 100644
--- a/wally/suits/io/fio.py
+++ b/wally/suits/io/fio.py
@@ -146,6 +146,7 @@
         self.lat = None
         self.lat_50 = None
         self.lat_95 = None
+        self.lat_avg = None
 
         self.raw_bw = []
         self.raw_iops = []
@@ -202,7 +203,7 @@
     """
     def __init__(self, config, fio_task, ts_results, raw_result, run_interval):
 
-        self.name = fio_task.name.split("_")[0]
+        self.name = fio_task.name.rsplit("_", 1)[0]
         self.fio_task = fio_task
 
         self.bw = ts_results.get('bw')
@@ -310,10 +311,11 @@
                 res.append(sum(dt[idx] for dt in arr))
             return res
 
-        # pinfo.raw_lat = map(prepare, self.lat.per_vm())
-        # num_th = sum(map(len, pinfo.raw_lat))
-        # avg_lat = [val / num_th for val in agg_data(pinfo.raw_lat)]
-        # pinfo.lat = data_property(avg_lat)
+        pinfo.raw_lat = map(prepare, self.lat.per_vm())
+        num_th = sum(map(len, pinfo.raw_lat))
+        lat_avg = [val / num_th for val in agg_data(pinfo.raw_lat)]
+        pinfo.lat_avg = data_property(lat_avg).average / 1000  # us to ms
+
         pinfo.lat_50, pinfo.lat_95 = self.get_lat_perc_50_95_multy()
         pinfo.lat = pinfo.lat_50
 
@@ -353,8 +355,10 @@
         bw_per_th = sum(sum(pinfo.raw_bw, []), [])
         if average(bw_per_th) > 10:
             pinfo.bw = bw_log
-        else:
             pinfo.bw2 = bw_report
+        else:
+            pinfo.bw = bw_report
+            pinfo.bw2 = bw_log
 
         self._pinfo = pinfo
 
@@ -421,10 +425,13 @@
 
     # size is in megabytes
     def check_prefill_required(self, rossh, fname, size, num_blocks=16):
-        with rossh.connection.open_sftp() as sftp:
-            fstats = sftp.stat(fname)
+        try:
+            with rossh.connection.open_sftp() as sftp:
+                fstats = sftp.stat(fname)
 
-        if stat.S_ISREG(fstats) and fstats.st_size < size * 1024 ** 2:
+            if stat.S_ISREG(fstats.st_mode) and fstats.st_size < size * 1024 ** 2:
+                return True
+        except EnvironmentError:
             return True
 
         cmd = 'python -c "' + \
@@ -624,8 +631,6 @@
         lat_bw_limit_reached = set()
 
         with ThreadPoolExecutor(len(self.config.nodes)) as pool:
-            self.fio_configs.sort(key=lambda x: int(x.vals.get('numjobs', 1)))
-
             for pos, fio_cfg in enumerate(self.fio_configs):
                 test_descr = get_test_summary(fio_cfg.vals).split("th")[0]
                 if test_descr in lat_bw_limit_reached:
@@ -859,10 +864,10 @@
 
         def key_func(data):
             tpl = data.summary_tpl()
-            return (data.name.rsplit("_", 1)[0],
+            return (data.name,
                     tpl.oper,
                     tpl.mode,
-                    tpl.bsize,
+                    ssize2b(tpl.bsize),
                     int(tpl.th_count) * int(tpl.vm_count))
         res = []
 
@@ -878,6 +883,7 @@
 
             lat_50 = round_3_digit(int(test_dinfo.lat_50))
             lat_95 = round_3_digit(int(test_dinfo.lat_95))
+            lat_avg = round_3_digit(int(test_dinfo.lat_avg))
 
             testnodes_count = len(item.config.nodes)
             iops_per_vm = round_3_digit(iops / testnodes_count)
@@ -898,7 +904,8 @@
                         "iops_per_vm": int(iops_per_vm),
                         "bw_per_vm": int(bw_per_vm),
                         "lat_50": lat_50,
-                        "lat_95": lat_95})
+                        "lat_95": lat_95,
+                        "lat_avg": lat_avg})
 
         return res
 
@@ -913,7 +920,8 @@
         Field("iops\n/vm",      "iops_per_vm", "r",  3),
         Field("KiBps\n/vm",     "bw_per_vm",   "r",  6),
         Field("lat ms\nmedian", "lat_50",      "r",  3),
-        Field("lat ms\n95%",    "lat_95",      "r",  3)
+        Field("lat ms\n95%",    "lat_95",      "r",  3),
+        Field("lat\navg",       "lat_avg",     "r",  3),
     ]
 
     fiels_and_header_dct = dict((item.attr, item) for item in fiels_and_header)
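Note: the new lat_avg path aggregates the per-thread latency series that fio
reports in microseconds: sum them element-wise across all VMs and threads,
divide by the total thread count, and convert to milliseconds. A standalone
sketch with hypothetical data:

    # per-VM lists of per-thread latency series, in microseconds
    raw_lat = [[[9000, 11000], [10000, 12000]],   # VM 1: two threads
               [[8000, 10000]]]                   # VM 2: one thread

    num_th = sum(len(vm) for vm in raw_lat)       # 3 threads in total
    n_points = len(raw_lat[0][0])
    # element-wise sum over every thread, then per-thread average
    agg = [sum(th[i] for vm in raw_lat for th in vm) for i in range(n_points)]
    lat_avg_ms = [v / float(num_th) / 1000 for v in agg]  # us -> ms
    print(sum(lat_avg_ms) / len(lat_avg_ms))              # 10.0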
diff --git a/wally/suits/io/fio_task_parser.py b/wally/suits/io/fio_task_parser.py
index 9d19f3a..0f788ed 100644
--- a/wally/suits/io/fio_task_parser.py
+++ b/wally/suits/io/fio_task_parser.py
@@ -232,9 +232,25 @@
     if len(cycles) == 0:
         yield sec
     else:
-        for combination in itertools.product(*cycles.values()):
+        # thread count should change fastest, so make numjobs the innermost cycle
+        numjobs = cycles.pop('numjobs', None)
+        items = cycles.items()
+
+        if len(items) > 0:
+            keys, vals = zip(*items)
+            keys = list(keys)
+            vals = list(vals)
+        else:
+            keys = []
+            vals = []
+
+        if numjobs is not None:
+            vals.append(numjobs)
+            keys.append('numjobs')
+
+        for combination in itertools.product(*vals):
             new_sec = sec.copy()
-            new_sec.vals.update(zip(cycles.keys(), combination))
+            new_sec.vals.update(zip(keys, combination))
             yield new_sec
 
 
@@ -354,7 +370,11 @@
     if th_count is None:
         th_count = vals.get('concurence', 1)
 
-    return TestSumm(rw, sync_mode, vals['blocksize'], th_count, vm_count)
+    return TestSumm(rw,
+                    sync_mode,
+                    vals['blocksize'],
+                    th_count,
+                    vm_count)
 
 
 def get_test_summary(sec, vm_count=None):
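Note on the cycle reordering above: itertools.product cycles its last iterable
fastest, so popping 'numjobs' out of the dict and appending it after the other
keys makes the thread count vary between consecutive generated sections. A
minimal illustration (values hypothetical):

    import itertools

    keys = ['blocksize', 'numjobs']
    vals = [['4k', '1m'], [1, 5, 10]]
    for combo in itertools.product(*vals):
        print(dict(zip(keys, combo)))
    # blocksize stays at '4k' while numjobs runs 1, 5, 10, then '1m' follows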
diff --git a/wally/suits/io/hdd.cfg b/wally/suits/io/hdd.cfg
index ede6de2..b3c6293 100644
--- a/wally/suits/io/hdd.cfg
+++ b/wally/suits/io/hdd.cfg
@@ -3,10 +3,10 @@
 
 # NUMJOBS={% 1, 5, 10, 15, 20, 30, 40, 80 %}
 
-NUMJOBS={% 1, 3, 5, 10, 20, 40 %}
+NUMJOBS={% 1, 5, 10, 15, 25, 40 %}
 
-ramp_time=5
-runtime=90
+ramp_time=30
+runtime=120
 direct=1
 
 # ---------------------------------------------------------------------
@@ -28,16 +28,17 @@
 numjobs={NUMJOBS}
 
 # ---------------------------------------------------------------------
-# check IOPS randwrite.
-# ---------------------------------------------------------------------
-[hdd_{TEST_SUMM}]
-blocksize=4k
-rw=randwrite
-
-# ---------------------------------------------------------------------
 # No reason for th count > 1 in case of sequential operations,
 # or they become random
 # ---------------------------------------------------------------------
 [hdd_{TEST_SUMM}]
 blocksize=1m
 rw={% read, write %}
+
+# ---------------------------------------------------------------------
+# check IOPS randwrite.
+# ---------------------------------------------------------------------
+[hdd_{TEST_SUMM}]
+ramp_time=240
+blocksize=4k
+rw=randwrite
diff --git a/wally/suits/io/rrd.cfg b/wally/suits/io/rrd.cfg
index 3383dce..86a73c5 100644
--- a/wally/suits/io/rrd.cfg
+++ b/wally/suits/io/rrd.cfg
@@ -1,14 +1,34 @@
 [global]
 include defaults.cfg
+ramp_time=30
+runtime=120
+numjobs={% 1,10 %}
+direct=1
+rw={% randwrite, randread %}
 
 # ---------------------------------------------------------------------
-[rws_{TEST_SUMM}]
-blocksize=4k
-rw=randwrite
-sync=1
-ramp_time=15
-runtime=120
-numjobs={% 50,150 %}
+#[test_{TEST_SUMM}]
+#blocksize=1m
+
+# ---------------------------------------------------------------------
+#[test_{TEST_SUMM}]
+#blocksize=4m
+
+# ---------------------------------------------------------------------
+#[test_{TEST_SUMM}]
+#blocksize=16m
+
+# ---------------------------------------------------------------------
+[test_{TEST_SUMM}]
+blocksize=64m
+rw=randread
+numjobs=1
+
+# ---------------------------------------------------------------------
+[test_{TEST_SUMM}]
+blocksize=4m
+rw=write
+numjobs=1
 
 # ---------------------------------------------------------------------
 # [rws_{TEST_SUMM}]