release preparation
diff --git a/wally/pretty_yaml.py b/wally/pretty_yaml.py
index f078ff2..ff1f3bc 100644
--- a/wally/pretty_yaml.py
+++ b/wally/pretty_yaml.py
@@ -64,7 +64,9 @@
             key_str = dumps_simple(k) + ": "
             val_res = dumpv(v, tab_sz, width - tab_sz, min_width)
 
-            if len(val_res) == 1 and len(key_str + val_res[0]) < width:
+            if len(val_res) == 1 and \
+               len(key_str + val_res[0]) < width and \
+               not isinstance(v, dict):
                 res.append(key_str + val_res[0])
             else:
                 res.append(key_str)
diff --git a/wally/report.py b/wally/report.py
index ea8e943..eeffefc 100644
--- a/wally/report.py
+++ b/wally/report.py
@@ -10,8 +10,8 @@
 
 import wally
 from wally import charts
-from wally.statistic import round_3_digit
 from wally.utils import parse_creds, ssize_to_b
+from wally.statistic import round_3_digit, round_deviation
 from wally.suits.io.results_loader import process_disk_info
 from wally.meta_info import total_lab_info, collect_lab_data
 
@@ -116,7 +116,12 @@
             else:
                 info.__dict__[name] = round_3_digit(val)
 
-    report = templ.format(lab_info=lab_description, **info.__dict__)
+    data = info.__dict__.copy()
+    for k, v in data.items():
+        if v is None:
+            data[k] = "-"
+
+    report = templ.format(lab_info=lab_description, **data)
     open(dest, 'w').write(report)
 
 
@@ -130,7 +135,12 @@
         if not name.startswith('__') and isinstance(val, (int, long, float)):
             setattr(info, name, round_3_digit(val))
 
-    report = templ.format(lab_info=lab_description, **info.__dict__)
+    data = info.__dict__.copy()
+    for k, v in data.items():
+        if v is None:
+            data[k] = "-"
+
+    report = templ.format(lab_info=lab_description, **data)
     open(dest, 'w').write(report)
 
 
@@ -153,11 +163,11 @@
     ch = charts.render_vertical_bar(title, legend, [bar_data], [bar_dev_top],
                                     [bar_dev_bottom], file_name=fname,
                                     scale_x=concurence, label_x="clients",
-                                    label_y="iops",
+                                    label_y=legend[0],
                                     lines=[
                                         (latv, "msec", "rr", "lat"),
                                         (iops_or_bw_per_vm, None, None,
-                                         legend[0] + " per thread")
+                                         legend[0] + " per client")
                                     ])
     return str(ch)
 
@@ -175,8 +185,8 @@
         ('ceph_test_rrd4k', 'rand_read_4k', 'Random read 4k direct IOPS'),
         ('ceph_test_rws4k', 'rand_write_4k', 'Random write 4k sync IOPS'),
         ('ceph_test_rrd16m', 'rand_read_16m', 'Random read 16m direct MiBps'),
-        ('ceph_test_swd1m', 'seq_write_1m',
-            'Sequential write 1m direct MiBps'),
+        ('ceph_test_rwd16m', 'rand_write_16m',
+            'Random write 16m direct MiBps'),
     ]
     make_plots(processed_results, path, plots)
 
@@ -212,37 +222,42 @@
         io_chart(desc, concurence, lat, data, data_dev, name, fname)
 
 
+def find_max_where(processed_results, sync_mode, blocksize, rw, iops=True):
+    """Return [value, dev] of the matching result with the highest iops
+    (or bw when `iops` is false); [0, 0] when no match exceeds zero."""
+    field = 'iops' if iops else 'bw'
+    best = [0, 0]
+    for item in processed_results.values():
+        raw = item.raw
+        if raw['sync_mode'] == sync_mode and raw['blocksize'] == blocksize:
+            # strict '>' keeps the first of several equal maxima
+            if raw['rw'] == rw and getattr(item, field) > best[0]:
+                best = [getattr(item, field), item.dev]
+    return best
+
+
 def get_disk_info(processed_results):
     di = DiskInfo()
     rws4k_iops_lat_th = []
 
+    di.direct_iops_w_max = find_max_where(processed_results,
+                                          'd', '4k', 'randwrite')
+    di.direct_iops_r_max = find_max_where(processed_results,
+                                          'd', '4k', 'randread')
+    di.bw_write_max = find_max_where(processed_results,
+                                     'd', '16m', 'randwrite', False)
+    di.bw_read_max = find_max_where(processed_results,
+                                    'd', '16m', 'randread', False)
+
     for res in processed_results.values():
-        if res.raw['sync_mode'] == 'd' and res.raw['blocksize'] == '4k':
-            if res.raw['rw'] == 'randwrite':
-                di.direct_iops_w_max = max(di.direct_iops_w_max, res.iops)
-            elif res.raw['rw'] == 'randread':
-                di.direct_iops_r_max = max(di.direct_iops_r_max, res.iops)
-        elif res.raw['sync_mode'] == 's' and res.raw['blocksize'] == '4k':
+        if res.raw['sync_mode'] == 's' and res.raw['blocksize'] == '4k':
             if res.raw['rw'] != 'randwrite':
                 continue
-
             rws4k_iops_lat_th.append((res.iops, res.lat,
                                       res.raw['concurence']))
 
-        elif res.raw['sync_mode'] == 'd' and res.raw['blocksize'] == '1m':
-
-            if res.raw['rw'] == 'write':
-                di.bw_write_max = max(di.bw_write_max, res.bw)
-            elif res.raw['rw'] == 'read':
-                di.bw_read_max = max(di.bw_read_max, res.bw)
-        elif res.raw['sync_mode'] == 'd' and res.raw['blocksize'] == '16m':
-            if res.raw['rw'] == 'write' or res.raw['rw'] == 'randwrite':
-                di.bw_write_max = max(di.bw_write_max, res.bw)
-            elif res.raw['rw'] == 'read' or res.raw['rw'] == 'randread':
-                di.bw_read_max = max(di.bw_read_max, res.bw)
-
-    di.bw_write_max /= 1000
-    di.bw_read_max /= 1000
+    di.bw_write_max[0] /= 1000
+    di.bw_read_max[0] /= 1000
 
     rws4k_iops_lat_th.sort(key=lambda (_1, _2, conc): conc)
 
@@ -273,13 +288,21 @@
         setattr(di, 'rws4k_{}ms'.format(tlatv_ms), int(iops3))
 
     hdi = DiskInfo()
-    hdi.direct_iops_r_max = di.direct_iops_r_max
-    hdi.direct_iops_w_max = di.direct_iops_w_max
+
+    def pp(x):
+        med, dev = round_deviation((x[0], x[1] * x[0]))
+        # 3 sigma in %
+        dev = int(float(dev) / med * 100)
+        return (med, dev)
+
+    hdi.direct_iops_r_max = pp(di.direct_iops_r_max)
+    hdi.direct_iops_w_max = pp(di.direct_iops_w_max)
+    hdi.bw_write_max = pp(di.bw_write_max)
+    hdi.bw_read_max = pp(di.bw_read_max)
+
     hdi.rws4k_10ms = di.rws4k_10ms if 0 != di.rws4k_10ms else None
     hdi.rws4k_30ms = di.rws4k_30ms if 0 != di.rws4k_30ms else None
     hdi.rws4k_100ms = di.rws4k_100ms if 0 != di.rws4k_100ms else None
-    hdi.bw_write_max = di.bw_write_max
-    hdi.bw_read_max = di.bw_read_max
     return hdi
 
 
@@ -299,6 +322,7 @@
 
 def make_io_report(results, path, lab_url=None, creds=None):
     lab_info = None
+
     # if lab_url is not None:
     #     username, password, tenant_name = parse_creds(creds)
     #     creds = {'username': username,
diff --git a/wally/ssh_utils.py b/wally/ssh_utils.py
index efae92f..7a37c4b 100644
--- a/wally/ssh_utils.py
+++ b/wally/ssh_utils.py
@@ -251,7 +251,7 @@
 class URIsNamespace(object):
     class ReParts(object):
         user_rr = "[^:]*?"
-        host_rr = "[^:]*?"
+        host_rr = "[^:@]*?"
         port_rr = "\\d+"
         key_file_rr = "[^:@]*"
         passwd_rr = ".*?"
@@ -267,10 +267,13 @@
 
     templs = [
         "^{host_rr}$",
+        "^{host_rr}:{port_rr}$",
+        "^{user_rr}@{host_rr}$",
+        "^{user_rr}@{host_rr}:{port_rr}$",
         "^{user_rr}@{host_rr}::{key_file_rr}$",
         "^{user_rr}@{host_rr}:{port_rr}:{key_file_rr}$",
-        "^{user_rr}:{passwd_rr}@@{host_rr}$",
-        "^{user_rr}:{passwd_rr}@@{host_rr}:{port_rr}$",
+        "^{user_rr}:{passwd_rr}@{host_rr}$",
+        "^{user_rr}:{passwd_rr}@{host_rr}:{port_rr}$",
     ]
 
     for templ in templs:
@@ -278,8 +281,8 @@
 
 
 def parse_ssh_uri(uri):
-    # user:passwd@@ip_host:port
-    # user:passwd@@ip_host
+    # user:passwd@ip_host:port
+    # user:passwd@ip_host
     # user@ip_host:port
     # user@ip_host
     # ip_host:port
diff --git a/wally/start_vms.py b/wally/start_vms.py
index af7df71..6ce91f8 100644
--- a/wally/start_vms.py
+++ b/wally/start_vms.py
@@ -99,6 +99,15 @@
     cmd = "bash {spath} >/dev/null".format(spath=spath)
     subprocess.check_call(cmd, shell=True, env=env)
 
+    conn = nova_connect(name, passwd, tenant, auth_url)
+    while True:
+        status = conn.images.find(name='wally_ubuntu').status
+        if status == 'ACTIVE':
+            break
+        msg = "Image {0} is still in {1} state. Waiting 10 more seconds"
+        logger.info(msg.format('wally_ubuntu', status))
+        time.sleep(10)
+
 
 def prepare_os(nova, params):
     allow_ssh(nova, params['security_group'])
diff --git a/wally/statistic.py b/wally/statistic.py
index f3fcd6a..a02033d 100644
--- a/wally/statistic.py
+++ b/wally/statistic.py
@@ -19,6 +19,12 @@
     return round_deviation((val, val / 10.0))[0]
 
 
+def round_deviation_p(med_dev):  # dev is relative to med on input and output
+    center, ratio = med_dev
+    center, spread = round_deviation((center, center * ratio))
+    return [center, float(spread) / center]
+
+
 def round_deviation(med_dev):
     med, dev = med_dev
 
@@ -28,8 +34,8 @@
     dev_div = 10.0 ** (math.floor(math.log10(dev)) - 1)
     dev = int(dev / dev_div) * dev_div
     med = int(med / dev_div) * dev_div
-    return (type(med_dev[0])(med),
-            type(med_dev[1])(dev))
+    return [type(med_dev[0])(med),
+            type(med_dev[1])(dev)]
 
 
 def groupby_globally(data, key_func):
diff --git a/wally/suits/io/agent.py b/wally/suits/io/agent.py
index 0a84ed1..51eb2fd 100644
--- a/wally/suits/io/agent.py
+++ b/wally/suits/io/agent.py
@@ -148,7 +148,7 @@
                 raise ValueError(msg.format(count[1:-1],
                                  count.format(**sec.format_params)))
 
-            yield sec
+            yield sec.copy()
 
             if 'ramp_time' in sec.vals:
                 sec = sec.copy()
@@ -217,8 +217,10 @@
                 assert isinstance(val, (int, float)) or val is None
 
         params['UNIQ'] = 'UN{0}'.format(counter[0])
+        params['COUNTER'] = str(counter[0])
         counter[0] += 1
         params['TEST_SUMM'] = get_test_summary(sec.vals)
+
         sec.name = sec.name.format(**params)
 
         yield sec
diff --git a/wally/suits/io/ceph.cfg b/wally/suits/io/ceph.cfg
index c9b2f53..425696a 100644
--- a/wally/suits/io/ceph.cfg
+++ b/wally/suits/io/ceph.cfg
@@ -8,8 +8,8 @@
 filename={FILENAME}
 NUM_ROUNDS=7
 
-NUMJOBS={% 1, 5, 10, 15, 20, 30, 40 %}
-NUMJOBS_SHORT={% 1, 5, 10 %}
+NUMJOBS={% 1, 5, 10, 15, 40 %}
+NUMJOBS_SHORT={% 1, 2, 3, 10 %}
 
 size=30G
 ramp_time=5
diff --git a/wally/suits/io/hdd.cfg b/wally/suits/io/hdd.cfg
index 07e45e9..4156171 100644
--- a/wally/suits/io/hdd.cfg
+++ b/wally/suits/io/hdd.cfg
@@ -33,6 +33,14 @@
 numjobs={NUMJOBS}
 
 # ---------------------------------------------------------------------
+# check IOPS randwrite.
+# ---------------------------------------------------------------------
+[hdd_test_{TEST_SUMM} * {NUM_ROUNDS}]
+blocksize=4k
+rw=randwrite
+direct=1
+
+# ---------------------------------------------------------------------
 # No reason for th count > 1 in case of sequantial operations
 # They became random
 # ---------------------------------------------------------------------
@@ -40,11 +48,3 @@
 blocksize=1m
 rw={% read, write %}
 direct=1
-
-# ---------------------------------------------------------------------
-# check IOPS randwrite.
-# ---------------------------------------------------------------------
-[hdd_test_{TEST_SUMM} * {NUM_ROUNDS}]
-blocksize=4k
-rw=randwrite
-direct=1