Large refactoring: ready to move away from rally
diff --git a/scripts/data.py b/scripts/data.py
new file mode 100644
index 0000000..dfe97a2
--- /dev/null
+++ b/scripts/data.py
@@ -0,0 +1,115 @@
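+"""Parse raw benchmark output files and print a comparison table.
+
+Usage: python data.py results_1 [results_2 ...]
+       python data.py --json results_file
+"""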
+import re
+import ast
+import sys
+import json
+
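+# blocks in the raw output are separated by lines of '=' signs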
+splitter_rr = "(?ms)=====+\n"
+
+
+def get_data_from_output(fname):
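+    """Parse one raw output file.
+
+    Return (meta dict of the last parsed block, {test key: (mean bw,
+    mean abs deviation)}).
+    """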
+    results = {}
+    meta = None
+
+    with open(fname) as f:
+        fc = f.read()
+
+    for block in re.split(splitter_rr, fc):
+        block = block.strip()
+        if not block.startswith("[{u'__meta__':"):
+            continue
+        # each block is the repr() of a list of dicts; literal_eval parses
+        # it without executing arbitrary code
+        for val in ast.literal_eval(block):
+            meta = val['__meta__']
+            meta['sync'] = 's' if meta['sync'] else 'a'
+            key = "{action} {sync} {blocksize}k".format(**meta)
+            results.setdefault(key, []).append(val['bw_mean'])
+
+    processed_res = {}
+
+    for k, v in results.items():
+        # mean bandwidth and mean absolute deviation
+        avg = float(sum(v)) / len(v)
+        dev = sum(abs(x - avg) for x in v) / len(v)
+        processed_res[k] = (int(avg), int(dev))
+
+    return meta, processed_res
+
+
+def ksort(x):
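+    # sort key for entries like "read s 4k":
+    # (operation, sync flag, block size in KiB)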
+    op, sync, sz = x.split(" ")
+    return (op, sync, int(sz[:-1]))
+
+
+def create_json_results(meta, file_data):
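+    # build metadata fields are left as empty placeholders for now
+    # (the parsed meta argument is not used yet)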
+    row = {"build_id": "",
+           "type": "",
+           "iso_md5": ""}
+    row.update(file_data)
+    return json.dumps(row)
+
+
+def show_data(*paths):
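+    """Print an aligned table comparing all result files; every file after
+    the first gets an extra DIFF % column relative to the first one."""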
+    begin = "|  {:>10}  {:>5}  {:>5}"
+    first_file_templ = "  |  {:>6} ~ {:>5} {:>2}% {:>5}"
+    other_file_templ = "  |  {:>6} ~ {:>5} {:>2}% {:>5} ----  {:>6}%"
+
+    line_templ = begin + first_file_templ + \
+        other_file_templ * (len(paths) - 1) + "  |"
+
+    header_ln = line_templ.replace("<", "^").replace(">", "^")
+
+    params = ["Oper", "Sync", "BSZ", "BW1", "DEV1", "%", "IOPS1"]
+    for pos in range(1, len(paths)):
+        params += "BW{0}+DEV{0}+%+IOPS{0}+DIFF %".format(pos + 1).split("+")
+
+    header_ln = header_ln.format(*params)
+
+    sep = '-' * len(header_ln)
+
+    results = [get_data_from_output(path)[1] for path in paths]
+
+    print sep
+    print header_ln
+    print sep
+
+    prev_tp = None
+
+    common_keys = set(results[0].keys())
+    for result in results[1:]:
+        common_keys &= set(result.keys())
+
+    for k in sorted(common_keys, key=ksort):
+        tp = k.rsplit(" ", 1)[0]
+        op, s, sz = k.split(" ")
+        s = 'sync' if s == 's' else 'async'
+
+        if tp != prev_tp and prev_tp is not None:
+            print sep
+
+        prev_tp = tp
+
+        results0 = results[0]
+        m0, d0 = results0[k]
+        iops0 = m0 / int(sz[:-1])
+        perc0 = int(d0 * 100.0 / m0 + 0.5)
+
+        data = [op, s, sz, m0, d0, perc0, iops0]
+
+        for result in results[1:]:
+            m, d = result[k]
+            iops = m / int(sz[:-1])
+            perc = int(d * 100.0 / m + 0.5)
+            # percent difference relative to the first (baseline) file
+            avg_diff = int(((m - m0) * 100.) / m0 + 0.5)
+            data.extend([m, d, perc, iops, avg_diff])
+
+        print line_templ.format(*data)
+
+    print sep
+
+
+def main(argv):
+    if argv[0] == '--json':
+        print create_json_results(*get_data_from_output(argv[1]))
+    else:
+        show_data(*argv)
+    return 0
+
+if __name__ == "__main__":
+    exit(main(sys.argv[1:]))
diff --git a/scripts/data_extractor.py b/scripts/data_extractor.py
new file mode 100644
index 0000000..d655f17
--- /dev/null
+++ b/scripts/data_extractor.py
@@ -0,0 +1,196 @@
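+"""Store parsed benchmark results in sqlite and pull slices back out.
+
+Parameter combinations live in generic param_N columns; the param table
+records each parameter's name and value domain.
+"""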
+import sys
+import json
+import sqlite3
+import contextlib
+
+
+def connect(url):
+    return sqlite3.connect(url)
+
+
+create_db_sql_templ = """
+CREATE TABLE build (id integer primary key,
+                    build text,
+                    type text,
+                    md5 text);
+
+CREATE TABLE params_combination (id integer primary key, {params});
+CREATE TABLE param (id integer primary key, name text, type text);
+
+CREATE TABLE result (build_id integer,
+                     params_combination integer,
+                     bandwidth float,
+                     deviation float);
+"""
+
+
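+# number of generic param_N columns in params_combination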
+PARAM_COUNT = 20
+
+
+def get_all_tables(conn):
+    cursor = conn.cursor()
+    cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
+    return cursor.fetchall()
+
+
+def drop_database(conn):
+    cursor = conn.cursor()
+    cursor.execute("drop table result")
+    cursor.execute("drop table params_combination")
+    cursor.execute("drop table build")
+    cursor.execute("drop table param")
+
+
+def init_database(conn):
+    cursor = conn.cursor()
+
+    params = ["param_{0} text".format(i) for i in range(PARAM_COUNT)]
+    create_db_sql = create_db_sql_templ.format(params=",".join(params))
+
+    for sql in create_db_sql.split(";"):
+        if sql.strip():
+            cursor.execute(sql)
+
+
+def insert_io_params(conn):
+    sql = """insert into param (name, type) values ('operation',
+                '{write,randwrite,read,randread}');
+             insert into param (name, type) values ('sync', '{a,s}');
+             insert into param (name, type) values ('block_size', 'size_kmg');
+          """
+
+    for insert in sql.split(";"):
+        if insert.strip():
+            conn.execute(insert)
+
+
+def insert_build(cursor, build_id, build_type, iso_md5):
+    cursor.execute("insert into build (build, type, md5) values (?, ?, ?)",
+                   (build_id, build_type, iso_md5))
+    return cursor.lastrowid
+
+
+def insert_params(cursor, *param_vals):
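+    # get-or-create: look the combination up first and insert it only if
+    # missing; unused slots are padded with empty strings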
+    param_vals = param_vals + ("",) * (PARAM_COUNT - len(param_vals))
+
+    params = ",".join(['?'] * PARAM_COUNT)
+    select_templ = "select id from params_combination where {params_where}"
+
+    params_where = ["param_{0}=?".format(i) for i in range(PARAM_COUNT)]
+    req = select_templ.format(params_where=" AND ".join(params_where))
+    cursor.execute(req, param_vals)
+    res = cursor.fetchall()
+    if res:
+        return res[0][0]
+
+    params = ",".join(['?'] * PARAM_COUNT)
+    param_insert_templ = "insert into params_combination ({0}) values ({1})"
+    param_names = ",".join("param_{0}".format(i) for i in range(PARAM_COUNT))
+    req = param_insert_templ.format(param_names, params)
+    cursor.execute(req, param_vals)
+    return cursor.lastrowid
+
+
+def insert_results(cursor, build_id, params_id, bw, dev):
+    req = "insert into result values (?, ?, ?, ?)"
+    cursor.execute(req, (build_id, params_id, bw, dev))
+
+
+@contextlib.contextmanager
+def transaction(conn):
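+    """Yield a cursor; commit on success, roll back on any exception."""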
+    try:
+        cursor = conn.cursor()
+        yield cursor
+    except:
+        conn.rollback()
+        raise
+    else:
+        conn.commit()
+
+
+def json_to_db(json_data, conn):
+    data = json.loads(json_data)
+    with transaction(conn) as cursor:
+        for build_data in data:
+            build_id = insert_build(cursor,
+                                    build_data.pop("build_id"),
+                                    build_data.pop("type"),
+                                    build_data.pop("iso_md5"))
+
+            for params, (bw, dev) in build_data.items():
+                param_id = insert_params(cursor, *params.split(" "))
+                insert_results(cursor, build_id, param_id, bw, dev)
+
+
+def to_db():
+    conn = sqlite3.connect(sys.argv[1])
+    json_data = open(sys.argv[2]).read()
+
+    if len(get_all_tables(conn)) == 0:
+        init_database(conn)
+
+    json_to_db(json_data, conn)
+
+
+def ssize_to_kb(ssize):
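+    """Convert '4k' -> 4, '1m' -> 1024; plain numbers are treated as bytes
+    ('4096' -> 4) and must be a multiple of 1024."""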
+    try:
+        smap = dict(k=1, K=1, M=1024, m=1024, G=1024**2, g=1024**2)
+        for ext, coef in smap.items():
+            if ssize.endswith(ext):
+                return int(ssize[:-1]) * coef
+
+        if int(ssize) % 1024 != 0:
+            raise ValueError()
+
+        return int(ssize) / 1024
+
+    except (ValueError, TypeError, AttributeError):
+        tmpl = "Unknow size format {0!r} (or size not multiples 1024)"
+        raise ValueError(tmpl.format(ssize))
+
+
+def load_slice(cursor, build_id, y_param, **params):
+    # map parameter names to their param_N column indexes
+    # (param ids start at 1, columns at param_0)
+    params_id = {}
+    for param in list(params) + [y_param]:
+        cursor.execute("select id from param where name=?", (param,))
+        params_id[param] = cursor.fetchone()[0] - 1
+
+    sql = """select params_combination.param_{0}, result.bandwidth
+             from params_combination, result
+             where params_combination.id=result.params_combination
+                   and result.build_id=?""".format(params_id[y_param])
+
+    values = [build_id]
+    for param, val in params.items():
+        # filter values go through placeholders, not string interpolation
+        sql += " and params_combination.param_{0}=?".format(params_id[param])
+        values.append(val)
+
+    cursor.execute(sql, values)
+    return cursor.fetchall()
+
+
+def from_db():
+    conn = sqlite3.connect(sys.argv[1])
+    # sql = sys.argv[2]
+    cursor = conn.cursor()
+
+    sql = """select params_combination.param_2, result.bandwith
+    from params_combination, result
+    where params_combination.param_0="write"
+          and params_combination.param_1="s"
+          and params_combination.id=result.params_combination
+          and result.build_id=60"""
+
+    cursor.execute(sql)
+    data = []
+
+    for (sz, bw) in cursor.fetchall():
+        data.append((ssize_to_kb(sz), sz, bw))
+
+    data.sort()
+
+    import matplotlib.pyplot as plt
+    xvals = range(len(data))
+    plt.plot(xvals, [dt[2] for dt in data])
+    plt.ylabel('bandwidth')
+    plt.xlabel('block size')
+    plt.xticks(xvals, [dt[1] for dt in data])
+    plt.show()
+
+
+if __name__ == "__main__":
+    from_db()
diff --git a/scripts/data_generator.py b/scripts/data_generator.py
new file mode 100644
index 0000000..096da3b
--- /dev/null
+++ b/scripts/data_generator.py
@@ -0,0 +1,35 @@
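+"""Fill a storage backend with random but plausible-looking benchmark rows
+so the reporting code can be tested without running real tests."""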
+import sys
+import uuid
+import random
+import itertools
+
+from petname import Generate as pet_generate
+from storage_api import create_storage
+
+from report import ssize_to_kb
+
+types = ["GA", "master"] + [pet_generate(2, '-') for _ in range(2)]
+random.shuffle(types)
+tp = itertools.cycle(types)
+
+sz = ["1k", "4k", "64k", "256k", "1m"]
+op_type = ["randread", "read", "randwrite", "write"]
+is_sync = ["s", "a"]
+
+storage = create_storage(sys.argv[1], "", "")
+combinations = list(itertools.product(op_type, is_sync, sz))
+
+for _ in range(30):
+    row = {"build_id": pet_generate(2, " "),
+           "type": next(tp),
+           "iso_md5": uuid.uuid4().get_hex()}
+
+    for op, sync, bsize in combinations:
+        base = ssize_to_kb(bsize) ** 0.5
+        bw = ((random.random() - 0.5) * 0.2 + 1) * base
+        dev = ((random.random() - 0.5) * 0.2 + 1) * base * 0.15
+        row[" ".join([op, sync, bsize])] = (bw, dev)
+
+    print "generated a row with", len(row), "fields"
+    storage.store(row)
diff --git a/scripts/run.sh b/scripts/run.sh
new file mode 100644
index 0000000..342356e
--- /dev/null
+++ b/scripts/run.sh
@@ -0,0 +1,72 @@
+#!/bin/bash
+set -x
+set -e
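+
+# three passes over every (block size, operation, sync mode) combination;
+# sync sequential writes are skipped and async runs skip 1k/4k block sizes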
+
+type="iozone"
+
+bsizes="1k 4k 64k 256k 1m"
+ops="write randwrite"
+osync="s a"
+three_times="1 2 3"
+
+for bsize in $bsizes ; do
+	for op in $ops ; do
+		for sync in $osync ; do
+			for xxx in $three_times ; do
+				if [[ "$op" == "write" && "$sync" == "s" ]] ; then
+					continue
+				fi
+
+				if [[ "$sync" == "s" ]] ; then
+					ssync="-s"
+					factor="x500"
+				else
+					if [[ "$bsize" == "1k" || "$bsize" == "4k" ]] ; then
+						continue
+					fi
+
+					ssync=
+					factor="r2"
+				fi
+
+				io_opts="--type $type -a $op --iodepth 16 --blocksize $bsize --iosize $factor $ssync"
+				python run_rally_test.py -l -o "$io_opts" -t io-scenario $type --rally-extra-opts="--deployment $1"
+			done
+		done
+	done
+done
+
+# bsizes="4k 64k 256k 1m"
+# ops="randread read"
+
+# for bsize in $bsizes ; do
+# 	for op in $ops ; do 
+# 		for xxx in $three_times ; do
+# 			io_opts="--type $type -a $op --iodepth 16 --blocksize $bsize --iosize r2"
+# 			python run_rally_test.py -l -o "$io_opts" -t io-scenario $type --rally-extra-opts="--deployment $1"
+# 		done
+# 	done
+# done
+
+# bsizes="1k 4k"
+# ops="randwrite write"
+# three_times="1 2 3"
+
+# for bsize in $bsizes ; do
+# 	for op in $ops ; do 
+# 		for xxx in $three_times ; do
+# 			factor="r2"
+# 			io_opts="--type $type -a $op --iodepth 16 --blocksize $bsize --iosize $factor"
+# 			python run_rally_test.py -l -o "$io_opts" -t io-scenario $type --rally-extra-opts="--deployment $1"
+# 		done
+# 	done
+# done
+
+# ops="randread read"
+
+# for op in $ops ; do 
+# 	for xxx in $three_times ; do
+# 		io_opts="--type $type -a $op --iodepth 16 --blocksize 1k --iosize r2"
+# 		python run_rally_test.py -l -o "$io_opts" -t io-scenario $type --rally-extra-opts="--deployment $1"
+# 	done
+# done
diff --git a/scripts/run_2.sh b/scripts/run_2.sh
new file mode 100644
index 0000000..7bd92fb
--- /dev/null
+++ b/scripts/run_2.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+set -x
+set -e
+
+type="iozone"
+
+io_opts="--type $type -a write --iodepth 16 --blocksize 1m --iosize x20"
+python run_rally_test.py -l -o "$io_opts" -t io-scenario $type --rally-extra-opts="--deployment $1"
diff --git a/scripts/run_tests.sh b/scripts/run_tests.sh
new file mode 100755
index 0000000..f80e5be
--- /dev/null
+++ b/scripts/run_tests.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+
+set -x
+set -e
+
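+# short smoke runs: one small sync write per tool, output teed into
+# ceph_results.txt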
+CMD1="--type iozone -a write --iodepth 8 --blocksize 4k --iosize 40M -s"
+CMD2="--type fio -a write --iodepth 8 --blocksize 4k --iosize 4M -s"
+
+python run_rally_test.py -l -o "$CMD1" -t io-scenario iozone 2>&1 | tee ceph_results.txt
+python run_rally_test.py -l -o "$CMD2" -t io-scenario fio 2>&1 | tee -a ceph_results.txt
+
diff --git a/scripts/starts_vms.py b/scripts/starts_vms.py
new file mode 100644
index 0000000..b52c2d5
--- /dev/null
+++ b/scripts/starts_vms.py
@@ -0,0 +1,240 @@
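+"""Boot a pool of test VMs with attached volumes and floating ips, then
+tear everything down again afterwards."""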
+import re
+import os
+import time
+
+import paramiko
+from novaclient.client import Client as n_client
+from cinderclient.v1.client import Client as c_client
+
+
+def ostack_get_creds():
+    env = os.environ.get
+    name = env('OS_USERNAME')
+    passwd = env('OS_PASSWORD')
+    tenant = env('OS_TENANT_NAME')
+    auth_url = env('OS_AUTH_URL')
+    return name, passwd, tenant, auth_url
+
+
+def nova_connect():
+    return n_client('1.1', *ostack_get_creds())
+
+
+def create_keypair(nova, name, key_path):
+    with open(key_path) as key:
+        return nova.keypairs.create(name, key.read())
+
+
+def create_volume(size, name=None, volid=[0]):
+    # the mutable default argument is intentional: it acts as a call counter
+    cinder = c_client(*ostack_get_creds())
+    if name is None:
+        name = 'ceph-test-{0}'.format(volid[0])
+    volid[0] += 1
+    vol = cinder.volumes.create(size=size, display_name=name)
+    err_count = 0
+    while vol.status != 'available':
+        if vol.status == 'error':
+            if err_count == 3:
+                raise RuntimeError("Failed to create volume")
+            else:
+                err_count += 1
+                cinder.volumes.delete(vol)
+                time.sleep(1)
+                vol = cinder.volumes.create(size=size, display_name=name)
+                continue
+        time.sleep(1)
+        vol = cinder.volumes.get(vol.id)
+    return vol
+
+
+def wait_for_server_active(nova, server, timeout=240):
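+    # poll nova until the server goes active; return False on error state
+    # or timeout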
+    t = time.time()
+    while True:
+        time.sleep(5)
+        sstate = getattr(server, 'OS-EXT-STS:vm_state').lower()
+
+        if sstate == 'active':
+            return True
+
+        print "Curr state is", sstate, "waiting for active"
+
+        if sstate == 'error':
+            return False
+
+        if time.time() - t > timeout:
+            return False
+
+        server = nova.servers.get(server)
+
+
+def get_or_create_floating_ip(nova, pool, used_ip):
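+    # reuse a free floating ip from the pool when possible, otherwise
+    # allocate a new one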
+    ip_list = nova.floating_ips.list()
+
+    if pool is not None:
+        ip_list = [ip for ip in ip_list if ip.pool == pool]
+
+    ip_list = [ip for ip in ip_list if ip.instance_id is None]
+    ip_list = [ip for ip in ip_list if ip.ip not in used_ip]
+
+    if len(ip_list) > 0:
+        return ip_list[0]
+    else:
+        return nova.floating_ips.create(pool)
+
+
+def create_vms(nova, amount, keypair_name, img_name,
+               flavor_name, vol_sz, network_zone_name=None):
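+    """Boot `amount` servers (retrying failed ones, up to three passes),
+    attach a volume and a floating ip to each; return {floating ip: server}.
+    """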
+
+    network = nova.networks.find(label=network_zone_name)
+    nics = [{'net-id': network.id}]
+    fl = nova.flavors.find(name=flavor_name)
+    img = nova.images.find(name=img_name)
+    srvs = []
+    counter = 0
+
+    for _ in range(3):
+        amount_left = amount - len(srvs)
+
+        new_srvs = []
+        for i in range(amount_left):
+            print "creating server"
+            srv = nova.servers.create("ceph-test-{0}".format(counter),
+                                      flavor=fl, image=img, nics=nics,
+                                      key_name=keypair_name)
+            counter += 1
+            new_srvs.append(srv)
+            print srv
+
+        deleted_servers = []
+        for srv in new_srvs:
+            if not wait_for_server_active(nova, srv):
+                print "Server", srv.name, "fails to start. Kill it and",
+                print " try again"
+
+                nova.servers.delete(srv)
+                deleted_servers.append(srv)
+            else:
+                srvs.append(srv)
+
+        if len(deleted_servers) != 0:
+            time.sleep(5)
+
+    if len(srvs) != amount:
+        print "ERROR: can't start required amount of servers. Exit"
+        raise RuntimeError("Fail to create {0} servers".format(amount))
+
+    result = {}
+    for srv in srvs:
+        print "wait till server be ready"
+        wait_for_server_active(nova, srv)
+        print "creating volume"
+        vol = create_volume(vol_sz)
+        print "attach volume to server"
+        nova.volumes.create_server_volume(srv.id, vol.id, None)
+        print "create floating ip"
+        flt_ip = get_or_create_floating_ip(nova, 'net04_ext', result.keys())
+        print "attaching ip to server"
+        srv.add_floating_ip(flt_ip)
+        result[flt_ip.ip] = srv
+
+    return result
+
+
+def clear_all(nova):
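+    # delete every ceph-test-* server, wait until they are gone, then
+    # delete the matching volumes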
+    deleted_srvs = set()
+    for srv in nova.servers.list():
+        if re.match(r"ceph-test-\d+", srv.name):
+            print "Deleting server", srv.name
+            nova.servers.delete(srv)
+            deleted_srvs.add(srv.id)
+
+    while deleted_srvs:
+        print "Waiting till all servers are actually deleted"
+        all_id = set(srv.id for srv in nova.servers.list())
+        if all_id.intersection(deleted_srvs) == set():
+            print "Done, deleting volumes"
+            break
+        time.sleep(1)
+
+    # wait till vm actually deleted
+
+    cinder = c_client(*ostack_get_creds())
+    for vol in cinder.volumes.list():
+        if isinstance(vol.display_name, basestring):
+            if re.match(r'ceph-test-\d+', vol.display_name):
+                if vol.status in ('available', 'error'):
+                    print "Deleting volume", vol.display_name
+                    cinder.volumes.delete(vol)
+
+    print "Clearing done (yet some volumes may still deleting)"
+
+
+def wait_ssh_ready(host, user, key_file, retry_count=10, timeout=5):
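+    # retry the ssh connection until the vm accepts it or retries run out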
+    ssh = paramiko.SSHClient()
+    ssh.load_host_keys('/dev/null')
+    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
+    ssh.known_hosts = None
+
+    for i in range(retry_count):
+        try:
+            ssh.connect(host, username=user, key_filename=key_file,
+                        look_for_keys=False)
+            break
+        except Exception:
+            if i == retry_count - 1:
+                raise
+            time.sleep(timeout)
+
+
+# def prepare_host(key_file, ip, fio_path, dst_fio_path, user='cirros'):
+#     print "Wait till ssh ready...."
+#     wait_ssh_ready(ip, user, key_file)
+
+#     print "Preparing host >"
+#     print "    Coping fio"
+#     copy_fio(key_file, ip, fio_path, user, dst_fio_path)
+
+#     key_opts = '-o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no'
+#     args = (key_file, user, ip, key_opts)
+#     cmd_format = "ssh {3} -i {0} {1}@{2} '{{0}}'".format(*args).format
+
+#     def exec_on_host(cmd):
+#         print "    " + cmd
+#         subprocess.check_call(cmd_format(cmd), shell=True)
+
+#     exec_on_host("sudo /usr/sbin/mkfs.ext4 /dev/vdb")
+#     exec_on_host("sudo /bin/mkdir /media/ceph")
+#     exec_on_host("sudo /bin/mount /dev/vdb /media/ceph")
+#     exec_on_host("sudo /bin/chmod a+rwx /media/ceph")
+
+
+def main():
+    image_name = 'TestVM'
+    flavor_name = 'ceph'
+    vol_sz = 50
+    network_zone_name = 'net04'
+    amount = 10
+    keypair_name = 'ceph-test'
+
+    nova = nova_connect()
+    clear_all(nova)
+
+    try:
+        params = dict(vol_sz=vol_sz,
+                      image_name=image_name,
+                      flavor_name=flavor_name,
+                      network_zone_name=network_zone_name,
+                      amount=amount,
+                      keypair_name=keypair_name)
+
+        ips = create_vms(nova, **params).keys()
+
+        print "All setup done! Ips =", " ".join(ips)
+        print "Starting tests"
+    finally:
+        clear_all(nova)
+
+if __name__ == "__main__":
+    exit(main())