large refactoring, ready to move away from rally
diff --git a/scripts/data_extractor.py b/scripts/data_extractor.py
new file mode 100644
index 0000000..d655f17
--- /dev/null
+++ b/scripts/data_extractor.py
@@ -0,0 +1,196 @@
+import sys
+import json
+import sqlite3
+import contextlib
+
+
+def connect(url):
+ return sqlite3.connect(url)
+
+
+create_db_sql_templ = """
+CREATE TABLE build (id integer primary key,
+ build text,
+ type text,
+ md5 text);
+
+CREATE TABLE params_combination (id integer primary key, {params});
+CREATE TABLE param (id integer primary key, name text, type text);
+
+CREATE TABLE result (build_id integer,
+ params_combination integer,
+ bandwith float,
+ deviation float);
+"""
+
+
+PARAM_COUNT = 20
+
+
+def get_all_tables(conn):
+ cursor = conn.cursor()
+ cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
+ return cursor.fetchall()
+
+
+def drop_database(conn):
+ cursor = conn.cursor()
+ cursor.execute("drop table result")
+ cursor.execute("drop table params_combination")
+ cursor.execute("drop table build")
+ cursor.execute("drop table param")
+
+
+def init_database(conn):
+ cursor = conn.cursor()
+
+ params = ["param_{0} text".format(i) for i in range(PARAM_COUNT)]
+ create_db_sql = create_db_sql_templ.format(params=",".join(params))
+
+ for sql in create_db_sql.split(";"):
+ cursor.execute(sql)
+
+
+def insert_io_params(conn):
+ sql = """insert into param (name, type) values ('operation',
+ '{write,randwrite,read,randread}');
+ insert into param (name, type) values ('sync', '{a,s}');
+ insert into param (name, type) values ('block_size', 'size_kmg');
+ """
+
+ for insert in sql.split(";"):
+ conn.execute(insert)
+
+
+def insert_build(cursor, build_id, build_type, iso_md5):
+ cursor.execute("insert into build (build, type, md5) values (?, ?, ?)",
+ (build_id, build_type, iso_md5))
+ return cursor.lastrowid
+
+
+def insert_params(cursor, *param_vals):
+ param_vals = param_vals + ("",) * (PARAM_COUNT - len(param_vals))
+
+ params = ",".join(['?'] * PARAM_COUNT)
+ select_templ = "select id from params_combination where {params_where}"
+
+ params_where = ["param_{0}=?".format(i) for i in range(PARAM_COUNT)]
+ req = select_templ.format(params_where=" AND ".join(params_where))
+ cursor.execute(req, param_vals)
+ res = cursor.fetchall()
+ if [] != res:
+ return res[0][0]
+
+ params = ",".join(['?'] * PARAM_COUNT)
+ param_insert_templ = "insert into params_combination ({0}) values ({1})"
+ param_names = ",".join("param_{0}".format(i) for i in range(PARAM_COUNT))
+ req = param_insert_templ.format(param_names, params)
+ cursor.execute(req, param_vals)
+ return cursor.lastrowid
+
+
+def insert_results(cursor, build_id, params_id, bw, dev):
+ req = "insert into result values (?, ?, ?, ?)"
+ cursor.execute(req, (build_id, params_id, bw, dev))
+
+
+@contextlib.contextmanager
+def transaction(conn):
+ try:
+ cursor = conn.cursor()
+ yield cursor
+ except:
+ conn.rollback()
+ raise
+ else:
+ conn.commit()
+
+
+def json_to_db(json_data, conn):
+ data = json.loads(json_data)
+ with transaction(conn) as cursor:
+ for build_data in data:
+ build_id = insert_build(cursor,
+ build_data.pop("build_id"),
+ build_data.pop("type"),
+ build_data.pop("iso_md5"))
+
+ for params, (bw, dev) in build_data.items():
+ param_id = insert_params(cursor, *params.split(" "))
+ insert_results(cursor, build_id, param_id, bw, dev)
+
+
+def to_db():
+ conn = sqlite3.connect(sys.argv[1])
+ json_data = open(sys.argv[2]).read()
+
+ if len(get_all_tables(conn)) == 0:
+ init_database(conn)
+
+ json_to_db(json_data, conn)
+
+
+def ssize_to_kb(ssize):
+ try:
+ smap = dict(k=1, K=1, M=1024, m=1024, G=1024**2, g=1024**2)
+ for ext, coef in smap.items():
+ if ssize.endswith(ext):
+ return int(ssize[:-1]) * coef
+
+ if int(ssize) % 1024 != 0:
+ raise ValueError()
+
+ return int(ssize) / 1024
+
+ except (ValueError, TypeError, AttributeError):
+ tmpl = "Unknow size format {0!r} (or size not multiples 1024)"
+ raise ValueError(tmpl.format(ssize))
+
+
+def load_slice(cursor, build_id, y_param, **params):
+ params_id = {}
+ for param in list(params) + [y_param]:
+ cursor.execute("select id from param where name=?", (param,))
+ params_id[param] = cursor.fetchone()
+
+ sql = """select params_combination.param_{0}, result.bandwith
+ from params_combination, result
+ where result.build_id=?""".format(params_id[y_param])
+
+ for param, val in params.items():
+ pid = params_id[param]
+ sql += " and params_combination.param_{0}='{1}'".format(pid, val)
+
+ cursor.execute(sql)
+
+
+def from_db():
+ conn = sqlite3.connect(sys.argv[1])
+ # sql = sys.argv[2]
+ cursor = conn.cursor()
+
+ sql = """select params_combination.param_2, result.bandwith
+ from params_combination, result
+ where params_combination.param_0="write"
+ and params_combination.param_1="s"
+ and params_combination.id=result.params_combination
+ and result.build_id=60"""
+
+ cursor.execute(sql)
+ data = []
+
+ for (sz, bw) in cursor.fetchall():
+ data.append((ssize_to_kb(sz), sz, bw))
+
+ data.sort()
+
+ import matplotlib.pyplot as plt
+ xvals = range(len(data))
+ plt.plot(xvals, [dt[2] for dt in data])
+ plt.ylabel('bandwith')
+ plt.xlabel('block size')
+ plt.xticks(xvals, [dt[1] for dt in data])
+ plt.show()
+
+
+from_db()