many updates in report code and in storage structure, this commit is broken
diff --git a/configs-examples/default.yaml b/configs-examples/default.yaml
index 871de56..60260ab 100644
--- a/configs-examples/default.yaml
+++ b/configs-examples/default.yaml
@@ -4,8 +4,9 @@
settings_dir: ~/.wally
connect_timeout: 30
download_rpc_logs: true
-rpc_log_level: DEBUG
+rpc_log_level: INFO
default_test_local_folder: "/tmp/wally_{uuid}_{name}"
+keep_raw_files: false
logging:
level: DEBUG
@@ -93,3 +94,41 @@
openstack_ceph: OS_1_to_1 + ceph_vdb
openstack_cinder: OS_1_to_1 + ceph_iscsi_vdb
openstack_nova: OS_1_to_1 + nova_io
+
+
+reporting:
+ colors:
+ primary_color: b
+ suppl_color1: teal
+ suppl_color2: magenta
+ box_color: y
+ noise_alpha: 0.3
+ subinfo_alpha: 0.7
+ imshow_colormap: null
+
+ style:
+ grid: true
+ tide_layout: true
+ hist_boxes: 10
+ min_points_for_dev: 5
+ dev_range_x: 2.0
+ dev_perc: 95
+ avg_range: 20
+ curve_approx_level: 5
+ curve_approx_points: 100
+ extra_io_spine: true
+ legend_for_eng: True
+ heatmap_interpolation: 1d
+ heatmap_interpolation_points: 300
+ outliers_q_nd: 3.0
+ outliers_lat: [0.01, 0.995]
+ violin_instead_of_box: true
+ violin_point_count: 30000
+ heatmap_colorbar: false
+ units:
+ bw: ["MiBps", 1024, "bandwith"]
+ iops: ["IOPS", 1, "iops"]
+ lat: ["ms", 1, "latency"]
+
+ matplotlib_params:
+ "font.size": 10
\ No newline at end of file
diff --git a/configs-examples/local_lxc_ceph.yaml b/configs-examples/local_lxc_ceph.yaml
index 60afc48..2c4cbf4 100644
--- a/configs-examples/local_lxc_ceph.yaml
+++ b/configs-examples/local_lxc_ceph.yaml
@@ -6,11 +6,16 @@
tests:
- fio:
- load: rrd_qd_scan
+# load: rrd_qd_scan
+ load: hdd
+# skip_prefill: true
params:
+# FILENAME: /media/data/Swiss.Army.Man.2016.BDRip.1080p.mkv
FILENAME: /media/koder/test_space/test.bin
FILESIZE: 10G
- RUNTIME: 1200
+ RUNTIME: 180
+# RUNTIME: 5
# QDS: [1, 2, 4, 8, 16, 32, 64]
- QDS: [16]
+# QDS: [1, 16, 128]
+# QDS: [1]
diff --git a/tests/test_math.py b/tests/test_math.py
new file mode 100644
index 0000000..5a75858
--- /dev/null
+++ b/tests/test_math.py
@@ -0,0 +1,32 @@
+import numpy
+from wally.statistic import rebin_histogram
+
+
+def array_eq(x: numpy.array, y: numpy.array, max_diff: float = 1E-3) -> bool:
+ return numpy.abs(x - y).max() <= max_diff
+
+
+def test_rebin_histo():
+ curr_histo = numpy.empty((100,), dtype=int)
+ curr_histo[:] = 1
+ edges = numpy.arange(100)
+ new_histo, new_edges = rebin_histogram(curr_histo, edges, 10)
+
+ assert new_edges.shape == (10,)
+ assert new_histo.shape == (10,)
+ assert new_edges.dtype.name.startswith('float')
+ assert new_histo.dtype.name.startswith('int')
+
+ assert array_eq(new_edges, numpy.arange(10) * 9.9)
+ assert new_histo.sum() == curr_histo.sum()
+ assert list(new_histo) == [10] * 10
+
+ new_histo, new_edges = rebin_histogram(curr_histo, edges, 3,
+ left_tail_idx=20,
+ right_tail_idx=50)
+
+ assert new_edges.shape == (3,)
+ assert new_histo.shape == (3,)
+ assert array_eq(new_edges, numpy.array([20, 30, 40]))
+ assert new_histo.sum() == curr_histo.sum()
+ assert list(new_histo) == [30, 10, 60]
diff --git a/tests/test_rpc.py b/tests/test_rpc.py
index fd35555..db24768 100644
--- a/tests/test_rpc.py
+++ b/tests/test_rpc.py
@@ -7,13 +7,13 @@
@contextlib.contextmanager
-def rpc_conn_ctx(uri):
+def rpc_conn_ctx(uri, log_level=None):
creds = ssh_utils.parse_ssh_uri(uri)
rpc_code, modules = node.get_rpc_server_code()
ssh_conn = node.connect(node_interfaces.NodeInfo(creds, set()))
try:
- rpc_conn = node.setup_rpc(ssh_conn, rpc_code, plugins=modules)
+ rpc_conn = node.setup_rpc(ssh_conn, rpc_code, plugins=modules, log_level=log_level)
try:
yield rpc_conn
finally:
diff --git a/v2_plans.md b/v2_plans.md
index a85e5de..56e2669 100644
--- a/v2_plans.md
+++ b/v2_plans.md
@@ -2,68 +2,65 @@
* With current code impossible to do vm count scan test
* TODO next
- * Add settings to keep raw log files on disk (not fio output)
- * Job description should have tuple of parameters, characterized load and abbreviated/readable description
- * TS should have units, UI modules should use function to calculate coefficient for show values
- * Get done iops amount from fio?
- * Rearrange report layout - make engeneering reports per job
- * Store sensors and all data in csv, load from csv
- * Plot aggregated sensors across cluster during test
- * Aggregated sensors distribution and boxplot
- * Hitmap for aggregated sensors
- * automatically find what to plot from storage data (but also allow to seelct via config)
- * store aggregated and per-node TS in it
- * show distributions parameters on histogram plots
+ * unit tests for math functions
+ * CEPH PERFORMANCE COUNTERS
+ * Sync storage_structure
+ * fix fio job summary
+ * Use disk max QD as qd limit?
+ * Cumulative statistic table for all jobs
+ * Add column for job params, which show how many cluster resource consumed
+ * show extra outliers with arrows
+ * More X = func(QD) plots. Eg. - kurt/skew, etc.
+ * Hide cluster load if no nodes available
+ * Show latency skew and kurtosis
+ * Sort engineering report by result tuple
+ * Name engineering reports by long summary
+ * Latency heatmap and violin aren't consistent
+ * profile violin plot
* Fix plot layout, there to much unused space around typical plot
- * update API for work with storage Should allows select each sensor for some interval and sum of particular sensor
- across all node devices, all nodes of same type and entire cluster.
- * Collect latency distribution
* iops boxplot as function from QD
- * store statistic results in storage
* collect device types mapping from nodes - device should be block/net/...
- * add integral sensors gap interpolation
* Optimize sensor communication with ceph, can run fist OSD request for
data validation only on start.
- * Each sensor should collect only one portion of data. During
- start it should scan all awailable sources and tell upper code to create separated funcs for them.
- * run test with sensor on large and small file
- * Move test load code to io.fio file
- * UT, which run test with predefined in yaml cluster (cluster and config created separatelly, not with tests)
- and check that result storage work as expected. Declare db sheme in seaprated yaml file, UT should check.
* Update Storage test, add tests for stat and plot module
+ * Aggregated sensors boxplot
+ * Heatmap for aggregated sensors
+ * automatically find what to plot from storage data (but also allow to select via config)
+
+Have to think:
+ * Each sensor should collect only one portion of data. During
+ start it should scan all available sources and tell upper code to create separated funcs for them.
+ * store statistic results in storage
* During prefill check io on file
- * Check FS on device, where test file located
- * Dump and analyze target block device settings on test nodes
+ * Store percentiles levels in TS, separate 1D TS and 2D TS to different classes, store levels in 2D TS
+ * weight average and deviation
+ * C++/Go disk stat sensors to measure IOPS/Lat on milliseconds
+
+* TODO large
+ * Force-kill running fio on ctrl+C and clean up correctly, or clean up all previous runs with 'wally cleanup PATH'
* Code:
* RW mixed report
- * C++/Go disk stat sensors to measure IOPS/Lat on milliseconds
- * Allow to cleanup all uncleaned from previous run 'wally cleanup PATH'
* RPC reconnect in case of errors
- * store more information for node - OSD settings, etc
- * Unit-tests
+ * store more information for node - OSD settings, FS on test nodes, target block device settings on test nodes
* Sensors
- Revise sensors code. Prepack on node side, different sensors data types
- perf
- [bcc](https://github.com/iovisor/bcc)
- ceph sensors
- * Config revised:
- * Result config then validated
+ * Config validation
* Add sync 4k write with small set of thcount
* Flexible SSH connection creds - use agent, default ssh settings or part of config
* Remove created temporary files - create all tempfiles via func from .utils, which track them
* Use ceph-monitoring from wally
- * Remove warm-up time from fio. Use warm-up detection to select real test time,
- also fio/OS log files should be used to get test results, not directly
- calculated by fio.
+ * Use warm-up detection to select real test time.
* Report code:
- - Compatible report types setted up by config and load??
- - Set of reporter classes run again results and avaluate ability to generate required report type
- - They generate report blocks with description and html data
- - final report compose code arrange blocks in single document
+ - Compatible report types set up by config and load??
* Calculate statistic for previous iteration in background
* UT
+ * UT, which runs a test with a cluster predefined in yaml (cluster and config created separately, not with tests)
+ and checks that result storage works as expected. Declare db scheme in a separate yaml file, UT should check.
* White-box event logs for UT
* Result-to-yaml for UT
@@ -73,13 +70,10 @@
* Update setup.py to provide CLI entry points
* Statistical result check and report:
- * Comprehensive report with results histograms and other, [Q-Q plot](https://en.wikipedia.org/wiki/Q%E2%80%93Q_plot)
+ * [Q-Q plot](https://en.wikipedia.org/wiki/Q%E2%80%93Q_plot)
* Check results distribution
* Warn for non-normal results
- * Check that distribution of different parts is close. Average
- performance should be steady across test
- * Graphs for raw data over time
- * Save pictures from report in jpg in separated folder
+ * Check that distribution of different parts is close. Average performance should be steady across test
* Node histogram distribution
* Interactive report, which shows different plots and data,
depending on selected visualization type
@@ -104,27 +98,11 @@
- http://www.lognormal.com/features/
- http://blog.simiacryptus.com/2015/10/modeling-network-latency.html
* For HDD read/write - report caches hit ratio, maps of real read/writes, FS counters
-
-* Report structure
- * Overall report
- * Extended engineering report
- * Cluster information
- * Loads. For each load:
- - IOPS distribution, stat analisys
- - LAT heatmap/histo, stat analisys
- - Bottleneck analisys
- * Changes for load groups - show how IOPS/LAT histo is chages with thread count
* Report help page, link for explanations
-
-* Report pictures:
* checkboxes for show/hide part of image
* pop-up help for part of picture
* pop-up text values for bars/lines
* waterfall charts for ceph request processing
-
-* Intellectual postprocessing:
- * Difference calculation
- * Resource usage calculator/visualizer, bottleneck hunter
* correct comparison between different systems
* Maybe move to 2.1:
diff --git a/wally/common_types.py b/wally/common_types.py
index 9464b57..470a325 100644
--- a/wally/common_types.py
+++ b/wally/common_types.py
@@ -25,7 +25,7 @@
class Storable(IStorable):
"""Default implementation"""
- __ignore_fields__ = []
+ __ignore_fields__ = [] # type: List[str]
def raw(self) -> Dict[str, Any]:
return {name: val
diff --git a/wally/hlstorage.py b/wally/hlstorage.py
index c71159f..f2c9488 100644
--- a/wally/hlstorage.py
+++ b/wally/hlstorage.py
@@ -1,16 +1,13 @@
-import re
import os
-import array
-import struct
import logging
-from typing import cast, Iterator, Tuple, Type, Dict, Set, List, Optional
+from typing import cast, Iterator, Tuple, Type, Dict, Optional
import numpy
-from .result_classes import (TestSuiteConfig, TestJobConfig, TimeSeries, DataSource,
- StatProps, IResultStorage)
-from .storage import Storage
-from .utils import StopTestError
+from .suits.job import JobConfig
+from .result_classes import SuiteConfig, TimeSeries, DataSource, StatProps, IResultStorage
+from .storage import Storage, csv_file_encoding
+from .utils import StopTestError, str2shape, shape2str
from .suits.all_suits import all_suits
@@ -19,44 +16,60 @@
class DB_re:
node_id = r'\d+.\d+.\d+.\d+:\d+'
- job_id = r'[-a-zA-Z0-9]+_\d+'
- sensor = r'[a-z_]+'
+ job_id = r'[-a-zA-Z0-9_]+_\d+'
+ suite_id = r'[a-z_]+_\d+'
+ sensor = r'[-a-z_]+'
dev = r'[-a-zA-Z0-9_]+'
- suite_id = r'[a-z]+_\d+'
tag = r'[a-z_.]+'
+ metric = r'[a-z_.]+'
class DB_paths:
- suite_cfg_r = r'results/{suite_id}_info\.yml'
- suite_cfg = suite_cfg_r.replace("\\.", '.')
+ suite_cfg_r = r'results/{suite_id}\.info\.yml'
- job_cfg_r = r'results/{suite_id}\.{job_id}/info\.yml'
+ job_root = r'results/{suite_id}.{job_id}/'
+ job_cfg_r = job_root + r'info\.yml'
+
+ # time series, data from load tool, sensor is a tool name
+ ts_r = job_root + r'{node_id}\.{sensor}\.{metric}.{tag}'
+
+ # statistical data for ts
+ stat_r = job_root + r'{node_id}\.{sensor}\.{metric}\.stat.yaml'
+
+ # sensor data
+ sensor_data_r = r'sensors/{node_id}_{sensor}\.{dev}\.{metric}\.csv'
+ sensor_time_r = r'sensors/{node_id}_collected_at\.csv'
+
+ report_root = 'report/'
+ plot_r = r'report/{suite_id}\.{job_id}/{node_id}\.{sensor}\.{dev}\.{metric}\.{tag}'
+
job_cfg = job_cfg_r.replace("\\.", '.')
-
- job_extra_r = r'results/{suite_id}\.{job_id}/{node_id}/{dev}\.{sensor}\.{tag}'
- job_extra = job_extra_r.replace("\\.", '.')
-
- ts_r = r'results/{suite_id}\.{job_id}/{node_id}/{dev}\.{sensor}\.{tag}'
+ suite_cfg = suite_cfg_r.replace("\\.", '.')
ts = ts_r.replace("\\.", '.')
-
- stat_r = r'results/{suite_id}\.{job_id}/{node_id}/{dev}\.{sensor}\.{tag}'
stat = stat_r.replace("\\.", '.')
-
- plot_r = r'report/{suite_id}\.{job_id}/{node_id}/{dev}\.{sensor}\.{tag}'
+ sensor_data = sensor_data_r.replace("\\.", '.')
+ sensor_time = sensor_time_r.replace("\\.", '.')
plot = plot_r.replace("\\.", '.')
- report = r'report/'
+
+DB_rr = {name: r"(?P<{}>{})".format(name, rr)
+ for name, rr in DB_re.__dict__.items()
+ if not name.startswith("__")}
-DB_rr = {name: r"(?P<{}>{})".format(name, rr) for name, rr in DB_re.__dict__.items() if not name.startswith("__")}
+def fill_path(path: str, **params) -> str:
+ for name, val in params.items():
+ if val is not None:
+ path = path.replace("{" + name + "}", val)
+ return path
class ResultStorage(IResultStorage):
# TODO: check that all path components match required patterns
+ ts_header_size = 64
ts_header_format = "!IIIcc"
- ts_arr_tag = 'bin'
- ts_raw_tag = 'txt'
+ ts_arr_tag = 'csv'
def __init__(self, storage: Storage) -> None:
self.storage = storage
@@ -64,7 +77,50 @@
def sync(self) -> None:
self.storage.sync()
- def put_or_check_suite(self, suite: TestSuiteConfig) -> None:
+ # ----------------- SERIALIZATION / DESERIALIZATION -------------------------------------------------------------
+
+ def load_ts(self, ds: DataSource, path: str) -> TimeSeries:
+
+ with self.storage.get_fd(path, "rb") as fd:
+ header = fd.readline().decode(csv_file_encoding).strip().split(",")
+ shape, dtype, units, time_units = header
+ arr = numpy.loadtxt(fd, delimiter=',', dtype=dtype)
+
+ return TimeSeries("{}.{}".format(ds.dev, ds.sensor),
+ raw=None,
+ data=arr[:,1:].reshape(str2shape(shape)),
+ times=arr[:,0],
+ source=ds,
+ units=units,
+ time_units=time_units)
+
+ def load_sensor(self, ds: DataSource) -> TimeSeries:
+ collect_header, collected_at = self.storage.get_array(DB_paths.sensor_time.format(**ds.__dict__))
+ assert collect_header == [ds.node_id, 'collected_at', 'us'], repr(collect_header)
+
+ data_header, data = self.storage.get_array(DB_paths.sensor_data.format(**ds.__dict__))
+
+ data_units = data_header[2]
+ assert data_header == [ds.node_id, ds.metric_fqdn, data_units]
+
+ return TimeSeries(ds.metric_fqdn,
+ raw=None,
+ data=data,
+ times=collected_at,
+ source=ds,
+ units=data_units,
+ time_units='us')
+
+ # ------------- CHECK DATA IN STORAGE ----------------------------------------------------------------------------
+
+ def check_plot_file(self, source: DataSource) -> Optional[str]:
+ path = DB_paths.plot.format(**source.__dict__)
+ fpath = self.storage.resolve_raw(path)
+ return path if os.path.exists(fpath) else None
+
+ # ------------- PUT DATA INTO STORAGE --------------------------------------------------------------------------
+
+ def put_or_check_suite(self, suite: SuiteConfig) -> None:
path = DB_paths.suite_cfg.format(suite_id=suite.storage_id)
if path in self.storage:
db_cfg = self.storage.get(path)
@@ -74,132 +130,113 @@
self.storage.put(suite, path)
- def put_job(self, suite: TestSuiteConfig, job: TestJobConfig) -> None:
+ def put_job(self, suite: SuiteConfig, job: JobConfig) -> None:
path = DB_paths.job_cfg.format(suite_id=suite.storage_id, job_id=job.storage_id)
self.storage.put(job, path)
def put_ts(self, ts: TimeSeries) -> None:
- data = cast(List[int], ts.data)
- times = cast(List[int], ts.times)
+ assert ts.data.dtype == ts.times.dtype
+ assert ts.data.dtype.kind == 'u'
+ assert ts.source.tag == self.ts_arr_tag
- if len(data) % ts.second_axis_size != 0:
- logger.error("Time series data size(%s) is not propotional to second_axis_size(%s).",
- len(data), ts.second_axis_size)
- raise StopTestError()
+ csv_path = DB_paths.ts.format(**ts.source.__dict__)
+ header = [shape2str(ts.data.shape),
+ ts.data.dtype.name,
+ ts.units,
+ ts.time_units]
- if len(data) // ts.second_axis_size != len(times):
- logger.error("Unbalanced data and time srray sizes. %s", ts)
- raise StopTestError()
+ with self.storage.get_fd(csv_path, "cb") as fd:
+ tv = ts.times.view().reshape((-1, 1))
- bin_path = DB_paths.ts.format(**ts.source(tag=self.ts_arr_tag).__dict__)
+ if len(ts.data.shape) == 1:
+ dv = ts.data.view().reshape((ts.times.shape[0], -1))
+ else:
+ dv = ts.data
- with self.storage.get_fd(bin_path, "cb") as fd:
- header = struct.pack(self.ts_header_format,
- ts.second_axis_size,
- len(data),
- len(times),
- ts.data.typecode.encode("ascii"),
- ts.times.typecode.encode("ascii"))
- fd.write(header)
- ts.data.tofile(fd) # type: ignore
- ts.times.tofile(fd) # type: ignore
+ result = numpy.concatenate((tv, dv), axis=1)
+ fd.write((",".join(map(str, header)) + "\n").encode(csv_file_encoding))
+ numpy.savetxt(fd, result, delimiter=',', newline="\n", fmt="%lu")
if ts.raw:
- raw_path = DB_paths.ts.format(**ts.source(tag=self.ts_raw_tag).__dict__)
+ raw_path = DB_paths.ts.format(**ts.source(tag=ts.raw_tag).__dict__)
self.storage.put_raw(ts.raw, raw_path)
def put_extra(self, data: bytes, source: DataSource) -> None:
- path = DB_paths.job_cfg.format(**source.__dict__)
- self.storage.put_raw(data, path)
+ self.storage.put(data, DB_paths.ts.format(**source.__dict__))
def put_stat(self, data: StatProps, source: DataSource) -> None:
- path = DB_paths.stat.format(**source.__dict__)
- self.storage.put(data, path)
-
- def get_stat(self, stat_cls: Type[StatProps], source: DataSource) -> StatProps:
- path = DB_paths.stat.format(**source.__dict__)
- return self.storage.load(stat_cls, path)
-
- def iter_paths(self, path_glob) -> Iterator[Tuple[bool, str, Dict[str, str]]]:
- path = path_glob.format(**DB_rr).split("/")
- yield from self.storage._iter_paths("", path, {})
-
- def iter_suite(self, suite_type: str = None) -> Iterator[TestSuiteConfig]:
- for is_file, suite_info_path, groups in self.iter_paths(DB_paths.suite_cfg_r):
- assert is_file
- suite = cast(TestSuiteConfig, self.storage.load(TestSuiteConfig, suite_info_path))
- assert suite.storage_id == groups['suite_id']
- if not suite_type or suite.test_type == suite_type:
- yield suite
-
- def iter_job(self, suite: TestSuiteConfig) -> Iterator[TestJobConfig]:
- job_glob = DB_paths.job_cfg_r.replace('{suite_id}', suite.storage_id)
- job_config_cls = all_suits[suite.test_type].job_config_cls
-
- for is_file, path, groups in self.iter_paths(job_glob):
- assert is_file
- job = cast(TestJobConfig, self.storage.load(job_config_cls, path))
- assert job.storage_id == groups['job_id']
- yield job
-
- def iter_datasource(self, suite: TestSuiteConfig, job: TestJobConfig) -> Iterator[Tuple[DataSource, Dict[str, str]]]:
- ts_glob = DB_paths.ts_r.replace('{suite_id}', suite.storage_id).replace('{job_id}', job.storage_id)
- ts_found = {} # type: Dict[Tuple[str, str, str], Dict[str, str]]
-
- for is_file, path, groups in self.iter_paths(ts_glob):
- assert is_file
- key = (groups['node_id'], groups['dev'], groups['sensor'])
- ts_found.setdefault(key, {})[groups['tag']] = path
-
- for (node_id, dev, sensor), tag2path in ts_found.items():
- if self.ts_arr_tag in tag2path:
- yield DataSource(suite_id=suite.storage_id,
- job_id=job.storage_id,
- node_id=node_id,
- dev=dev, sensor=sensor, tag=None), tag2path
-
- def load_ts(self, ds: DataSource, path: str) -> TimeSeries:
- with self.storage.get_fd(path, "rb") as fd:
- header = fd.read(struct.calcsize(self.ts_header_format))
- second_axis_size, data_sz, time_sz, data_typecode, time_typecode = \
- struct.unpack(self.ts_header_format, header)
-
- data = array.array(data_typecode.decode("ascii"))
- times = array.array(time_typecode.decode("ascii"))
-
- data.fromfile(fd, data_sz) # type: ignore
- times.fromfile(fd, time_sz) # type: ignore
-
- return TimeSeries("{}.{}".format(ds.dev, ds.sensor),
- raw=None,
- data=numpy.array(data, dtype=numpy.dtype('float32')),
- times=numpy.array(times),
- second_axis_size=second_axis_size,
- source=ds)
-
- def iter_ts(self, suite: TestSuiteConfig, job: TestJobConfig, **filters) -> Iterator[TimeSeries]:
- for ds, tag2path in self.iter_datasource(suite, job):
- for name, val in filters.items():
- if val != getattr(ds, name):
- break
- else:
- ts = self.load_ts(ds, tag2path[self.ts_arr_tag])
- if self.ts_raw_tag in tag2path:
- ts.raw = self.storage.get_raw(tag2path[self.ts_raw_tag])
-
- yield ts
+ self.storage.put(data, DB_paths.stat.format(**source.__dict__))
# return path to file to be inserted into report
def put_plot_file(self, data: bytes, source: DataSource) -> str:
path = DB_paths.plot.format(**source.__dict__)
return cast(str, self.storage.put_raw(data, path))
- def check_plot_file(self, source: DataSource) -> Optional[str]:
- path = DB_paths.plot.format(**source.__dict__)
- fpath = self.storage.resolve_raw(path)
- if os.path.exists(fpath):
- return fpath
- return None
-
def put_report(self, report: str, name: str) -> str:
- return self.storage.put_raw(report.encode("utf8"), DB_paths.report + name)
+ return self.storage.put_raw(report.encode("utf8"), DB_paths.report_root + name)
+
+ def append_sensor(self, data: numpy.array, ds: DataSource, units: str) -> None:
+ if ds.metric == 'collected_at':
+ path = DB_paths.sensor_time
+ metrics_fqn = 'collected_at'
+ else:
+ path = DB_paths.sensor_data
+ metrics_fqn = ds.metric_fqdn
+ self.storage.append([ds.node_id, metrics_fqn, units], data, path.format(**ds.__dict__))
+
+ # ------------- GET DATA FROM STORAGE --------------------------------------------------------------------------
+
+ def get_stat(self, stat_cls: Type[StatProps], source: DataSource) -> StatProps:
+ return self.storage.load(stat_cls, DB_paths.stat.format(**source.__dict__))
+
+ # ------------- ITER OVER STORAGE ------------------------------------------------------------------------------
+
+ def iter_paths(self, path_glob) -> Iterator[Tuple[bool, str, Dict[str, str]]]:
+ path = path_glob.format(**DB_rr).split("/")
+ yield from self.storage._iter_paths("", path, {})
+
+ def iter_suite(self, suite_type: str = None) -> Iterator[SuiteConfig]:
+ for is_file, suite_info_path, groups in self.iter_paths(DB_paths.suite_cfg_r):
+ assert is_file
+ suite = self.storage.load(SuiteConfig, suite_info_path)
+ # suite = cast(SuiteConfig, self.storage.load(SuiteConfig, suite_info_path))
+ assert suite.storage_id == groups['suite_id']
+ if not suite_type or suite.test_type == suite_type:
+ yield suite
+
+ def iter_job(self, suite: SuiteConfig) -> Iterator[JobConfig]:
+ job_glob = fill_path(DB_paths.job_cfg_r, suite_id=suite.storage_id)
+ job_config_cls = all_suits[suite.test_type].job_config_cls
+ for is_file, path, groups in self.iter_paths(job_glob):
+ assert is_file
+ job = cast(JobConfig, self.storage.load(job_config_cls, path))
+ assert job.storage_id == groups['job_id']
+ yield job
+
+ # iterate over test tool data
+ def iter_ts(self, suite: SuiteConfig, job: JobConfig, **filters) -> Iterator[TimeSeries]:
+ filters.update(suite_id=suite.storage_id, job_id=job.storage_id)
+ ts_glob = fill_path(DB_paths.ts_r, **filters)
+
+ for is_file, path, groups in self.iter_paths(ts_glob):
+ assert is_file
+ groups = groups.copy()
+ groups.update(filters)
+ ds = DataSource(suite_id=suite.storage_id,
+ job_id=job.storage_id,
+ node_id=groups["node_id"],
+ sensor=groups["sensor"],
+ dev=None,
+ metric=groups["metric"],
+ tag=groups["tag"])
+ yield self.load_ts(ds, path)
+
+ def iter_sensors(self, node_id: str = None, sensor: str = None, dev: str = None, metric: str = None) -> \
+ Iterator[Tuple[str, Dict[str, str]]]:
+
+ path = fill_path(DB_paths.sensor_data_r, node_id=node_id, sensor=sensor, dev=dev, metric=metric)
+ for is_file, path, groups in self.iter_paths(path):
+ assert is_file
+ yield path, groups
+
+
diff --git a/wally/html.py b/wally/html.py
index e92e7d1..553aff5 100644
--- a/wally/html.py
+++ b/wally/html.py
@@ -1,2 +1,44 @@
-def img(link):
+from typing import Optional, List, Callable
+
+
+import xmlbuilder3
+
+
+eol = "<br>"
+
+
+def tag(name: str) -> Callable[[str], str]:
+ def closure(data: str) -> str:
+ return "<{}>{}</{}>".format(name, data, name)
+ return closure
+
+
+H3 = tag("H3")
+H2 = tag("H2")
+center = tag("center")
+
+
+def img(link: str) -> str:
return '<img src="{}">'.format(link)
+
+
+def table(caption: str, headers: Optional[List[str]], data: List[List[str]]) -> str:
+ doc = xmlbuilder3.XMLBuilder("table",
+ **{"class": "table table-bordered table-striped table-condensed table-hover",
+ "style": "width: auto;"})
+
+ doc.caption.H3.center(caption)
+
+ if headers is not None:
+ with doc.thead:
+ with doc.tr:
+ for header in headers:
+ doc.th(header)
+
+ with doc.tbody:
+ for line in data:
+ with doc.tr:
+ for vl in line:
+ doc.td(vl)
+
+ return xmlbuilder3.tostr(doc).split("\n", 1)[1]
\ No newline at end of file
diff --git a/wally/hw_info.py b/wally/hw_info.py
index 9da8cb7..61938ab 100644
--- a/wally/hw_info.py
+++ b/wally/hw_info.py
@@ -5,7 +5,7 @@
from typing import List, Tuple, cast, Optional
from . import utils
-from .node_utils import get_os
+from .node_utils import get_os, OSRelease
from .node_interfaces import IRPCNode
@@ -119,7 +119,7 @@
self.kernel_version = None # type: str
self.libvirt_version = None # type: Optional[str]
self.qemu_version = None # type: Optional[str]
- self.OS_version = None # type: utils.OSRelease
+ self.OS_version = None # type: OSRelease
self.ceph_info = None # type: Optional[CephInfo]
diff --git a/wally/legacy_report.py b/wally/legacy_report.py
new file mode 100644
index 0000000..42302b9
--- /dev/null
+++ b/wally/legacy_report.py
@@ -0,0 +1,1499 @@
+# --------------------------- LEGACY --------------------------------------------------------------------------------
+
+
+# # disk_info = None
+# # base = None
+# # linearity = None
+#
+#
+# def group_by_name(test_data):
+# name_map = collections.defaultdict(lambda: [])
+#
+# for data in test_data:
+# name_map[(data.name, data.summary())].append(data)
+#
+# return name_map
+#
+#
+# def report(name, required_fields):
+# def closure(func):
+# report_funcs.append((required_fields.split(","), name, func))
+# return func
+# return closure
+#
+#
+# def get_test_lcheck_params(pinfo):
+# res = [{
+# 's': 'sync',
+# 'd': 'direct',
+# 'a': 'async',
+# 'x': 'sync direct'
+# }[pinfo.sync_mode]]
+#
+# res.append(pinfo.p.rw)
+#
+# return " ".join(res)
+#
+#
+# def get_emb_data_svg(plt):
+# sio = StringIO()
+# plt.savefig(sio, format='svg')
+# img_start = "<!-- Created with matplotlib (http://matplotlib.org/) -->"
+# return sio.getvalue().split(img_start, 1)[1]
+#
+#
+# def get_template(templ_name):
+# very_root_dir = os.path.dirname(os.path.dirname(wally.__file__))
+# templ_dir = os.path.join(very_root_dir, 'report_templates')
+# templ_file = os.path.join(templ_dir, templ_name)
+# return open(templ_file, 'r').read()
+#
+#
+# def group_by(data, func):
+# if len(data) < 2:
+# yield data
+# return
+#
+# ndata = [(func(dt), dt) for dt in data]
+# ndata.sort(key=func)
+# pkey, dt = ndata[0]
+# curr_list = [dt]
+#
+# for key, val in ndata[1:]:
+# if pkey != key:
+# yield curr_list
+# curr_list = [val]
+# else:
+# curr_list.append(val)
+# pkey = key
+#
+# yield curr_list
+#
+#
+# @report('linearity', 'linearity_test')
+# def linearity_report(processed_results, lab_info, comment):
+# labels_and_data_mp = collections.defaultdict(lambda: [])
+# vls = {}
+#
+# # plot io_time = func(bsize)
+# for res in processed_results.values():
+# if res.name.startswith('linearity_test'):
+# iotimes = [1000. / val for val in res.iops.raw]
+#
+# op_summ = get_test_summary(res.params)[:3]
+#
+# labels_and_data_mp[op_summ].append(
+# [res.p.blocksize, res.iops.raw, iotimes])
+#
+# cvls = res.params.vals.copy()
+# del cvls['blocksize']
+# del cvls['rw']
+#
+# cvls.pop('sync', None)
+# cvls.pop('direct', None)
+# cvls.pop('buffered', None)
+#
+# if op_summ not in vls:
+# vls[op_summ] = cvls
+# else:
+# assert cvls == vls[op_summ]
+#
+# all_labels = None
+# _, ax1 = plt.subplots()
+# for name, labels_and_data in labels_and_data_mp.items():
+# labels_and_data.sort(key=lambda x: ssize2b(x[0]))
+#
+# labels, _, iotimes = zip(*labels_and_data)
+#
+# if all_labels is None:
+# all_labels = labels
+# else:
+# assert all_labels == labels
+#
+# plt.boxplot(iotimes)
+# if len(labels_and_data) > 2 and \
+# ssize2b(labels_and_data[-2][0]) >= 4096:
+#
+# xt = range(1, len(labels) + 1)
+#
+# def io_time(sz, bw, initial_lat):
+# return sz / bw + initial_lat
+#
+# x = numpy.array(map(ssize2b, labels))
+# y = numpy.array([sum(dt) / len(dt) for dt in iotimes])
+# popt, _ = scipy.optimize.curve_fit(io_time, x, y, p0=(100., 1.))
+#
+# y1 = io_time(x, *popt)
+# plt.plot(xt, y1, linestyle='--',
+# label=name + ' LS linear approx')
+#
+# for idx, (sz, _, _) in enumerate(labels_and_data):
+# if ssize2b(sz) >= 4096:
+# break
+#
+# bw = (x[-1] - x[idx]) / (y[-1] - y[idx])
+# lat = y[-1] - x[-1] / bw
+# y2 = io_time(x, bw, lat)
+# plt.plot(xt, y2, linestyle='--',
+# label=abbv_name_to_full(name) +
+# ' (4k & max) linear approx')
+#
+# plt.setp(ax1, xticklabels=labels)
+#
+# plt.xlabel("Block size")
+# plt.ylabel("IO time, ms")
+#
+# plt.subplots_adjust(top=0.85)
+# plt.legend(bbox_to_anchor=(0.5, 1.15),
+# loc='upper center',
+# prop={'size': 10}, ncol=2)
+# plt.grid()
+# iotime_plot = get_emb_data_svg(plt)
+# plt.clf()
+#
+# # plot IOPS = func(bsize)
+# _, ax1 = plt.subplots()
+#
+# for name, labels_and_data in labels_and_data_mp.items():
+# labels_and_data.sort(key=lambda x: ssize2b(x[0]))
+# _, data, _ = zip(*labels_and_data)
+# plt.boxplot(data)
+# avg = [float(sum(arr)) / len(arr) for arr in data]
+# xt = range(1, len(data) + 1)
+# plt.plot(xt, avg, linestyle='--',
+# label=abbv_name_to_full(name) + " avg")
+#
+# plt.setp(ax1, xticklabels=labels)
+# plt.xlabel("Block size")
+# plt.ylabel("IOPS")
+# plt.legend(bbox_to_anchor=(0.5, 1.15),
+# loc='upper center',
+# prop={'size': 10}, ncol=2)
+# plt.grid()
+# plt.subplots_adjust(top=0.85)
+#
+# iops_plot = get_emb_data_svg(plt)
+#
+# res = set(get_test_lcheck_params(res) for res in processed_results.values())
+# ncount = list(set(res.testnodes_count for res in processed_results.values()))
+# conc = list(set(res.concurence for res in processed_results.values()))
+#
+# assert len(conc) == 1
+# assert len(ncount) == 1
+#
+# descr = {
+# 'vm_count': ncount[0],
+# 'concurence': conc[0],
+# 'oper_descr': ", ".join(res).capitalize()
+# }
+#
+# params_map = {'iotime_vs_size': iotime_plot,
+# 'iops_vs_size': iops_plot,
+# 'descr': descr}
+#
+# return get_template('report_linearity.html').format(**params_map)
+#
+#
+# @report('lat_vs_iops', 'lat_vs_iops')
+# def lat_vs_iops(processed_results, lab_info, comment):
+# lat_iops = collections.defaultdict(lambda: [])
+# requsted_vs_real = collections.defaultdict(lambda: {})
+#
+# for res in processed_results.values():
+# if res.name.startswith('lat_vs_iops'):
+# lat_iops[res.concurence].append((res.lat,
+# 0,
+# res.iops.average,
+# res.iops.deviation))
+# # lat_iops[res.concurence].append((res.lat.average / 1000.0,
+# # res.lat.deviation / 1000.0,
+# # res.iops.average,
+# # res.iops.deviation))
+# requested_iops = res.p.rate_iops * res.concurence
+# requsted_vs_real[res.concurence][requested_iops] = \
+# (res.iops.average, res.iops.deviation)
+#
+# colors = ['red', 'green', 'blue', 'orange', 'magenta', "teal"]
+# colors_it = iter(colors)
+# for conc, lat_iops in sorted(lat_iops.items()):
+# lat, dev, iops, iops_dev = zip(*lat_iops)
+# plt.errorbar(iops, lat, xerr=iops_dev, yerr=dev, fmt='ro',
+# label=str(conc) + " threads",
+# color=next(colors_it))
+#
+# plt.xlabel("IOPS")
+# plt.ylabel("Latency, ms")
+# plt.grid()
+# plt.legend(loc=0)
+# plt_iops_vs_lat = get_emb_data_svg(plt)
+# plt.clf()
+#
+# colors_it = iter(colors)
+# for conc, req_vs_real in sorted(requsted_vs_real.items()):
+# req, real = zip(*sorted(req_vs_real.items()))
+# iops, dev = zip(*real)
+# plt.errorbar(req, iops, yerr=dev, fmt='ro',
+# label=str(conc) + " threads",
+# color=next(colors_it))
+# plt.xlabel("Requested IOPS")
+# plt.ylabel("Get IOPS")
+# plt.grid()
+# plt.legend(loc=0)
+# plt_iops_vs_requested = get_emb_data_svg(plt)
+#
+# res1 = processed_results.values()[0]
+# params_map = {'iops_vs_lat': plt_iops_vs_lat,
+# 'iops_vs_requested': plt_iops_vs_requested,
+# 'oper_descr': get_test_lcheck_params(res1).capitalize()}
+#
+# return get_template('report_iops_vs_lat.html').format(**params_map)
+#
+#
+# def render_all_html(comment, info, lab_description, images, templ_name):
+# data = info.__dict__.copy()
+# for name, val in data.items():
+# if not name.startswith('__'):
+# if val is None:
+# if name in ('direct_iops_w64_max', 'direct_iops_w_max'):
+# data[name] = ('-', '-', '-')
+# else:
+# data[name] = '-'
+# elif isinstance(val, (int, float, long)):
+# data[name] = round_3_digit(val)
+#
+# data['bw_read_max'] = (data['bw_read_max'][0] // 1024,
+# data['bw_read_max'][1],
+# data['bw_read_max'][2])
+#
+# data['bw_write_max'] = (data['bw_write_max'][0] // 1024,
+# data['bw_write_max'][1],
+# data['bw_write_max'][2])
+#
+# images.update(data)
+# templ = get_template(templ_name)
+# return templ.format(lab_info=lab_description,
+# comment=comment,
+# **images)
+#
+#
+# def io_chart(title, concurence,
+# latv, latv_min, latv_max,
+# iops_or_bw, iops_or_bw_err,
+# legend,
+# log_iops=False,
+# log_lat=False,
+# boxplots=False,
+# latv_50=None,
+# latv_95=None,
+# error2=None):
+#
+# matplotlib.rcParams.update({'font.size': 10})
+# points = " MiBps" if legend == 'BW' else ""
+# lc = len(concurence)
+# width = 0.35
+# xt = range(1, lc + 1)
+#
+# op_per_vm = [v / (vm * th) for v, (vm, th) in zip(iops_or_bw, concurence)]
+# fig, p1 = plt.subplots()
+# xpos = [i - width / 2 for i in xt]
+#
+# p1.bar(xpos, iops_or_bw,
+# width=width,
+# color='y',
+# label=legend)
+#
+# err1_leg = None
+# for pos, y, err in zip(xpos, iops_or_bw, iops_or_bw_err):
+# err1_leg = p1.errorbar(pos + width / 2,
+# y,
+# err,
+# color='magenta')
+#
+# err2_leg = None
+# if error2 is not None:
+# for pos, y, err in zip(xpos, iops_or_bw, error2):
+# err2_leg = p1.errorbar(pos + width / 2 + 0.08,
+# y,
+# err,
+# lw=2,
+# alpha=0.5,
+# color='teal')
+#
+# p1.grid(True)
+# p1.plot(xt, op_per_vm, '--', label=legend + "/thread", color='black')
+# handles1, labels1 = p1.get_legend_handles_labels()
+#
+# handles1 += [err1_leg]
+# labels1 += ["95% conf"]
+#
+# if err2_leg is not None:
+# handles1 += [err2_leg]
+# labels1 += ["95% dev"]
+#
+# p2 = p1.twinx()
+#
+# if latv_50 is None:
+# p2.plot(xt, latv_max, label="lat max")
+# p2.plot(xt, latv, label="lat avg")
+# p2.plot(xt, latv_min, label="lat min")
+# else:
+# p2.plot(xt, latv_50, label="lat med")
+# p2.plot(xt, latv_95, label="lat 95%")
+#
+# plt.xlim(0.5, lc + 0.5)
+# plt.xticks(xt, ["{0} * {1}".format(vm, th) for (vm, th) in concurence])
+# p1.set_xlabel("VM Count * Thread per VM")
+# p1.set_ylabel(legend + points)
+# p2.set_ylabel("Latency ms")
+# plt.title(title)
+# handles2, labels2 = p2.get_legend_handles_labels()
+#
+# plt.legend(handles1 + handles2, labels1 + labels2,
+# loc='center left', bbox_to_anchor=(1.1, 0.81))
+#
+# if log_iops:
+# p1.set_yscale('log')
+#
+# if log_lat:
+# p2.set_yscale('log')
+#
+# plt.subplots_adjust(right=0.68)
+#
+# return get_emb_data_svg(plt)
+#
+#
+# def make_plots(processed_results, plots):
+# """
+# processed_results: [PerfInfo]
+# plots = [(test_name_prefix:str, fname:str, description:str)]
+# """
+# files = {}
+# for name_pref, fname, desc in plots:
+# chart_data = []
+#
+# for res in processed_results:
+# summ = res.name + "_" + res.summary
+# if summ.startswith(name_pref):
+# chart_data.append(res)
+#
+# if len(chart_data) == 0:
+# raise ValueError("Can't found any date for " + name_pref)
+#
+# use_bw = ssize2b(chart_data[0].p.blocksize) > 16 * 1024
+#
+# chart_data.sort(key=lambda x: x.params['vals']['numjobs'])
+#
+# lat = None
+# lat_min = None
+# lat_max = None
+#
+# lat_50 = [x.lat_50 for x in chart_data]
+# lat_95 = [x.lat_95 for x in chart_data]
+#
+# lat_diff_max = max(x.lat_95 / x.lat_50 for x in chart_data)
+# lat_log_scale = (lat_diff_max > 10)
+#
+# testnodes_count = x.testnodes_count
+# concurence = [(testnodes_count, x.concurence)
+# for x in chart_data]
+#
+# if use_bw:
+# data = [x.bw.average / 1000 for x in chart_data]
+# data_conf = [x.bw.confidence / 1000 for x in chart_data]
+# data_dev = [x.bw.deviation * 2.5 / 1000 for x in chart_data]
+# name = "BW"
+# else:
+# data = [x.iops.average for x in chart_data]
+# data_conf = [x.iops.confidence for x in chart_data]
+# data_dev = [x.iops.deviation * 2 for x in chart_data]
+# name = "IOPS"
+#
+# fc = io_chart(title=desc,
+# concurence=concurence,
+#
+# latv=lat,
+# latv_min=lat_min,
+# latv_max=lat_max,
+#
+# iops_or_bw=data,
+# iops_or_bw_err=data_conf,
+#
+# legend=name,
+# log_lat=lat_log_scale,
+#
+# latv_50=lat_50,
+# latv_95=lat_95,
+#
+# error2=data_dev)
+# files[fname] = fc
+#
+# return files
+#
+#
+# def find_max_where(processed_results, sync_mode, blocksize, rw, iops=True):
+# result = None
+# attr = 'iops' if iops else 'bw'
+# for measurement in processed_results:
+# ok = measurement.sync_mode == sync_mode
+# ok = ok and (measurement.p.blocksize == blocksize)
+# ok = ok and (measurement.p.rw == rw)
+#
+# if ok:
+# field = getattr(measurement, attr)
+#
+# if result is None:
+# result = field
+# elif field.average > result.average:
+# result = field
+#
+# return result
+#
+#
+# def get_disk_info(processed_results):
+# di = DiskInfo()
+# di.direct_iops_w_max = find_max_where(processed_results,
+# 'd', '4k', 'randwrite')
+# di.direct_iops_r_max = find_max_where(processed_results,
+# 'd', '4k', 'randread')
+#
+# di.direct_iops_w64_max = find_max_where(processed_results,
+# 'd', '64k', 'randwrite')
+#
+# for sz in ('16m', '64m'):
+# di.bw_write_max = find_max_where(processed_results,
+# 'd', sz, 'randwrite', False)
+# if di.bw_write_max is not None:
+# break
+#
+# if di.bw_write_max is None:
+# for sz in ('1m', '2m', '4m', '8m'):
+# di.bw_write_max = find_max_where(processed_results,
+# 'd', sz, 'write', False)
+# if di.bw_write_max is not None:
+# break
+#
+# for sz in ('16m', '64m'):
+# di.bw_read_max = find_max_where(processed_results,
+# 'd', sz, 'randread', False)
+# if di.bw_read_max is not None:
+# break
+#
+# if di.bw_read_max is None:
+# di.bw_read_max = find_max_where(processed_results,
+# 'd', '1m', 'read', False)
+#
+# rws4k_iops_lat_th = []
+# for res in processed_results:
+# if res.sync_mode in 'xs' and res.p.blocksize == '4k':
+# if res.p.rw != 'randwrite':
+# continue
+# rws4k_iops_lat_th.append((res.iops.average,
+# res.lat,
+# # res.lat.average,
+# res.concurence))
+#
+# rws4k_iops_lat_th.sort(key=lambda x: x[2])
+#
+# latv = [lat for _, lat, _ in rws4k_iops_lat_th]
+#
+# for tlat in [10, 30, 100]:
+# pos = bisect.bisect_left(latv, tlat)
+# if 0 == pos:
+# setattr(di, 'rws4k_{}ms'.format(tlat), 0)
+# elif pos == len(latv):
+# iops3, _, _ = rws4k_iops_lat_th[-1]
+# iops3 = int(round_3_digit(iops3))
+# setattr(di, 'rws4k_{}ms'.format(tlat), ">=" + str(iops3))
+# else:
+# lat1 = latv[pos - 1]
+# lat2 = latv[pos]
+#
+# iops1, _, th1 = rws4k_iops_lat_th[pos - 1]
+# iops2, _, th2 = rws4k_iops_lat_th[pos]
+#
+# th_lat_coef = (th2 - th1) / (lat2 - lat1)
+# th3 = th_lat_coef * (tlat - lat1) + th1
+#
+# th_iops_coef = (iops2 - iops1) / (th2 - th1)
+# iops3 = th_iops_coef * (th3 - th1) + iops1
+# iops3 = int(round_3_digit(iops3))
+# setattr(di, 'rws4k_{}ms'.format(tlat), iops3)
+#
+# hdi = DiskInfo()
+#
+# def pp(x):
+# med, conf = x.rounded_average_conf()
+# conf_perc = int(float(conf) / med * 100)
+# dev_perc = int(float(x.deviation) / med * 100)
+# return (round_3_digit(med), conf_perc, dev_perc)
+#
+# hdi.direct_iops_r_max = pp(di.direct_iops_r_max)
+#
+# if di.direct_iops_w_max is not None:
+# hdi.direct_iops_w_max = pp(di.direct_iops_w_max)
+# else:
+# hdi.direct_iops_w_max = None
+#
+# if di.direct_iops_w64_max is not None:
+# hdi.direct_iops_w64_max = pp(di.direct_iops_w64_max)
+# else:
+# hdi.direct_iops_w64_max = None
+#
+# hdi.bw_write_max = pp(di.bw_write_max)
+# hdi.bw_read_max = pp(di.bw_read_max)
+#
+# hdi.rws4k_10ms = di.rws4k_10ms if 0 != di.rws4k_10ms else None
+# hdi.rws4k_30ms = di.rws4k_30ms if 0 != di.rws4k_30ms else None
+# hdi.rws4k_100ms = di.rws4k_100ms if 0 != di.rws4k_100ms else None
+# return hdi
+#
+#
+# @report('hdd', 'hdd')
+# def make_hdd_report(processed_results, lab_info, comment):
+# plots = [
+# ('hdd_rrd4k', 'rand_read_4k', 'Random read 4k direct IOPS'),
+# ('hdd_rwx4k', 'rand_write_4k', 'Random write 4k sync IOPS')
+# ]
+# perf_infos = [res.disk_perf_info() for res in processed_results]
+# images = make_plots(perf_infos, plots)
+# di = get_disk_info(perf_infos)
+# return render_all_html(comment, di, lab_info, images, "report_hdd.html")
+#
+#
+# @report('cinder_iscsi', 'cinder_iscsi')
+# def make_cinder_iscsi_report(processed_results, lab_info, comment):
+# plots = [
+# ('cinder_iscsi_rrd4k', 'rand_read_4k', 'Random read 4k direct IOPS'),
+# ('cinder_iscsi_rwx4k', 'rand_write_4k', 'Random write 4k sync IOPS')
+# ]
+# perf_infos = [res.disk_perf_info() for res in processed_results]
+# try:
+# images = make_plots(perf_infos, plots)
+# except ValueError:
+# plots = [
+# ('cinder_iscsi_rrd4k', 'rand_read_4k', 'Random read 4k direct IOPS'),
+# ('cinder_iscsi_rws4k', 'rand_write_4k', 'Random write 4k sync IOPS')
+# ]
+# images = make_plots(perf_infos, plots)
+# di = get_disk_info(perf_infos)
+#
+# return render_all_html(comment, di, lab_info, images, "report_cinder_iscsi.html")
+#
+#
+# @report('ceph', 'ceph')
+# def make_ceph_report(processed_results, lab_info, comment):
+# plots = [
+# ('ceph_rrd4k', 'rand_read_4k', 'Random read 4k direct IOPS'),
+# ('ceph_rws4k', 'rand_write_4k', 'Random write 4k sync IOPS'),
+# ('ceph_rrd16m', 'rand_read_16m', 'Random read 16m direct MiBps'),
+# ('ceph_rwd16m', 'rand_write_16m',
+# 'Random write 16m direct MiBps'),
+# ]
+#
+# perf_infos = [res.disk_perf_info() for res in processed_results]
+# images = make_plots(perf_infos, plots)
+# di = get_disk_info(perf_infos)
+# return render_all_html(comment, di, lab_info, images, "report_ceph.html")
+#
+#
+# @report('mixed', 'mixed')
+# def make_mixed_report(processed_results, lab_info, comment):
+# #
+# # IOPS(X% read) = 100 / ( X / IOPS_R + (100 - X) / IOPS_W )
+# #
+#
+# perf_infos = [res.disk_perf_info() for res in processed_results]
+# mixed = collections.defaultdict(lambda: [])
+#
+# is_ssd = False
+# for res in perf_infos:
+# if res.name.startswith('mixed'):
+# if res.name.startswith('mixed-ssd'):
+# is_ssd = True
+# mixed[res.concurence].append((res.p.rwmixread,
+# res.lat,
+# 0,
+# # res.lat.average / 1000.0,
+# # res.lat.deviation / 1000.0,
+# res.iops.average,
+# res.iops.deviation))
+#
+# if len(mixed) == 0:
+# raise ValueError("No mixed load found")
+#
+# fig, p1 = plt.subplots()
+# p2 = p1.twinx()
+#
+# colors = ['red', 'green', 'blue', 'orange', 'magenta', "teal"]
+# colors_it = iter(colors)
+# for conc, mix_lat_iops in sorted(mixed.items()):
+# mix_lat_iops = sorted(mix_lat_iops)
+# read_perc, lat, dev, iops, iops_dev = zip(*mix_lat_iops)
+# p1.errorbar(read_perc, iops, color=next(colors_it),
+# yerr=iops_dev, label=str(conc) + " th")
+#
+# p2.errorbar(read_perc, lat, color=next(colors_it),
+# ls='--', yerr=dev, label=str(conc) + " th lat")
+#
+# if is_ssd:
+# p1.set_yscale('log')
+# p2.set_yscale('log')
+#
+# p1.set_xlim(-5, 105)
+#
+# read_perc = set(read_perc)
+# read_perc.add(0)
+# read_perc.add(100)
+# read_perc = sorted(read_perc)
+#
+# plt.xticks(read_perc, map(str, read_perc))
+#
+# p1.grid(True)
+# p1.set_xlabel("% of reads")
+# p1.set_ylabel("Mixed IOPS")
+# p2.set_ylabel("Latency, ms")
+#
+# handles1, labels1 = p1.get_legend_handles_labels()
+# handles2, labels2 = p2.get_legend_handles_labels()
+# plt.subplots_adjust(top=0.85)
+# plt.legend(handles1 + handles2, labels1 + labels2,
+# bbox_to_anchor=(0.5, 1.15),
+# loc='upper center',
+# prop={'size': 12}, ncol=3)
+# plt.show()
+#
+#
+# def make_load_report(idx, results_dir, fname):
+# dpath = os.path.join(results_dir, "io_" + str(idx))
+# files = sorted(os.listdir(dpath))
+# gf = lambda x: "_".join(x.rsplit(".", 1)[0].split('_')[:3])
+#
+# for key, group in itertools.groupby(files, gf):
+# fname = os.path.join(dpath, key + ".fio")
+#
+# cfgs = list(parse_all_in_1(open(fname).read(), fname))
+#
+# fname = os.path.join(dpath, key + "_lat.log")
+#
+# curr = []
+# arrays = []
+#
+# with open(fname) as fd:
+# for offset, lat, _, _ in csv.reader(fd):
+# offset = int(offset)
+# lat = int(lat)
+# if len(curr) > 0 and curr[-1][0] > offset:
+# arrays.append(curr)
+# curr = []
+# curr.append((offset, lat))
+# arrays.append(curr)
+# conc = int(cfgs[0].vals.get('numjobs', 1))
+#
+# if conc != 5:
+# continue
+#
+# assert len(arrays) == len(cfgs) * conc
+#
+# garrays = [[(0, 0)] for _ in range(conc)]
+#
+# for offset in range(len(cfgs)):
+# for acc, new_arr in zip(garrays, arrays[offset * conc:(offset + 1) * conc]):
+# last = acc[-1][0]
+# for off, lat in new_arr:
+# acc.append((off / 1000. + last, lat / 1000.))
+#
+# for cfg, arr in zip(cfgs, garrays):
+# plt.plot(*zip(*arr[1:]))
+# plt.show()
+# exit(1)
+#
+#
+# def make_io_report(dinfo, comment, path, lab_info=None):
+# lab_info = {
+# "total_disk": "None",
+# "total_memory": "None",
+# "nodes_count": "None",
+# "processor_count": "None"
+# }
+#
+# try:
+# res_fields = sorted(v.name for v in dinfo)
+#
+# found = False
+# for fields, name, func in report_funcs:
+# for field in fields:
+# pos = bisect.bisect_left(res_fields, field)
+#
+# if pos == len(res_fields):
+# break
+#
+# if not res_fields[pos].startswith(field):
+# break
+# else:
+# found = True
+# hpath = path.format(name)
+#
+# try:
+# report = func(dinfo, lab_info, comment)
+# except:
+# logger.exception("Diring {0} report generation".format(name))
+# continue
+#
+# if report is not None:
+# try:
+# with open(hpath, "w") as fd:
+# fd.write(report)
+# except:
+# logger.exception("Diring saving {0} report".format(name))
+# continue
+# logger.info("Report {0} saved into {1}".format(name, hpath))
+# else:
+# logger.warning("No report produced by {0!r}".format(name))
+#
+# if not found:
+# logger.warning("No report generator found for this load")
+#
+# except Exception as exc:
+# import traceback
+# traceback.print_exc()
+# logger.error("Failed to generate html report:" + str(exc))
+#
+#
+# # @classmethod
+# # def prepare_data(cls, results) -> List[Dict[str, Any]]:
+# # """create a table with io performance report for console"""
+# #
+# # def key_func(data: FioRunResult) -> Tuple[str, str, str, str, int]:
+# # tpl = data.summary_tpl()
+# # return (data.name,
+# # tpl.oper,
+# # tpl.mode,
+# # ssize2b(tpl.bsize),
+# # int(tpl.th_count) * int(tpl.vm_count))
+# # res = []
+# #
+# # for item in sorted(results, key=key_func):
+# # test_dinfo = item.disk_perf_info()
+# # testnodes_count = len(item.config.nodes)
+# #
+# # iops, _ = test_dinfo.iops.rounded_average_conf()
+# #
+# # if test_dinfo.iops_sys is not None:
+# # iops_sys, iops_sys_conf = test_dinfo.iops_sys.rounded_average_conf()
+# # _, iops_sys_dev = test_dinfo.iops_sys.rounded_average_dev()
+# # iops_sys_per_vm = round_3_digit(iops_sys / testnodes_count)
+# # iops_sys = round_3_digit(iops_sys)
+# # else:
+# # iops_sys = None
+# # iops_sys_per_vm = None
+# # iops_sys_dev = None
+# # iops_sys_conf = None
+# #
+# # bw, bw_conf = test_dinfo.bw.rounded_average_conf()
+# # _, bw_dev = test_dinfo.bw.rounded_average_dev()
+# # conf_perc = int(round(bw_conf * 100 / bw))
+# # dev_perc = int(round(bw_dev * 100 / bw))
+# #
+# # lat_50 = round_3_digit(int(test_dinfo.lat_50))
+# # lat_95 = round_3_digit(int(test_dinfo.lat_95))
+# # lat_avg = round_3_digit(int(test_dinfo.lat_avg))
+# #
+# # iops_per_vm = round_3_digit(iops / testnodes_count)
+# # bw_per_vm = round_3_digit(bw / testnodes_count)
+# #
+# # iops = round_3_digit(iops)
+# # bw = round_3_digit(bw)
+# #
+# # summ = "{0.oper}{0.mode} {0.bsize:>4} {0.th_count:>3}th {0.vm_count:>2}vm".format(item.summary_tpl())
+# #
+# # res.append({"name": key_func(item)[0],
+# # "key": key_func(item)[:4],
+# # "summ": summ,
+# # "iops": int(iops),
+# # "bw": int(bw),
+# # "conf": str(conf_perc),
+# # "dev": str(dev_perc),
+# # "iops_per_vm": int(iops_per_vm),
+# # "bw_per_vm": int(bw_per_vm),
+# # "lat_50": lat_50,
+# # "lat_95": lat_95,
+# # "lat_avg": lat_avg,
+# #
+# # "iops_sys": iops_sys,
+# # "iops_sys_per_vm": iops_sys_per_vm,
+# # "sys_conf": iops_sys_conf,
+# # "sys_dev": iops_sys_dev})
+# #
+# # return res
+# #
+# # Field = collections.namedtuple("Field", ("header", "attr", "allign", "size"))
+# # fiels_and_header = [
+# # Field("Name", "name", "l", 7),
+# # Field("Description", "summ", "l", 19),
+# # Field("IOPS\ncum", "iops", "r", 3),
+# # # Field("IOPS_sys\ncum", "iops_sys", "r", 3),
+# # Field("KiBps\ncum", "bw", "r", 6),
+# # Field("Cnf %\n95%", "conf", "r", 3),
+# # Field("Dev%", "dev", "r", 3),
+# # Field("iops\n/vm", "iops_per_vm", "r", 3),
+# # Field("KiBps\n/vm", "bw_per_vm", "r", 6),
+# # Field("lat ms\nmedian", "lat_50", "r", 3),
+# # Field("lat ms\n95%", "lat_95", "r", 3),
+# # Field("lat\navg", "lat_avg", "r", 3),
+# # ]
+# #
+# # fiels_and_header_dct = dict((item.attr, item) for item in fiels_and_header)
+# #
+# # @classmethod
+# # def format_for_console(cls, results) -> str:
+# # """create a table with io performance report for console"""
+# #
+# # tab = texttable.Texttable(max_width=120)
+# # tab.set_deco(tab.HEADER | tab.VLINES | tab.BORDER)
+# # tab.set_cols_align([f.allign for f in cls.fiels_and_header])
+# # sep = ["-" * f.size for f in cls.fiels_and_header]
+# # tab.header([f.header for f in cls.fiels_and_header])
+# # prev_k = None
+# # for item in cls.prepare_data(results):
+# # if prev_k is not None:
+# # if prev_k != item["key"]:
+# # tab.add_row(sep)
+# #
+# # prev_k = item["key"]
+# # tab.add_row([item[f.attr] for f in cls.fiels_and_header])
+# #
+# # return tab.draw()
+# #
+# # @classmethod
+# # def format_diff_for_console(cls, list_of_results: List[Any]) -> str:
+# # """create a table with io performance report for console"""
+# #
+# # tab = texttable.Texttable(max_width=200)
+# # tab.set_deco(tab.HEADER | tab.VLINES | tab.BORDER)
+# #
+# # header = [
+# # cls.fiels_and_header_dct["name"].header,
+# # cls.fiels_and_header_dct["summ"].header,
+# # ]
+# # allign = ["l", "l"]
+# #
+# # header.append("IOPS ~ Cnf% ~ Dev%")
+# # allign.extend(["r"] * len(list_of_results))
+# # header.extend(
+# # "IOPS_{0} %".format(i + 2) for i in range(len(list_of_results[1:]))
+# # )
+# #
+# # header.append("BW")
+# # allign.extend(["r"] * len(list_of_results))
+# # header.extend(
+# # "BW_{0} %".format(i + 2) for i in range(len(list_of_results[1:]))
+# # )
+# #
+# # header.append("LAT")
+# # allign.extend(["r"] * len(list_of_results))
+# # header.extend(
+# # "LAT_{0}".format(i + 2) for i in range(len(list_of_results[1:]))
+# # )
+# #
+# # tab.header(header)
+# # sep = ["-" * 3] * len(header)
+# # processed_results = map(cls.prepare_data, list_of_results)
+# #
+# # key2results = []
+# # for res in processed_results:
+# # key2results.append(dict(
+# # ((item["name"], item["summ"]), item) for item in res
+# # ))
+# #
+# # prev_k = None
+# # iops_frmt = "{0[iops]} ~ {0[conf]:>2} ~ {0[dev]:>2}"
+# # for item in processed_results[0]:
+# # if prev_k is not None:
+# # if prev_k != item["key"]:
+# # tab.add_row(sep)
+# #
+# # prev_k = item["key"]
+# #
+# # key = (item['name'], item['summ'])
+# # line = list(key)
+# # base = key2results[0][key]
+# #
+# # line.append(iops_frmt.format(base))
+# #
+# # for test_results in key2results[1:]:
+# # val = test_results.get(key)
+# # if val is None:
+# # line.append("-")
+# # elif base['iops'] == 0:
+# # line.append("Nan")
+# # else:
+# # prc_val = {'dev': val['dev'], 'conf': val['conf']}
+# # prc_val['iops'] = int(100 * val['iops'] / base['iops'])
+# # line.append(iops_frmt.format(prc_val))
+# #
+# # line.append(base['bw'])
+# #
+# # for test_results in key2results[1:]:
+# # val = test_results.get(key)
+# # if val is None:
+# # line.append("-")
+# # elif base['bw'] == 0:
+# # line.append("Nan")
+# # else:
+# # line.append(int(100 * val['bw'] / base['bw']))
+# #
+# # for test_results in key2results:
+# # val = test_results.get(key)
+# # if val is None:
+# # line.append("-")
+# # else:
+# # line.append("{0[lat_50]} - {0[lat_95]}".format(val))
+# #
+# # tab.add_row(line)
+# #
+# # tab.set_cols_align(allign)
+# # return tab.draw()
+#
+#
+# # READ_IOPS_DISCSTAT_POS = 3
+# # WRITE_IOPS_DISCSTAT_POS = 7
+# #
+# #
+# # def load_sys_log_file(ftype: str, fname: str) -> TimeSeriesValue:
+# # assert ftype == 'iops'
+# # pval = None
+# # with open(fname) as fd:
+# # iops = []
+# # for ln in fd:
+# # params = ln.split()
+# # cval = int(params[WRITE_IOPS_DISCSTAT_POS]) + \
+# # int(params[READ_IOPS_DISCSTAT_POS])
+# # if pval is not None:
+# # iops.append(cval - pval)
+# # pval = cval
+# #
+# # vals = [(idx * 1000, val) for idx, val in enumerate(iops)]
+# # return TimeSeriesValue(vals)
+# #
+# #
+# # def load_test_results(folder: str, run_num: int) -> 'FioRunResult':
+# # res = {}
+# # params = None
+# #
+# # fn = os.path.join(folder, str(run_num) + '_params.yaml')
+# # params = yaml.load(open(fn).read())
+# #
+# # conn_ids_set = set()
+# # rr = r"{}_(?P<conn_id>.*?)_(?P<type>[^_.]*)\.\d+\.log$".format(run_num)
+# # for fname in os.listdir(folder):
+# # rm = re.match(rr, fname)
+# # if rm is None:
+# # continue
+# #
+# # conn_id_s = rm.group('conn_id')
+# # conn_id = conn_id_s.replace('_', ':')
+# # ftype = rm.group('type')
+# #
+# # if ftype not in ('iops', 'bw', 'lat'):
+# # continue
+# #
+# # ts = load_fio_log_file(os.path.join(folder, fname))
+# # res.setdefault(ftype, {}).setdefault(conn_id, []).append(ts)
+# #
+# # conn_ids_set.add(conn_id)
+# #
+# # rr = r"{}_(?P<conn_id>.*?)_(?P<type>[^_.]*)\.sys\.log$".format(run_num)
+# # for fname in os.listdir(folder):
+# # rm = re.match(rr, fname)
+# # if rm is None:
+# # continue
+# #
+# # conn_id_s = rm.group('conn_id')
+# # conn_id = conn_id_s.replace('_', ':')
+# # ftype = rm.group('type')
+# #
+# # if ftype not in ('iops', 'bw', 'lat'):
+# # continue
+# #
+# # ts = load_sys_log_file(ftype, os.path.join(folder, fname))
+# # res.setdefault(ftype + ":sys", {}).setdefault(conn_id, []).append(ts)
+# #
+# # conn_ids_set.add(conn_id)
+# #
+# # mm_res = {}
+# #
+# # if len(res) == 0:
+# # raise ValueError("No data was found")
+# #
+# # for key, data in res.items():
+# # conn_ids = sorted(conn_ids_set)
+# # awail_ids = [conn_id for conn_id in conn_ids if conn_id in data]
+# # matr = [data[conn_id] for conn_id in awail_ids]
+# # mm_res[key] = MeasurementMatrix(matr, awail_ids)
+# #
+# # raw_res = {}
+# # for conn_id in conn_ids:
+# # fn = os.path.join(folder, "{0}_{1}_rawres.json".format(run_num, conn_id_s))
+# #
+# # # remove message hack
+# # fc = "{" + open(fn).read().split('{', 1)[1]
+# # raw_res[conn_id] = json.loads(fc)
+# #
+# # fio_task = FioJobSection(params['name'])
+# # fio_task.vals.update(params['vals'])
+# #
+# # config = TestConfig('io', params, None, params['nodes'], folder, None)
+# # return FioRunResult(config, fio_task, mm_res, raw_res, params['intervals'], run_num)
+# #
+#
+# # class DiskPerfInfo:
+# # def __init__(self, name: str, summary: str, params: Dict[str, Any], testnodes_count: int) -> None:
+# # self.name = name
+# # self.bw = None
+# # self.iops = None
+# # self.lat = None
+# # self.lat_50 = None
+# # self.lat_95 = None
+# # self.lat_avg = None
+# #
+# # self.raw_bw = []
+# # self.raw_iops = []
+# # self.raw_lat = []
+# #
+# # self.params = params
+# # self.testnodes_count = testnodes_count
+# # self.summary = summary
+# #
+# # self.sync_mode = get_test_sync_mode(self.params['vals'])
+# # self.concurence = self.params['vals'].get('numjobs', 1)
+# #
+# #
+# # class IOTestResults:
+# # def __init__(self, suite_name: str, fio_results: 'FioRunResult', log_directory: str):
+# # self.suite_name = suite_name
+# # self.fio_results = fio_results
+# # self.log_directory = log_directory
+# #
+# # def __iter__(self):
+# # return iter(self.fio_results)
+# #
+# # def __len__(self):
+# # return len(self.fio_results)
+# #
+# # def get_yamable(self) -> Dict[str, List[str]]:
+# # items = [(fio_res.summary(), fio_res.idx) for fio_res in self]
+# # return {self.suite_name: [self.log_directory] + items}
+#
+#
+# # class FioRunResult(TestResults):
+# # """
+# # Fio run results
+# # config: TestConfig
+# # fio_task: FioJobSection
+# # ts_results: {str: MeasurementMatrix[TimeSeriesValue]}
+# # raw_result: ????
+# # run_interval:(float, float) - test run time, used for sensors
+# # """
+# # def __init__(self, config, fio_task, ts_results, raw_result, run_interval, idx):
+# #
+# # self.name = fio_task.name.rsplit("_", 1)[0]
+# # self.fio_task = fio_task
+# # self.idx = idx
+# #
+# # self.bw = ts_results['bw']
+# # self.lat = ts_results['lat']
+# # self.iops = ts_results['iops']
+# #
+# # if 'iops:sys' in ts_results:
+# # self.iops_sys = ts_results['iops:sys']
+# # else:
+# # self.iops_sys = None
+# #
+# # res = {"bw": self.bw,
+# # "lat": self.lat,
+# # "iops": self.iops,
+# # "iops:sys": self.iops_sys}
+# #
+# # self.sensors_data = None
+# # self._pinfo = None
+# # TestResults.__init__(self, config, res, raw_result, run_interval)
+# #
+# # def get_params_from_fio_report(self):
+# # nodes = self.bw.connections_ids
+# #
+# # iops = [self.raw_result[node]['jobs'][0]['mixed']['iops'] for node in nodes]
+# # total_ios = [self.raw_result[node]['jobs'][0]['mixed']['total_ios'] for node in nodes]
+# # runtime = [self.raw_result[node]['jobs'][0]['mixed']['runtime'] / 1000 for node in nodes]
+# # flt_iops = [float(ios) / rtime for ios, rtime in zip(total_ios, runtime)]
+# #
+# # bw = [self.raw_result[node]['jobs'][0]['mixed']['bw'] for node in nodes]
+# # total_bytes = [self.raw_result[node]['jobs'][0]['mixed']['io_bytes'] for node in nodes]
+# # flt_bw = [float(tbytes) / rtime for tbytes, rtime in zip(total_bytes, runtime)]
+# #
+# # return {'iops': iops,
+# # 'flt_iops': flt_iops,
+# # 'bw': bw,
+# # 'flt_bw': flt_bw}
+# #
+# # def summary(self):
+# # return get_test_summary(self.fio_task, len(self.config.nodes))
+# #
+# # def summary_tpl(self):
+# # return get_test_summary_tuple(self.fio_task, len(self.config.nodes))
+# #
+# # def get_lat_perc_50_95_multy(self):
+# # lat_mks = collections.defaultdict(lambda: 0)
+# # num_res = 0
+# #
+# # for result in self.raw_result.values():
+# # num_res += len(result['jobs'])
+# # for job_info in result['jobs']:
+# # for k, v in job_info['latency_ms'].items():
+# # if isinstance(k, basestring) and k.startswith('>='):
+# # lat_mks[int(k[2:]) * 1000] += v
+# # else:
+# # lat_mks[int(k) * 1000] += v
+# #
+# # for k, v in job_info['latency_us'].items():
+# # lat_mks[int(k)] += v
+# #
+# # for k, v in lat_mks.items():
+# # lat_mks[k] = float(v) / num_res
+# # return get_lat_perc_50_95(lat_mks)
+# #
+# # def disk_perf_info(self, avg_interval=2.0):
+# #
+# # if self._pinfo is not None:
+# # return self._pinfo
+# #
+# # testnodes_count = len(self.config.nodes)
+# #
+# # pinfo = DiskPerfInfo(self.name,
+# # self.summary(),
+# # self.params,
+# # testnodes_count)
+# #
+# # def prepare(data, drop=1):
+# # if data is None:
+# # return data
+# #
+# # res = []
+# # for ts_data in data:
+# # if ts_data.average_interval() < avg_interval:
+# # ts_data = ts_data.derived(avg_interval)
+# #
+# # # drop last value on bounds
+# # # as they may contain ranges without activity
+# # assert len(ts_data.values) >= drop + 1, str(drop) + " " + str(ts_data.values)
+# #
+# # if drop > 0:
+# # res.append(ts_data.values[:-drop])
+# # else:
+# # res.append(ts_data.values)
+# #
+# # return res
+# #
+# # def agg_data(matr):
+# # arr = sum(matr, [])
+# # min_len = min(map(len, arr))
+# # res = []
+# # for idx in range(min_len):
+# # res.append(sum(dt[idx] for dt in arr))
+# # return res
+# #
+# # pinfo.raw_lat = map(prepare, self.lat.per_vm())
+# # num_th = sum(map(len, pinfo.raw_lat))
+# # lat_avg = [val / num_th for val in agg_data(pinfo.raw_lat)]
+# # pinfo.lat_avg = data_property(lat_avg).average / 1000 # us to ms
+# #
+# # pinfo.lat_50, pinfo.lat_95 = self.get_lat_perc_50_95_multy()
+# # pinfo.lat = pinfo.lat_50
+# #
+# # pinfo.raw_bw = map(prepare, self.bw.per_vm())
+# # pinfo.raw_iops = map(prepare, self.iops.per_vm())
+# #
+# # if self.iops_sys is not None:
+# # pinfo.raw_iops_sys = map(prepare, self.iops_sys.per_vm())
+# # pinfo.iops_sys = data_property(agg_data(pinfo.raw_iops_sys))
+# # else:
+# # pinfo.raw_iops_sys = None
+# # pinfo.iops_sys = None
+# #
+# # fparams = self.get_params_from_fio_report()
+# # fio_report_bw = sum(fparams['flt_bw'])
+# # fio_report_iops = sum(fparams['flt_iops'])
+# #
+# # agg_bw = agg_data(pinfo.raw_bw)
+# # agg_iops = agg_data(pinfo.raw_iops)
+# #
+# # log_bw_avg = average(agg_bw)
+# # log_iops_avg = average(agg_iops)
+# #
+# # # update values to match average from fio report
+# # coef_iops = fio_report_iops / float(log_iops_avg)
+# # coef_bw = fio_report_bw / float(log_bw_avg)
+# #
+# # bw_log = data_property([val * coef_bw for val in agg_bw])
+# # iops_log = data_property([val * coef_iops for val in agg_iops])
+# #
+# # bw_report = data_property([fio_report_bw])
+# # iops_report = data_property([fio_report_iops])
+# #
+# # # When IOPS/BW per thread is too low
+# # # data from logs is rounded to match
+# # iops_per_th = sum(sum(pinfo.raw_iops, []), [])
+# # if average(iops_per_th) > 10:
+# # pinfo.iops = iops_log
+# # pinfo.iops2 = iops_report
+# # else:
+# # pinfo.iops = iops_report
+# # pinfo.iops2 = iops_log
+# #
+# # bw_per_th = sum(sum(pinfo.raw_bw, []), [])
+# # if average(bw_per_th) > 10:
+# # pinfo.bw = bw_log
+# # pinfo.bw2 = bw_report
+# # else:
+# # pinfo.bw = bw_report
+# # pinfo.bw2 = bw_log
+# #
+# # self._pinfo = pinfo
+# #
+# # return pinfo
+#
+# # class TestResult:
+# # """Hold all information for a given test - test info,
+# # sensors data and performance results for test period from all nodes"""
+# # run_id = None # type: int
+# # test_info = None # type: Any
+# # begin_time = None # type: int
+# # end_time = None # type: int
+# # sensors = None # Dict[Tuple[str, str, str], TimeSeries]
+# # performance = None # Dict[Tuple[str, str], TimeSeries]
+# #
+# # class TestResults:
+# # """
+# # this class describes test results
+# #
+# # config:TestConfig - test config object
+# # params:dict - parameters from yaml file for this test
+# # results:{str:MeasurementMesh} - test results object
+# # raw_result:Any - opaque object to store raw results
+# # run_interval:(float, float) - test run time, used for sensors
+# # """
+# #
+# # def __init__(self,
+# # config: TestConfig,
+# # results: Dict[str, Any],
+# # raw_result: Any,
+# # run_interval: Tuple[float, float]) -> None:
+# # self.config = config
+# # self.params = config.params
+# # self.results = results
+# # self.raw_result = raw_result
+# # self.run_interval = run_interval
+# #
+# # def __str__(self) -> str:
+# # res = "{0}({1}):\n results:\n".format(
+# # self.__class__.__name__,
+# # self.summary())
+# #
+# # for name, val in self.results.items():
+# # res += " {0}={1}\n".format(name, val)
+# #
+# # res += " params:\n"
+# #
+# # for name, val in self.params.items():
+# # res += " {0}={1}\n".format(name, val)
+# #
+# # return res
+# #
+# # def summary(self) -> str:
+# # raise NotImplementedError()
+# # return ""
+# #
+# # def get_yamable(self) -> Any:
+# # raise NotImplementedError()
+# # return None
+#
+#
+#
+# # class MeasurementMatrix:
+# # """
+# # data:[[MeasurementResult]] - VM_COUNT x TH_COUNT matrix of MeasurementResult
+# # """
+# # def __init__(self, data, connections_ids):
+# # self.data = data
+# # self.connections_ids = connections_ids
+# #
+# # def per_vm(self):
+# # return self.data
+# #
+# # def per_th(self):
+# # return sum(self.data, [])
+#
+#
+# # class MeasurementResults:
+# # data = None # type: List[Any]
+# #
+# # def stat(self) -> StatProps:
+# # return data_property(self.data)
+# #
+# # def __str__(self) -> str:
+# # return 'TS([' + ", ".join(map(str, self.data)) + '])'
+# #
+# #
+# # class SimpleVals(MeasurementResults):
+# # """
+# # data:[float] - list of values
+# # """
+# # def __init__(self, data: List[float]) -> None:
+# # self.data = data
+# #
+# #
+# # class TimeSeriesValue(MeasurementResults):
+# # """
+# # data:[(float, float, float)] - list of (start_time, lenght, average_value_for_interval)
+# # odata: original values
+# # """
+# # def __init__(self, data: List[Tuple[float, float]]) -> None:
+# # assert len(data) > 0
+# # self.odata = data[:]
+# # self.data = [] # type: List[Tuple[float, float, float]]
+# #
+# # cstart = 0.0
+# # for nstart, nval in data:
+# # self.data.append((cstart, nstart - cstart, nval))
+# # cstart = nstart
+# #
+# # @property
+# # def values(self) -> List[float]:
+# # return [val[2] for val in self.data]
+# #
+# # def average_interval(self) -> float:
+# # return float(sum([val[1] for val in self.data])) / len(self.data)
+# #
+# # def skip(self, seconds) -> 'TimeSeriesValue':
+# # nres = []
+# # for start, ln, val in self.data:
+# # nstart = start + ln - seconds
+# # if nstart > 0:
+# # nres.append([nstart, val])
+# # return self.__class__(nres)
+# #
+# # def derived(self, tdelta) -> 'TimeSeriesValue':
+# # end = self.data[-1][0] + self.data[-1][1]
+# # tdelta = float(tdelta)
+# #
+# # ln = end / tdelta
+# #
+# # if ln - int(ln) > 0:
+# # ln += 1
+# #
+# # res = [[tdelta * i, 0.0] for i in range(int(ln))]
+# #
+# # for start, lenght, val in self.data:
+# # start_idx = int(start / tdelta)
+# # end_idx = int((start + lenght) / tdelta)
+# #
+# # for idx in range(start_idx, end_idx + 1):
+# # rstart = tdelta * idx
+# # rend = tdelta * (idx + 1)
+# #
+# # intersection_ln = min(rend, start + lenght) - max(start, rstart)
+# # if intersection_ln > 0:
+# # try:
+# # res[idx][1] += val * intersection_ln / tdelta
+# # except IndexError:
+# # raise
+# #
+# # return self.__class__(res)
+#
+#
+# def console_report_stage(ctx: TestRun) -> None:
+# # TODO(koder): load data from storage
+# raise NotImplementedError("...")
+# # first_report = True
+# # text_rep_fname = ctx.config.text_report_file
+# #
+# # with open(text_rep_fname, "w") as fd:
+# # for tp, data in ctx.results.items():
+# # if 'io' == tp and data is not None:
+# # rep_lst = []
+# # for result in data:
+# # rep_lst.append(
+# # IOPerfTest.format_for_console(list(result)))
+# # rep = "\n\n".join(rep_lst)
+# # elif tp in ['mysql', 'pgbench'] and data is not None:
+# # rep = MysqlTest.format_for_console(data)
+# # elif tp == 'omg':
+# # rep = OmgTest.format_for_console(data)
+# # else:
+# # logger.warning("Can't generate text report for " + tp)
+# # continue
+# #
+# # fd.write(rep)
+# # fd.write("\n")
+# #
+# # if first_report:
+# # logger.info("Text report were stored in " + text_rep_fname)
+# # first_report = False
+# #
+# # print("\n" + rep + "\n")
+#
+#
+# # def test_load_report_stage(cfg: Config, ctx: TestRun) -> None:
+# # load_rep_fname = cfg.load_report_file
+# # found = False
+# # for idx, (tp, data) in enumerate(ctx.results.items()):
+# # if 'io' == tp and data is not None:
+# # if found:
+# # logger.error("Making reports for more than one " +
+# # "io block isn't supported! All " +
+# # "report, except first are skipped")
+# # continue
+# # found = True
+# # report.make_load_report(idx, cfg['results'], load_rep_fname)
+# #
+# #
+#
+# # def html_report_stage(ctx: TestRun) -> None:
+# # TODO(koder): load data from storage
+# # raise NotImplementedError("...")
+# # html_rep_fname = cfg.html_report_file
+# # found = False
+# # for tp, data in ctx.results.items():
+# # if 'io' == tp and data is not None:
+# # if found or len(data) > 1:
+# # logger.error("Making reports for more than one " +
+# # "io block isn't supported! All " +
+# # "report, except first are skipped")
+# # continue
+# # found = True
+# # report.make_io_report(list(data[0]),
+# # cfg.get('comment', ''),
+# # html_rep_fname,
+# # lab_info=ctx.nodes)
+#
+# #
+# # def load_data_from_path(test_res_dir: str) -> Mapping[str, List[Any]]:
+# # files = get_test_files(test_res_dir)
+# # raw_res = yaml_load(open(files['raw_results']).read())
+# # res = collections.defaultdict(list)
+# #
+# # for tp, test_lists in raw_res:
+# # for tests in test_lists:
+# # for suite_name, suite_data in tests.items():
+# # result_folder = suite_data[0]
+# # res[tp].append(TOOL_TYPE_MAPPER[tp].load(suite_name, result_folder))
+# #
+# # return res
+# #
+# #
+# # def load_data_from_path_stage(var_dir: str, _, ctx: TestRun) -> None:
+# # for tp, vals in load_data_from_path(var_dir).items():
+# # ctx.results.setdefault(tp, []).extend(vals)
+# #
+# #
+# # def load_data_from(var_dir: str) -> Callable[[TestRun], None]:
+# # return functools.partial(load_data_from_path_stage, var_dir)
diff --git a/wally/main.py b/wally/main.py
index fd9b5a0..fa1a801 100644
--- a/wally/main.py
+++ b/wally/main.py
@@ -48,7 +48,7 @@
from .run_test import (CollectInfoStage, ExplicitNodesStage, SaveNodesStage,
RunTestsStage, ConnectStage, SleepStage, PrepareNodes,
LoadStoredNodesStage)
-from .process_results import CalcStatisticStage
+# from .process_results import CalcStatisticStage
from .report import ConsoleReportStage, HtmlReportStage
from .sensors import StartSensorsStage, CollectSensorsStage
@@ -357,12 +357,14 @@
IPython.embed()
return 0
- elif opts.subparser_name == 'jupyter':
- with tempfile.NamedTemporaryFile() as fd:
- fd.write(notebook_kern.replace("$STORAGE", opts.storage_dir))
- subprocess.call("jupyter notebook ", shell=True)
- return 0
-
+ # elif opts.subparser_name == 'jupyter':
+ # with tempfile.NamedTemporaryFile() as fd:
+ # fd.write(notebook_kern.replace("$STORAGE", opts.storage_dir))
+ # subprocess.call("jupyter notebook ", shell=True)
+ # return 0
+ else:
+ print("Subparser {!r} is not supported".format(opts.subparser_name))
+ return 1
report_stages = [] # type: List[Stage]
if not getattr(opts, "no_report", False):
diff --git a/wally/node.py b/wally/node.py
index 38342c6..57ca701 100644
--- a/wally/node.py
+++ b/wally/node.py
@@ -163,11 +163,15 @@
self.conn = conn
def __str__(self) -> str:
- return "Node({!r})".format(self.info.node_id())
+ return "Node({!r})".format(self.info)
def __repr__(self) -> str:
return str(self)
+ @property
+ def node_id(self) -> str:
+ return self.info.node_id
+
def get_file_content(self, path: str, expanduser: bool = False, compress: bool = True) -> bytes:
logger.debug("GET %s from %s", path, self.info)
if expanduser:
@@ -230,10 +234,10 @@
def disconnect(self, stop: bool = False) -> None:
if stop:
- logger.debug("Stopping RPC server on %s", self.info.node_id())
+ logger.debug("Stopping RPC server on %s", self.info)
self.conn.server.stop()
- logger.debug("Disconnecting from %s", self.info.node_id())
+ logger.debug("Disconnecting from %s", self.info)
self.conn.disconnect()
self.conn = None
diff --git a/wally/node_interfaces.py b/wally/node_interfaces.py
index 935ca41..3992048 100644
--- a/wally/node_interfaces.py
+++ b/wally/node_interfaces.py
@@ -53,7 +53,7 @@
data['ssh_creds'] = ConnCreds.fromraw(data['ssh_creds'])
data['roles'] = set(data['roles'])
- obj = cls.__new__(cls)
+ obj = cls.__new__(cls) # type: ignore
obj.__dict__.update(data)
return obj
@@ -85,6 +85,10 @@
self.disconnect()
return False
+ @property
+ def node_id(self) -> str:
+ return self.info.node_id
+
class IRPCNode(metaclass=abc.ABCMeta):
"""Remote filesystem interface"""
@@ -105,15 +109,15 @@
pass
@abc.abstractmethod
- def copy_file(self, local_path: str, remote_path: str = None) -> str:
+ def copy_file(self, local_path: str, remote_path: str = None, compress: bool = False) -> str:
pass
@abc.abstractmethod
- def get_file_content(self, path: str) -> bytes:
+ def get_file_content(self, path: str, compress: bool = False) -> bytes:
pass
@abc.abstractmethod
- def put_to_file(self, path: Optional[str], content: bytes) -> str:
+ def put_to_file(self, path: Optional[str], content: bytes, compress: bool = False) -> str:
pass
@abc.abstractmethod
diff --git a/wally/process_results.py b/wally/process_results.py
index 112826e..b2ed783 100644
--- a/wally/process_results.py
+++ b/wally/process_results.py
@@ -1,53 +1,52 @@
# put all result preprocessing here
-# selection, aggregation
-
-from io import BytesIO
-import logging
-from typing import Any
-
-from .stage import Stage, StepOrder
-from .test_run_class import TestRun
-from .statistic import calc_norm_stat_props, calc_histo_stat_props
-from .result_classes import StatProps, DataSource, TimeSeries
-from .hlstorage import ResultStorage
-from .suits.io.fio_hist import get_lat_vals, expected_lat_bins
-from .suits.io.fio import FioTest
-from .utils import StopTestError
-
-import matplotlib
-matplotlib.use('svg')
-import matplotlib.pyplot as plt
-
-
-logger = logging.getLogger("wally")
-
-
-class CalcStatisticStage(Stage):
- priority = StepOrder.TEST + 1
-
- def run(self, ctx: TestRun) -> None:
- rstorage = ResultStorage(ctx.storage)
-
- for suite in rstorage.iter_suite(FioTest.name):
- for job in rstorage.iter_job(suite):
- results = {}
- for ts in rstorage.iter_ts(suite, job):
- if ts.source.sensor == 'lat':
- if ts.second_axis_size != expected_lat_bins:
- logger.error("Sensor %s.%s on node %s has" +
- "second_axis_size=%s. Can only process sensors with second_axis_size=%s.",
- ts.source.dev, ts.source.sensor, ts.source.node_id,
- ts.second_axis_size, expected_lat_bins)
- continue
- ts.bins_edges = get_lat_vals(ts.second_axis_size)
- stat_prop = calc_histo_stat_props(ts) # type: StatProps
-
- elif ts.second_axis_size != 1:
- logger.warning("Sensor %s.%s on node %s provide 2D data with " +
- "ts.second_axis_size=%s. Can't process it.",
- ts.source.dev, ts.source.sensor, ts.source.node_id, ts.second_axis_size)
- continue
- else:
- stat_prop = calc_norm_stat_props(ts)
-
- raise StopTestError()
+# # selection, aggregation
+#
+# from io import BytesIO
+# import logging
+# from typing import Any
+#
+# from .stage import Stage, StepOrder
+# from .test_run_class import TestRun
+# from .statistic import calc_norm_stat_props, calc_histo_stat_props
+# from .result_classes import StatProps, DataSource, TimeSeries
+# from .hlstorage import ResultStorage
+# from .suits.io.fio_hist import get_lat_vals, expected_lat_bins
+# from .suits.io.fio import FioTest
+# from .utils import StopTestError
+#
+# import matplotlib
+# matplotlib.use('svg')
+# import matplotlib.pyplot as plt
+#
+#
+# logger = logging.getLogger("wally")
+#
+#
+# class CalcStatisticStage(Stage):
+# priority = StepOrder.TEST + 1
+#
+# def run(self, ctx: TestRun) -> None:
+# rstorage = ResultStorage(ctx.storage)
+#
+# for suite in rstorage.iter_suite(FioTest.name):
+# for job in rstorage.iter_job(suite):
+# for ts in rstorage.iter_ts(suite, job):
+# if ts.source.sensor == 'lat':
+# if ts.data.shape[1] != expected_lat_bins:
+# logger.error("Sensor %s.%s on node %s has" +
+# "shape=%s. Can only process sensors with shape=[X,%s].",
+# ts.source.dev, ts.source.sensor, ts.source.node_id,
+# ts.data.shape, expected_lat_bins)
+# continue
+#
+# ts.bins_edges = get_lat_vals(ts.data.shape[1])
+# stat_prop = calc_histo_stat_props(ts) # type: StatProps
+#
+# elif len(ts.data.shape) != 1:
+# logger.warning("Sensor %s.%s on node %s provide 2+D data. Can't process it.",
+# ts.source.dev, ts.source.sensor, ts.source.node_id)
+# continue
+# else:
+# stat_prop = calc_norm_stat_props(ts)
+#
+# raise StopTestError()
diff --git a/wally/report.py b/wally/report.py
index f8d8c5a..0b0540e 100644
--- a/wally/report.py
+++ b/wally/report.py
@@ -1,36 +1,32 @@
import os
-import re
import abc
-import bisect
import logging
from io import BytesIO
from functools import wraps
-from typing import Dict, Any, Iterator, Tuple, cast, List, Callable
+from typing import Dict, Any, Iterator, Tuple, cast, List, Callable, Set, Optional
from collections import defaultdict
import numpy
-import matplotlib
-# have to be before pyplot import to avoid tkinter(default graph frontend) import error
-matplotlib.use('svg')
-import matplotlib.pyplot as plt
import scipy.stats
+import matplotlib.pyplot as plt
import wally
from . import html
-from .utils import b2ssize
from .stage import Stage, StepOrder
from .test_run_class import TestRun
from .hlstorage import ResultStorage
from .node_interfaces import NodeInfo
-from .storage import Storage
-from .statistic import calc_norm_stat_props, calc_histo_stat_props
-from .result_classes import (StatProps, DataSource, TimeSeries, TestSuiteConfig,
- NormStatProps, HistoStatProps, TestJobConfig)
+from .utils import b2ssize, b2ssize_10, STORAGE_ROLES
+from .statistic import (calc_norm_stat_props, calc_histo_stat_props, moving_average, moving_dev,
+ hist_outliers_perc, ts_hist_outliers_perc, find_ouliers_ts, approximate_curve,
+ rebin_histogram)
+from .result_classes import (StatProps, DataSource, TimeSeries, NormStatProps, HistoStatProps, SuiteConfig,
+ IResultStorage)
from .suits.io.fio_hist import get_lat_vals, expected_lat_bins
from .suits.io.fio import FioTest, FioJobConfig
-from .suits.io.fio_task_parser import FioTestSumm
-from .statistic import approximate_curve, average, dev
+from .suits.io.fio_job import FioJobParams
+from .suits.job import JobConfig
logger = logging.getLogger("wally")
@@ -48,34 +44,61 @@
# ---------------- PROFILES ------------------------------------------------------------------------------------------
+# These are the default values; the real values are loaded from the config
+
class ColorProfile:
primary_color = 'b'
suppl_color1 = 'teal'
suppl_color2 = 'magenta'
+ suppl_color3 = 'orange'
box_color = 'y'
+ err_color = 'red'
noise_alpha = 0.3
subinfo_alpha = 0.7
+ imshow_colormap = None # type: str
+
class StyleProfile:
grid = True
tide_layout = True
hist_boxes = 10
+ hist_lat_boxes = 25
+ hm_hist_bins_count = 25
min_points_for_dev = 5
dev_range_x = 2.0
dev_perc = 95
- avg_range = 20
+ point_shape = 'o'
+ err_point_shape = '*'
- curve_approx_level = 5
+ avg_range = 20
+ approx_average = True
+
+ curve_approx_level = 6
curve_approx_points = 100
assert avg_range >= min_points_for_dev
+ # figure size in inches
+ figsize = (10, 6)
+
extra_io_spine = True
legend_for_eng = True
+ heatmap_interpolation = '1d'
+ heatmap_interpolation_points = 300
+ outliers_q_nd = 3.0
+ outliers_hide_q_nd = 4.0
+ outliers_lat = (0.01, 0.995)
+
+ violin_instead_of_box = True
+ violin_point_count = 30000
+
+ heatmap_colorbar = False
+
+ min_iops_vs_qd_jobs = 3
units = {
'bw': ("MiBps", MiB2KiB, "bandwith"),
@@ -126,36 +149,10 @@
# -------------- AGGREGATION AND STAT FUNCTIONS ----------------------------------------------------------------------
-rexpr = {
- 'sensor': r'(?P<sensor>[-a-z]+)',
- 'dev': r'(?P<dev>[^.]+)',
- 'metric': r'(?P<metric>[a-z_]+)',
- 'node': r'(?P<node>\d+\.\d+\.\d+\.\d+:\d+)',
-}
-def iter_sensors(storage: Storage, node: str = None, sensor: str = None, dev: str = None, metric: str = None):
- if node is None:
- node = rexpr['node']
- if sensor is None:
- sensor = rexpr['sensor']
- if dev is None:
- dev = rexpr['dev']
- if metric is None:
- metric = rexpr['metric']
-
- rr = r"{}_{}\.{}\.{}$".format(node, sensor, dev, metric)
- sensor_name_re = re.compile(rr)
-
- for is_file, sensor_data_name in storage.list("sensors"):
- if is_file:
- rr = sensor_name_re.match(sensor_data_name)
- if rr:
- yield 'sensors/' + sensor_data_name, rr.groupdict()
-
-
-def make_iosum(rstorage: ResultStorage, suite: TestSuiteConfig, job: FioJobConfig) -> IOSummary:
+def make_iosum(rstorage: ResultStorage, suite: SuiteConfig, job: FioJobConfig) -> IOSummary:
lat = get_aggregated(rstorage, suite, job, "lat")
- bins_edges = numpy.array(get_lat_vals(lat.second_axis_size), dtype='float32') / 1000
+ bins_edges = numpy.array(get_lat_vals(lat.data.shape[1]), dtype='float32') / 1000
io = get_aggregated(rstorage, suite, job, "bw")
return IOSummary(job.qd,
@@ -191,34 +188,38 @@
# yield suite, fjob
-def get_aggregated(rstorage: ResultStorage, suite: TestSuiteConfig, job: FioJobConfig, sensor: str) -> TimeSeries:
- tss = list(rstorage.iter_ts(suite, job, sensor=sensor))
+AGG_TAG = 'ALL'
+
+
+def get_aggregated(rstorage: ResultStorage, suite: SuiteConfig, job: FioJobConfig, metric: str) -> TimeSeries:
+ tss = list(rstorage.iter_ts(suite, job, sensor=metric))
ds = DataSource(suite_id=suite.storage_id,
job_id=job.storage_id,
- node_id="__all__",
- dev='fio',
- sensor=sensor,
- tag=None)
+ node_id=AGG_TAG,
+ sensor='fio',
+ dev=AGG_TAG,
+ metric=metric,
+ tag='csv')
- agg_ts = TimeSeries(sensor,
+ agg_ts = TimeSeries(metric,
raw=None,
source=ds,
data=numpy.zeros(tss[0].data.shape, dtype=tss[0].data.dtype),
times=tss[0].times.copy(),
- second_axis_size=tss[0].second_axis_size)
+ units=tss[0].units)
for ts in tss:
- if sensor == 'lat' and ts.second_axis_size != expected_lat_bins:
+ if metric == 'lat' and (len(ts.data.shape) != 2 or ts.data.shape[1] != expected_lat_bins):
logger.error("Sensor %s.%s on node %s has" +
- "second_axis_size=%s. Can only process sensors with second_axis_size=%s.",
+ "shape=%s. Can only process sensors with shape=[X, %s].",
ts.source.dev, ts.source.sensor, ts.source.node_id,
- ts.second_axis_size, expected_lat_bins)
+ ts.data.shape, expected_lat_bins)
continue
- if sensor != 'lat' and ts.second_axis_size != 1:
+ if metric != 'lat' and len(ts.data.shape) != 1:
logger.error("Sensor %s.%s on node %s has" +
- "second_axis_size=%s. Can only process sensors with second_axis_size=1.",
- ts.source.dev, ts.source.sensor, ts.source.node_id, ts.second_axis_size)
+ "shape=%s. Can only process 1D sensors.",
+ ts.source.dev, ts.source.sensor, ts.source.node_id, ts.data.shape)
continue
# TODO: match times on different ts
@@ -227,24 +228,135 @@
return agg_ts
+def is_sensor_numarray(sensor: str, metric: str) -> bool:
+ """Returns True if sensor provides one-dimension array of numeric values. One number per one measurement."""
+ return True
+
+
+LEVEL_SENSORS = {("block-io", "io_queue"),
+ ("system-cpu", "procs_blocked"),
+ ("system-cpu", "procs_queue")}
+
+
+def is_level_sensor(sensor: str, metric: str) -> bool:
+ """Returns True if sensor measure level of any kind, E.g. queue depth."""
+ return (sensor, metric) in LEVEL_SENSORS
+
+
+def is_delta_sensor(sensor: str, metric: str) -> bool:
+ """Returns True if sensor provides deltas for cumulative value. E.g. io completed in given period"""
+ return not is_level_sensor(sensor, metric)
+
+
+def get_sensor_for_time_range(storage: IResultStorage,
+ node_id: str,
+ sensor: str,
+ dev: str,
+ metric: str,
+ time_range: Tuple[int, int]) -> numpy.array:
+ """Return sensor values for given node for given period. Return per second estimated values array
+
+ Raises an error if the requested range is not fully covered by data in storage.
+ First it finds range of results from sensor, which fully covers requested range.
+ ...."""
+
+ ds = DataSource(node_id=node_id, sensor=sensor, dev=dev, metric=metric)
+ sensor_data = storage.load_sensor(ds)
+ assert sensor_data.time_units == 'us'
+
+ # collected_at is array of pairs (collection_started_at, collection_finished_at)
+ # extract start time from each pair
+ collection_start_at = sensor_data.times[::2] # type: numpy.array
+
+ MICRO = 1000000
+
+ # convert seconds to us
+ begin = time_range[0] * MICRO
+ end = time_range[1] * MICRO
+
+ if begin < collection_start_at[0] or end > collection_start_at[-1] or end <= begin:
+ raise AssertionError(("Incorrect data for get_sensor - time_range={!r}, collected_at=[{}, ..., {}]," +
+ "sensor = {}_{}.{}.{}").format(time_range,
+ sensor_data.times[0] // MICRO,
+ sensor_data.times[-1] // MICRO,
+ node_id, sensor, dev, metric))
+
+ pos1, pos2 = numpy.searchsorted(collection_start_at, (begin, end))
+
+ # current real data time chunk begin time
+ edge_it = iter(collection_start_at[pos1 - 1: pos2 + 1])
+
+ # current real data value
+ val_it = iter(sensor_data.data[pos1 - 1: pos2 + 1])
+
+ # result array, cumulative value per second
+ result = numpy.zeros((end - begin) // MICRO)
+ idx = 0
+ curr_summ = 0
+
+ # end of current time slot
+ results_cell_ends = begin + MICRO
+
+ # hack to unify looping
+ real_data_end = next(edge_it)
+ while results_cell_ends <= end:
+ real_data_start = real_data_end
+ real_data_end = next(edge_it)
+ real_val_left = next(val_it)
+
+ # real data "speed" for interval [real_data_start, real_data_end]
+ real_val_ps = float(real_val_left) / (real_data_end - real_data_start)
+
+ while real_data_end >= results_cell_ends and results_cell_ends <= end:
+ # part of the current real value that fits into the current result cell
+ curr_real_chunk = int((results_cell_ends - real_data_start) * real_val_ps)
+
+ # calculate rest of real data for next result cell
+ real_val_left -= curr_real_chunk
+ result[idx] = curr_summ + curr_real_chunk
+ idx += 1
+ curr_summ = 0
+
+ # adjust real data start time
+ real_data_start = results_cell_ends
+ results_cell_ends += MICRO
+
+ # don't lose any real data
+ curr_summ += real_val_left
+
+ return result
+
+
# -------------- PLOT HELPERS FUNCTIONS ------------------------------------------------------------------------------
-def get_emb_data_svg(plt: Any) -> bytes:
+def get_emb_data_svg(plt: Any, format: str = 'svg') -> bytes:
bio = BytesIO()
- plt.savefig(bio, format='svg')
- img_start = "<!-- Created with matplotlib (http://matplotlib.org/) -->"
- return bio.getvalue().decode("utf8").split(img_start, 1)[1].encode("utf8")
+ if format in ('png', 'jpg'):
+ plt.savefig(bio, format=format)
+ return bio.getvalue()
+ elif format == 'svg':
+ plt.savefig(bio, format='svg')
+ img_start = "<!-- Created with matplotlib (http://matplotlib.org/) -->"
+ return bio.getvalue().decode("utf8").split(img_start, 1)[1].encode("utf8")
def provide_plot(func: Callable[..., None]) -> Callable[..., str]:
@wraps(func)
- def closure1(storage: ResultStorage, path: DataSource, *args, **kwargs) -> str:
+ def closure1(storage: ResultStorage,
+ path: DataSource,
+ *args, **kwargs) -> str:
fpath = storage.check_plot_file(path)
if not fpath:
+ format = path.tag.split(".")[-1]
+
+ plt.figure(figsize=StyleProfile.figsize)
+ plt.subplots_adjust(right=0.66)
+
func(*args, **kwargs)
- fpath = storage.put_plot_file(get_emb_data_svg(plt), path)
+ fpath = storage.put_plot_file(get_emb_data_svg(plt, format=format), path)
+ logger.debug("Plot %s saved to %r", path, fpath)
plt.clf()
- logger.debug("Save plot for %s to %r", path, fpath)
+ plt.close('all')
return fpath
return closure1
@@ -269,11 +381,9 @@
style: Any = StyleProfile) -> None:
# TODO: unit should came from ts
- total = sum(prop.bins_populations)
- mids = prop.bins_mids
- normed_bins = [population / total for population in prop.bins_populations]
- bar_width = mids[1] - mids[0]
- plt.bar(mids - bar_width / 2, normed_bins, color=colors.box_color, width=bar_width, label="Real data")
+ normed_bins = prop.bins_populations / prop.bins_populations.sum()
+ bar_width = prop.bins_edges[1] - prop.bins_edges[0]
+ plt.bar(prop.bins_edges, normed_bins, color=colors.box_color, width=bar_width, label="Real data")
plt.xlabel(units)
plt.ylabel("Value probability")
@@ -284,18 +394,20 @@
nprop = cast(NormStatProps, prop)
stats = scipy.stats.norm(nprop.average, nprop.deviation)
- # xpoints = numpy.linspace(mids[0], mids[-1], style.curve_approx_points)
- # ypoints = stats.pdf(xpoints) / style.curve_approx_points
+ new_edges, step = numpy.linspace(prop.bins_edges[0], prop.bins_edges[-1],
+ len(prop.bins_edges) * 10, retstep=True)
- edges, step = numpy.linspace(mids[0], mids[-1], len(mids) * 10, retstep=True)
-
- ypoints = stats.cdf(edges) * 11
+ ypoints = stats.cdf(new_edges) * 11
ypoints = [next - prev for (next, prev) in zip(ypoints[1:], ypoints[:-1])]
- xpoints = (edges[1:] + edges[:-1]) / 2
+ xpoints = (new_edges[1:] + new_edges[:-1]) / 2
- plt.plot(xpoints, ypoints, color=colors.primary_color, label="Expected from\nnormal distribution")
+ plt.plot(xpoints, ypoints, color=colors.primary_color, label="Expected from\nnormal\ndistribution")
dist_plotted = True
+ plt.gca().set_xlim(left=prop.bins_edges[0])
+ if prop.log_bins:
+ plt.xscale('log')
+
apply_style(style, eng=True, no_legend=not dist_plotted)
@@ -308,58 +420,67 @@
min_time = min(ts.times)
# /1000 is us to ms conversion
- time_points = [(val_time - min_time) / 1000 for val_time in ts.times]
+ time_points = numpy.array([(val_time - min_time) / 1000 for val_time in ts.times])
+
+ outliers_idxs = find_ouliers_ts(ts.data, cut_range=style.outliers_q_nd)
+ outliers_4q_idxs = find_ouliers_ts(ts.data, cut_range=style.outliers_hide_q_nd)
+ normal_idxs = numpy.logical_not(outliers_idxs)
+ outliers_idxs = outliers_idxs & numpy.logical_not(outliers_4q_idxs)
+ hidden_outliers_count = numpy.count_nonzero(outliers_4q_idxs)
+
+ data = ts.data[normal_idxs]
+ data_times = time_points[normal_idxs]
+ outliers = ts.data[outliers_idxs]
+ outliers_times = time_points[outliers_idxs]
alpha = colors.noise_alpha if plot_avg_dev else 1.0
- plt.plot(time_points, ts.data, "o", color=colors.primary_color, alpha=alpha, label="Data")
+ plt.plot(data_times, data, style.point_shape,
+ color=colors.primary_color, alpha=alpha, label="Data")
+ plt.plot(outliers_times, outliers, style.err_point_shape,
+ color=colors.err_color, label="Outliers")
- if plot_avg_dev:
- avg_vals = []
- low_vals_dev = []
- hight_vals_dev = []
- avg_times = []
- dev_times = []
+ has_negative_dev = False
+ plus_minus = "\xb1"
- start = (len(ts.data) % style.avg_range) // 2
- points = list(range(start, len(ts.data) + 1, style.avg_range))
+ if plot_avg_dev and len(data) < style.avg_range * 2:
+ logger.warning("Array %r to small to plot average over %s points", title, style.avg_range)
+ elif plot_avg_dev:
+ avg_vals = moving_average(data, style.avg_range)
+ dev_vals = moving_dev(data, style.avg_range)
+ avg_times = moving_average(data_times, style.avg_range)
- for begin, end in zip(points[:-1], points[1:]):
- vals = ts.data[begin: end]
+ if style.approx_average:
+ avg_vals = approximate_curve(avg_times, avg_vals, avg_times, style.curve_approx_level)
+ dev_vals = approximate_curve(avg_times, dev_vals, avg_times, style.curve_approx_level)
- cavg = average(vals)
- cdev = dev(vals)
- tavg = average(time_points[begin: end])
+ plt.plot(avg_times, avg_vals, c=colors.suppl_color1, label="Average")
- avg_vals.append(cavg)
- avg_times.append(tavg)
-
- low_vals_dev.append(cavg - style.dev_range_x * cdev)
- hight_vals_dev.append(cavg + style.dev_range_x * cdev)
- dev_times.append(tavg)
-
- avg_timepoints = cast(List[float], numpy.linspace(avg_times[0], avg_times[-1], style.curve_approx_points))
-
- low_vals_dev = approximate_curve(dev_times, low_vals_dev, avg_timepoints, style.curve_approx_level)
- hight_vals_dev = approximate_curve(dev_times, hight_vals_dev, avg_timepoints, style.curve_approx_level)
- new_vals_avg = approximate_curve(avg_times, avg_vals, avg_timepoints, style.curve_approx_level)
-
- plt.plot(avg_timepoints, new_vals_avg, c=colors.suppl_color1,
- label="Average\nover {}s".format(style.avg_range))
- plt.plot(avg_timepoints, low_vals_dev, c=colors.suppl_color2,
- label="Avg \xB1 {} * stdev\nover {}s".format(style.dev_range_x, style.avg_range))
- plt.plot(avg_timepoints, hight_vals_dev, c=colors.suppl_color2)
+ low_vals_dev = avg_vals - dev_vals * style.dev_range_x
+ hight_vals_dev = avg_vals + dev_vals * style.dev_range_x
+ if style.dev_range_x - int(style.dev_range_x) < 0.01:
+ plt.plot(avg_times, low_vals_dev, c=colors.suppl_color2,
+ label="{}{}*stdev".format(plus_minus, int(style.dev_range_x)))
+ else:
+ plt.plot(avg_times, low_vals_dev, c=colors.suppl_color2,
+ label="{}{}*stdev".format(plus_minus, style.dev_range_x))
+ plt.plot(avg_times, hight_vals_dev, c=colors.suppl_color2)
+ has_negative_dev = low_vals_dev.min() < 0
plt.xlim(-5, max(time_points) + 5)
-
plt.xlabel("Time, seconds from test begin")
- plt.ylabel("{}. Average and \xB1stddev over {} points".format(units, style.avg_range))
+ plt.ylabel("{}. Average and {}stddev over {} points".format(units, plus_minus, style.avg_range))
plt.title(title)
+
+ if has_negative_dev:
+ plt.gca().set_ylim(bottom=0)
+
apply_style(style, eng=True)
@provide_plot
def plot_lat_over_time(title: str, ts: TimeSeries, bins_vals: List[int], samples: int = 5,
- colors: Any = ColorProfile, style: Any = StyleProfile) -> None:
+ colors: Any = ColorProfile,
+ style: Any = StyleProfile) -> None:
min_time = min(ts.times)
times = [int(tm - min_time + 500) // 1000 for tm in ts.times]
@@ -368,42 +489,59 @@
points = [times[int(i * step + 0.5)] for i in range(samples)]
points.append(times[-1])
bounds = list(zip(points[:-1], points[1:]))
- data = numpy.array(ts.data, dtype='int32')
- data.shape = [len(ts.data) // ts.second_axis_size, ts.second_axis_size] # type: ignore
agg_data = []
positions = []
labels = []
- min_idxs = []
- max_idxs = []
-
for begin, end in bounds:
- agg_hist = numpy.sum(data[begin:end], axis=0)
+ agg_hist = ts.data[begin:end].sum(axis=0)
+
+ if style.violin_instead_of_box:
+ # cut outliers
+ idx1, idx2 = hist_outliers_perc(agg_hist, style.outliers_lat)
+ agg_hist = agg_hist[idx1:idx2]
+ curr_bins_vals = bins_vals[idx1:idx2]
+
+ correct_coef = style.violin_point_count / sum(agg_hist)
+ if correct_coef > 1:
+ correct_coef = 1
+ else:
+ curr_bins_vals = bins_vals
+ correct_coef = 1
vals = numpy.empty(shape=(numpy.sum(agg_hist),), dtype='float32')
cidx = 0
- non_zero = agg_hist.nonzero()[0]
- min_idxs.append(non_zero[0])
- max_idxs.append(non_zero[-1])
+ non_zero, = agg_hist.nonzero()
for pos in non_zero:
- vals[cidx:cidx + agg_hist[pos]] = bins_vals[pos]
- cidx += agg_hist[pos]
+ count = int(agg_hist[pos] * correct_coef + 0.5)
- agg_data.append(vals)
+ if count != 0:
+ vals[cidx: cidx + count] = curr_bins_vals[pos]
+ cidx += count
+
+ agg_data.append(vals[:cidx])
positions.append((end + begin) / 2)
labels.append(str((end + begin) // 2))
- min_y = bins_vals[min(min_idxs)]
- max_y = bins_vals[max(max_idxs)]
+ if style.violin_instead_of_box:
+ patches = plt.violinplot(agg_data,
+ positions=positions,
+ showmeans=True,
+ showmedians=True,
+ widths=step / 2)
- min_y -= (max_y - min_y) * 0.05
- max_y += (max_y - min_y) * 0.05
+ patches['cmeans'].set_color("blue")
+ patches['cmedians'].set_color("green")
+ if style.legend_for_eng:
+ legend_location = "center left"
+ legend_bbox_to_anchor = (1.03, 0.81)
+ plt.legend([patches['cmeans'], patches['cmedians']], ["mean", "median"],
+ loc=legend_location, bbox_to_anchor=legend_bbox_to_anchor)
+ else:
+ plt.boxplot(agg_data, 0, '', positions=positions, labels=labels, widths=step / 4)
- # plot box size adjust (only plot, not spines and legend)
- plt.boxplot(agg_data, 0, '', positions=positions, labels=labels, widths=step / 4)
plt.xlim(min(times), max(times))
- plt.ylim(min_y, max_y)
plt.xlabel("Time, seconds from test begin, sampled for ~{} seconds".format(int(step)))
plt.ylabel("Latency, ms")
plt.title(title)
@@ -411,19 +549,74 @@
@provide_plot
-def plot_heatmap(title: str, ts: TimeSeries, bins_vals: List[int], samples: int = 5,
- colors: Any = ColorProfile, style: Any = StyleProfile) -> None:
- hist_bins_count = 20
- bin_top = [100 * 2 ** i for i in range(20)]
- bin_ranges = [[0, 0]]
- cborder_it = iter(bin_top)
- cborder = next(cborder_it)
- for bin_val in bins_vals:
- if bin_val < cborder:
- bin_ranges
+def plot_heatmap(title: str,
+ ts: TimeSeries,
+ bins_vals: List[int],
+ colors: Any = ColorProfile,
+ style: Any = StyleProfile) -> None:
- # bins: [100us, 200us, ...., 104s]
- # msp origin bins ranges to heatmap bins
+ assert len(ts.data.shape) == 2
+ assert ts.data.shape[1] == len(bins_vals)
+
+ total_hist = ts.data.sum(axis=0)
+
+ # idx1, idx2 = hist_outliers_perc(total_hist, style.outliers_lat)
+ idx1, idx2 = ts_hist_outliers_perc(ts.data, bounds_perc=style.outliers_lat)
+
+ # don't cut too many bins
+ min_bins_left = style.hm_hist_bins_count
+ if idx2 - idx1 < min_bins_left:
+ missed = min_bins_left - (idx2 - idx1) // 2
+ idx2 = min(len(total_hist), idx2 + missed)
+ idx1 = max(0, idx1 - missed)
+
+ data = ts.data[:, idx1:idx2]
+ bins_vals = bins_vals[idx1:idx2]
+
+ # don't use rebin_histogram here, as we need to apply the same bins to many arrays
+ step = (bins_vals[-1] - bins_vals[0]) / style.hm_hist_bins_count
+ new_bins_edges = numpy.arange(style.hm_hist_bins_count) * step + bins_vals[0]
+ bin_mapping = numpy.clip(numpy.searchsorted(new_bins_edges, bins_vals) - 1, 0, len(new_bins_edges) - 1)
+
+ # map original bin ranges to heatmap bins, iterating over rows
+ cmap = []
+ for line in data:
+ curr_bins = [0] * style.hm_hist_bins_count
+ for idx, count in zip(bin_mapping, line):
+ curr_bins[idx] += count
+ cmap.append(curr_bins)
+ ncmap = numpy.array(cmap)
+
+ xmin = 0
+ xmax = (ts.times[-1] - ts.times[0]) / 1000 + 1
+ ymin = new_bins_edges[0]
+ ymax = new_bins_edges[-1]
+
+ fig, ax = plt.subplots(figsize=style.figsize)
+
+ if style.heatmap_interpolation == '1d':
+ interpolation = 'none'
+ res = []
+ for column in ncmap:
+ new_x = numpy.linspace(0, len(column), style.heatmap_interpolation_points)
+ old_x = numpy.arange(len(column)) + 0.5
+ new_vals = numpy.interp(new_x, old_x, column)
+ res.append(new_vals)
+ ncmap = numpy.array(res)
+ else:
+ interpolation = style.heatmap_interpolation
+
+ ax.imshow(ncmap[:,::-1].T,
+ interpolation=interpolation,
+ extent=(xmin, xmax, ymin, ymax),
+ cmap=colors.imshow_colormap)
+
+ ax.set_aspect((xmax - xmin) / (ymax - ymin) * (6 / 9))
+ ax.set_ylabel("Latency, ms")
+ ax.set_xlabel("Test time, s")
+
+ plt.title(title)
+
@provide_plot
def io_chart(title: str,
@@ -441,9 +634,6 @@
# offset from center of bar to deviation/confidence range indicator
err_x_offset = 0.05
- # figure size in inches
- figsize = (12, 6)
-
# extra space on top and bottom, comparing to maximal tight layout
extra_y_space = 0.05
@@ -472,7 +662,7 @@
# gs = gridspec.GridSpec(1, 3, width_ratios=[1, 4, 1])
# p1 = plt.subplot(gs[1])
- fig, p1 = plt.subplots(figsize=figsize)
+ fig, p1 = plt.subplots(figsize=StyleProfile.figsize)
# plot IOPS/BW bars
if block_size >= LARGE_BLOCKS:
@@ -560,7 +750,9 @@
# legend box
handles2, labels2 = p2.get_legend_handles_labels()
- plt.legend(handles1 + handles2, labels1 + labels2, loc=legend_location, bbox_to_anchor=legend_bbox_to_anchor)
+ plt.legend(handles1 + handles2, labels1 + labels2,
+ loc=legend_location,
+ bbox_to_anchor=legend_bbox_to_anchor)
# adjust central box size to fit legend
plt.subplots_adjust(**plot_box_adjust)
@@ -574,11 +766,35 @@
data = None # type: str
js_links = [] # type: List[str]
css_links = [] # type: List[str]
+ order_attr = None # type: Any
+
+ def __init__(self, data: str, order_attr: Any = None) -> None:
+ self.data = data
+ self.order_attr = order_attr
+
+ def __eq__(self, o: object) -> bool:
+ return o.order_attr == self.order_attr # type: ignore
+
+ def __lt__(self, o: object) -> bool:
+ return o.order_attr > self.order_attr # type: ignore
+
+
+class Table:
+ def __init__(self, header: List[str]) -> None:
+ self.header = header
+ self.data = []
+
+ def add_line(self, values: List[str]) -> None:
+ self.data.append(values)
+
+ def html(self):
+ return html.table("", self.header, self.data)
class Menu1st:
engineering = "Engineering"
summary = "Summary"
+ per_job = "Per Job"
class Menu2ndEng:
@@ -591,56 +807,66 @@
io_lat_qd = "IO & Lat vs QD"
-menu_1st_order = [Menu1st.summary, Menu1st.engineering]
+menu_1st_order = [Menu1st.summary, Menu1st.engineering, Menu1st.per_job]
# -------------------- REPORTS --------------------------------------------------------------------------------------
class Reporter(metaclass=abc.ABCMeta):
+ suite_types = set() # type: Set[str]
+
@abc.abstractmethod
- def get_divs(self, suite: TestSuiteConfig, storage: ResultStorage) -> Iterator[Tuple[str, str, HTMLBlock]]:
+ def get_divs(self, suite: SuiteConfig, storage: ResultStorage) -> Iterator[Tuple[str, str, HTMLBlock]]:
+ pass
+
+
+class JobReporter(metaclass=abc.ABCMeta):
+ suite_type = set() # type: Set[str]
+
+ @abc.abstractmethod
+ def get_divs(self,
+ suite: SuiteConfig,
+ job: JobConfig,
+ storage: ResultStorage) -> Iterator[Tuple[str, str, HTMLBlock]]:
pass
# Main performance report
class PerformanceSummary(Reporter):
- """Creates graph, which show how IOPS and Latency depend on QD"""
+ """Aggregated summary for storage"""
# Main performance report
class IO_QD(Reporter):
"""Creates graph, which show how IOPS and Latency depend on QD"""
- def get_divs(self, suite: TestSuiteConfig, rstorage: ResultStorage) -> Iterator[Tuple[str, str, HTMLBlock]]:
- ts_map = {} # type: Dict[FioTestSumm, List[IOSummary]]
- str_summary = {} # type: Dict[FioTestSumm, List[IOSummary]]
+ suite_types = {'fio'}
+
+ def get_divs(self, suite: SuiteConfig, rstorage: ResultStorage) -> Iterator[Tuple[str, str, HTMLBlock]]:
+ ts_map = defaultdict(list) # type: Dict[FioJobParams, List[Tuple[SuiteConfig, FioJobConfig]]]
+ str_summary = {} # type: Dict[FioJobParams, List[IOSummary]]
for job in rstorage.iter_job(suite):
fjob = cast(FioJobConfig, job)
- tpl_no_qd = fjob.characterized_tuple_no_qd()
- io_summ = make_iosum(rstorage, suite, job)
+ fjob_no_qd = cast(FioJobParams, fjob.params.copy(qd=None))
+ str_summary[fjob_no_qd] = (fjob_no_qd.summary, fjob_no_qd.long_summary)
+ ts_map[fjob_no_qd].append((suite, fjob))
- if tpl_no_qd not in ts_map:
- ts_map[tpl_no_qd] = [io_summ]
- str_summary[tpl_no_qd] = (fjob.summary_no_qd(), fjob.long_summary_no_qd())
- else:
- ts_map[tpl_no_qd].append(io_summ)
+ for tpl, suites_jobs in ts_map.items():
+ if len(suites_jobs) > StyleProfile.min_iops_vs_qd_jobs:
+ iosums = [make_iosum(rstorage, suite, job) for suite, job in suites_jobs]
+ iosums.sort(key=lambda x: x.qd)
+ summary, summary_long = str_summary[tpl]
+ ds = DataSource(suite_id=suite.storage_id,
+ job_id=summary,
+ node_id=AGG_TAG,
+ sensor="fio",
+ dev=AGG_TAG,
+ metric="io_over_qd",
+ tag="svg")
- for tpl, iosums in ts_map.items():
- iosums.sort(key=lambda x: x.qd)
- summary, summary_long = str_summary[tlp]
-
- ds = DataSource(suite_id=suite.storage_id,
- job_id="io_over_qd_".format(summary),
- node_id="__all__",
- dev='fio',
- sensor="io_over_qd",
- tag="svg")
-
- title = "IOPS, BW, Lat vs. QD.\n" + summary_long
- fpath = io_chart(rstorage, ds, title=title, legend="IOPS/BW", iosums=iosums)
- yield Menu1st.summary, Menu2ndSumm.io_lat_qd, html.img(fpath)
- if DEBUG:
- return
+ title = "IOPS, BW, Lat vs. QD.\n" + summary_long
+ fpath = io_chart(rstorage, ds, title=title, legend="IOPS/BW", iosums=iosums) # type: str
+ yield Menu1st.summary, Menu2ndSumm.io_lat_qd, HTMLBlock(html.img(fpath))
# Linearization report
@@ -648,162 +874,234 @@
"""Creates graphs, which show how IOPS and Latency depend on block size"""
+def summ_sensors(rstorage: ResultStorage,
+ nodes: List[str],
+ sensor: str,
+ metric: str,
+ time_range: Tuple[int, int]) -> Optional[numpy.array]:
+
+ res = None # type: Optional[numpy.array]
+ for node_id in nodes:
+ for _, groups in rstorage.iter_sensors(node_id=node_id, sensor=sensor, metric=metric):
+ data = get_sensor_for_time_range(rstorage,
+ node_id=node_id,
+ sensor=sensor,
+ dev=groups['dev'],
+ metric=metric,
+ time_range=time_range)
+ if res is None:
+ res = data
+ else:
+ res += data
+ return res
+
+
# IOPS/latency distribution
-class IOHist(Reporter):
+class StatInfo(JobReporter):
+ """Statistic info for job results"""
+ suite_types = {'fio'}
+
+ def get_divs(self, suite: SuiteConfig, job: JobConfig,
+ rstorage: ResultStorage) -> Iterator[Tuple[str, str, HTMLBlock]]:
+
+ fjob = cast(FioJobConfig, job)
+ io_sum = make_iosum(rstorage, suite, fjob)
+
+ summary_data = [
+ ["Summary", job.params.long_summary],
+ ]
+
+ res = html.H2(html.center("Test summary"))
+ res += html.table("Test info", None, summary_data)
+ stat_data_headers = ["Name", "Average ~ Dev", "Conf interval", "Mediana", "Mode", "Kurt / Skew", "95%", "99%"]
+
+ KB = 1024
+ bw_data = ["Bandwidth",
+ "{}Bps ~ {}Bps".format(b2ssize(io_sum.bw.average * KB), b2ssize(io_sum.bw.deviation * KB)),
+ b2ssize(io_sum.bw.confidence * KB) + "Bps",
+ b2ssize(io_sum.bw.perc_50 * KB) + "Bps",
+ "-",
+ "{:.2f} / {:.2f}".format(io_sum.bw.kurt, io_sum.bw.skew),
+ b2ssize(io_sum.bw.perc_5 * KB) + "Bps",
+ b2ssize(io_sum.bw.perc_1 * KB) + "Bps"]
+
+ iops_data = ["IOPS",
+ "{}IOPS ~ {}IOPS".format(b2ssize_10(io_sum.bw.average / fjob.bsize),
+ b2ssize_10(io_sum.bw.deviation / fjob.bsize)),
+ b2ssize_10(io_sum.bw.confidence / fjob.bsize) + "IOPS",
+ b2ssize_10(io_sum.bw.perc_50 / fjob.bsize) + "IOPS",
+ "-",
+ "{:.2f} / {:.2f}".format(io_sum.bw.kurt, io_sum.bw.skew),
+ b2ssize_10(io_sum.bw.perc_5 / fjob.bsize) + "IOPS",
+ b2ssize_10(io_sum.bw.perc_1 / fjob.bsize) + "IOPS"]
+
+ MICRO = 1000000
+ # latency
+ lat_data = ["Latency",
+ "-",
+ "-",
+ b2ssize_10(io_sum.bw.perc_50 / MICRO) + "s",
+ "-",
+ "-",
+ b2ssize_10(io_sum.bw.perc_95 / MICRO) + "s",
+ b2ssize_10(io_sum.bw.perc_99 / MICRO) + "s"]
+
+ # sensor usage
+ stat_data = [iops_data, bw_data, lat_data]
+ res += html.table("Load stats info", stat_data_headers, stat_data)
+
+ resource_headers = ["Resource", "Usage count", "Proportional to work done"]
+
+ io_transfered = io_sum.bw.data.sum() * KB
+ resource_data = [
+ ["IO made", b2ssize_10(io_transfered / KB / fjob.bsize) + "OP", "-"],
+ ["Data transfered", b2ssize(io_transfered) + "B", "-"]
+ ]
+
+
+ storage = rstorage.storage
+ nodes = storage.load_list(NodeInfo, 'all_nodes') # type: List[NodeInfo]
+
+ storage_nodes = [node.node_id for node in nodes if node.roles.intersection(STORAGE_ROLES)]
+ test_nodes = [node.node_id for node in nodes if "testnode" in node.roles]
+
+ trange = [job.reliable_info_range[0] / 1000, job.reliable_info_range[1] / 1000]
+ ops_done = io_transfered / fjob.bsize / KB
+
+ all_metrics = [
+ ("Test nodes net send", 'net-io', 'send_bytes', b2ssize, test_nodes, "B", io_transfered),
+ ("Test nodes net recv", 'net-io', 'recv_bytes', b2ssize, test_nodes, "B", io_transfered),
+
+ ("Test nodes disk write", 'block-io', 'sectors_written', b2ssize, test_nodes, "B", io_transfered),
+ ("Test nodes disk read", 'block-io', 'sectors_read', b2ssize, test_nodes, "B", io_transfered),
+ ("Test nodes writes", 'block-io', 'writes_completed', b2ssize_10, test_nodes, "OP", ops_done),
+ ("Test nodes reads", 'block-io', 'reads_completed', b2ssize_10, test_nodes, "OP", ops_done),
+
+ ("Storage nodes net send", 'net-io', 'send_bytes', b2ssize, storage_nodes, "B", io_transfered),
+ ("Storage nodes net recv", 'net-io', 'recv_bytes', b2ssize, storage_nodes, "B", io_transfered),
+
+ ("Storage nodes disk write", 'block-io', 'sectors_written', b2ssize, storage_nodes, "B", io_transfered),
+ ("Storage nodes disk read", 'block-io', 'sectors_read', b2ssize, storage_nodes, "B", io_transfered),
+ ("Storage nodes writes", 'block-io', 'writes_completed', b2ssize_10, storage_nodes, "OP", ops_done),
+ ("Storage nodes reads", 'block-io', 'reads_completed', b2ssize_10, storage_nodes, "OP", ops_done),
+ ]
+
+ all_agg = {}
+
+ for descr, sensor, metric, ffunc, nodes, units, denom in all_metrics:
+ if not nodes:
+ continue
+
+ res_arr = summ_sensors(rstorage, nodes=nodes, sensor=sensor, metric=metric, time_range=trange)
+ if res_arr is None:
+ continue
+
+ agg = res_arr.sum()
+ resource_data.append([descr, ffunc(agg) + units, "{:.1f}".format(agg / denom)])
+ all_agg[descr] = agg
+
+
+ cums = [
+ ("Test nodes writes", "Test nodes reads", "Total test ops", b2ssize_10, "OP", ops_done),
+ ("Storage nodes writes", "Storage nodes reads", "Total storage ops", b2ssize_10, "OP", ops_done),
+ ("Storage nodes disk write", "Storage nodes disk read", "Total storage IO size", b2ssize,
+ "B", io_transfered),
+ ("Test nodes disk write", "Test nodes disk read", "Total test nodes IO size", b2ssize, "B", io_transfered),
+ ]
+
+ for name1, name2, descr, ffunc, units, denom in cums:
+ if name1 in all_agg and name2 in all_agg:
+ agg = all_agg[name1] + all_agg[name2]
+ resource_data.append([descr, ffunc(agg) + units, "{:.1f}".format(agg / denom)])
+
+ res += html.table("Resources usage", resource_headers, resource_data)
+
+ yield Menu1st.per_job, job.summary, HTMLBlock(res)
+
+
+# IOPS/latency distribution
+class IOHist(JobReporter):
"""IOPS.latency distribution histogram"""
- def get_divs(self, suite: TestSuiteConfig, rstorage: ResultStorage) -> Iterator[Tuple[str, str, HTMLBlock]]:
- for job in rstorage.iter_job(suite):
- fjob = cast(FioJobConfig, job)
- agg_lat = get_aggregated(rstorage, suite, fjob, "lat")
- bins_edges = numpy.array(get_lat_vals(agg_lat.second_axis_size), dtype='float32') / 1000 # convert us to ms
- lat_stat_prop = calc_histo_stat_props(agg_lat, bins_edges, bins_count=StyleProfile.hist_boxes)
+ suite_types = {'fio'}
- title = "Latency distribution. " + fjob.long_summary
- units = "ms"
+ def get_divs(self,
+ suite: SuiteConfig,
+ job: JobConfig,
+ rstorage: ResultStorage) -> Iterator[Tuple[str, str, HTMLBlock]]:
- fpath = plot_hist(rstorage, agg_lat.source(tag='hist.svg'), title, units, lat_stat_prop)
- if DEBUG:
- yield Menu1st.summary, Menu2ndSumm.io_lat_qd, html.img(fpath)
- else:
- yield Menu1st.engineering, Menu2ndEng.hist, html.img(fpath)
+ fjob = cast(FioJobConfig, job)
- agg_io = get_aggregated(rstorage, suite, fjob, "bw")
+ yield Menu1st.per_job, fjob.summary, HTMLBlock(html.H2(html.center("Load histograms")))
- if fjob.bsize >= LARGE_BLOCKS:
- title = "BW distribution. " + fjob.long_summary
- units = "MiBps"
- agg_io.data /= MiB2KiB
- else:
- title = "IOPS distribution. " + fjob.long_summary
- agg_io.data /= fjob.bsize
- units = "IOPS"
+ agg_lat = get_aggregated(rstorage, suite, fjob, "lat")
+ bins_edges = numpy.array(get_lat_vals(agg_lat.data.shape[1]), dtype='float32') / 1000 # convert us to ms
+ lat_stat_prop = calc_histo_stat_props(agg_lat, bins_edges, bins_count=StyleProfile.hist_lat_boxes)
- io_stat_prop = calc_norm_stat_props(agg_io, bins_count=StyleProfile.hist_boxes)
- fpath = plot_hist(rstorage, agg_io.source(tag='hist.svg'), title, units, io_stat_prop)
- if DEBUG:
- yield Menu1st.summary, Menu2ndSumm.io_lat_qd, html.img(fpath)
- return
- else:
- yield Menu1st.engineering, Menu2ndEng.hist, html.img(fpath)
+ # import IPython
+ # IPython.embed()
+
+ long_summary = cast(FioJobParams, fjob.params).long_summary
+
+ title = "Latency distribution"
+ units = "ms"
+
+ fpath = plot_hist(rstorage, agg_lat.source(tag='hist.svg'), title, units, lat_stat_prop) # type: str
+ yield Menu1st.per_job, fjob.summary, HTMLBlock(html.img(fpath))
+
+ agg_io = get_aggregated(rstorage, suite, fjob, "bw")
+
+ if fjob.bsize >= LARGE_BLOCKS:
+ title = "BW distribution"
+ units = "MiBps"
+ agg_io.data //= MiB2KiB
+ else:
+ title = "IOPS distribution"
+ agg_io.data //= fjob.bsize
+ units = "IOPS"
+
+ io_stat_prop = calc_norm_stat_props(agg_io, bins_count=StyleProfile.hist_boxes)
+ fpath = plot_hist(rstorage, agg_io.source(tag='hist.svg'), title, units, io_stat_prop) # type: str
+ yield Menu1st.per_job, fjob.summary, HTMLBlock(html.img(fpath))
# IOPS/latency over test time for each job
-class IOTime(Reporter):
+class IOTime(JobReporter):
"""IOPS/latency during test"""
- def get_divs(self, suite: TestSuiteConfig, rstorage: ResultStorage) -> Iterator[Tuple[str, str, HTMLBlock]]:
- for job in rstorage.iter_job(suite):
- fjob = cast(FioJobConfig, job)
- agg_lat = get_aggregated(rstorage, suite, fjob, "lat")
- bins_edges = numpy.array(get_lat_vals(agg_lat.second_axis_size), dtype='float32') / 1000
- title = "Latency during test. " + fjob.long_summary
+ suite_types = {'fio'}
- fpath = plot_lat_over_time(rstorage, agg_lat.source(tag='ts.svg'), title, agg_lat, bins_edges)
- if DEBUG:
- yield Menu1st.summary, Menu2ndSumm.io_lat_qd, html.img(fpath)
- else:
- yield Menu1st.engineering, Menu2ndEng.lat_time, html.img(fpath)
+ def get_divs(self,
+ suite: SuiteConfig,
+ job: JobConfig,
+ rstorage: ResultStorage) -> Iterator[Tuple[str, str, HTMLBlock]]:
- fpath = plot_heatmap(rstorage, agg_lat.source(tag='hmap.svg'), title, agg_lat, bins_edges)
- if DEBUG:
- yield Menu1st.summary, Menu2ndSumm.io_lat_qd, html.img(fpath)
- else:
- yield Menu1st.engineering, Menu2ndEng.lat_time, html.img(fpath)
+ fjob = cast(FioJobConfig, job)
- agg_io = get_aggregated(rstorage, suite, fjob, "bw")
- if fjob.bsize >= LARGE_BLOCKS:
- title = "BW during test. " + fjob.long_summary
- units = "MiBps"
- agg_io.data /= MiB2KiB
- else:
- title = "IOPS during test. " + fjob.long_summary
- agg_io.data /= fjob.bsize
- units = "IOPS"
+ yield Menu1st.per_job, fjob.summary, HTMLBlock(html.H2(html.center("Load over time")))
- fpath = plot_v_over_time(rstorage, agg_io.source(tag='ts.svg'), title, units, agg_io)
+ agg_io = get_aggregated(rstorage, suite, fjob, "bw")
+ if fjob.bsize >= LARGE_BLOCKS:
+ title = "Bandwidth"
+ units = "MiBps"
+ agg_io.data //= MiB2KiB
+ else:
+ title = "IOPS"
+ agg_io.data //= fjob.bsize
+ units = "IOPS"
- if DEBUG:
- yield Menu1st.summary, Menu2ndSumm.io_lat_qd, html.img(fpath)
- return
- else:
- yield Menu1st.engineering, Menu2ndEng.iops_time, html.img(fpath)
+ fpath = plot_v_over_time(rstorage, agg_io.source(tag='ts.svg'), title, units, agg_io) # type: str
+ yield Menu1st.per_job, fjob.summary, HTMLBlock(html.img(fpath))
+ agg_lat = get_aggregated(rstorage, suite, fjob, "lat")
+ bins_edges = numpy.array(get_lat_vals(agg_lat.data.shape[1]), dtype='float32') / 1000
+ title = "Latency"
-def is_sensor_numarray(sensor: str, metric: str) -> bool:
- """Returns True if sensor provides one-dimension array of numeric values. One number per one measurement."""
- return True
+ fpath = plot_lat_over_time(rstorage, agg_lat.source(tag='ts.svg'), title, agg_lat, bins_edges) # type: str
+ yield Menu1st.per_job, fjob.summary, HTMLBlock(html.img(fpath))
+ title = "Latency heatmap"
+ fpath = plot_heatmap(rstorage, agg_lat.source(tag='hmap.png'), title, agg_lat, bins_edges) # type: str
-LEVEL_SENSORS = {("block-io", "io_queue"),
- ("system-cpu", "procs_blocked"),
- ("system-cpu", "procs_queue")}
-
-
-def is_level_sensor(sensor: str, metric: str) -> bool:
- """Returns True if sensor measure level of any kind, E.g. queue depth."""
- return (sensor, metric) in LEVEL_SENSORS
-
-
-def is_delta_sensor(sensor: str, metric: str) -> bool:
- """Returns True if sensor provides deltas for cumulative value. E.g. io completed in given period"""
- return not is_level_sensor(sensor, metric)
-
-
-
-def get_sensor(storage: Storage, node: str, sensor: str, dev: str, metric: str,
- time_range: Tuple[int, int]) -> numpy.array:
- """Return sensor values for given node for given period. Return per second estimated values array
-
- Raise an error if required range is not full covered by data in storage.
- First it finds range of results from sensor, which fully covers requested range.
- ...."""
-
- collected_at = numpy.array(storage.get_array("sensors/{}_collected_at".format(node)), dtype="int")
- data = numpy.array(storage.get_array("sensors/{}_{}.{}.{}".format(node, sensor, dev, metric)))
-
- # collected_at is array of pairs (collection_started_at, collection_finished_at)
- collection_start_at = collected_at[::2]
-
- MICRO = 1000000
-
- # convert secods to us
- begin = time_range[0] * MICRO
- end = time_range[1] * MICRO
-
- if begin < collection_start_at[0] or end > collection_start_at[-1] or end <= begin:
- raise AssertionError(("Incorrect data for get_sensor - time_range={!r}, collected_at=[{}, ..., {}]," +
- "sensor = {}_{}.{}.{}").format(time_range,
- collected_at[0] // MICRO,
- collected_at[-1] // MICRO,
- node, sensor, dev, metric))
-
- pos1, pos2 = numpy.searchsorted(collection_start_at, (begin, end))
- assert pos1 >= 1
-
- time_bounds = collection_start_at[pos1 - 1: pos2]
- edge_it = iter(time_bounds)
- val_it = iter(data[pos1 - 1: pos2])
-
- result = []
- curr_summ = 0
-
- results_cell_ends = begin + MICRO
- curr_end = next(edge_it)
-
- while results_cell_ends <= end:
- curr_start = curr_end
- curr_end = next(edge_it)
- curr_val = next(val_it)
- while curr_end >= results_cell_ends and results_cell_ends <= end:
- current_part = (results_cell_ends - curr_start) / (curr_end - curr_start) * curr_val
- result.append(curr_summ + current_part)
- curr_summ = 0
- curr_val -= current_part
- curr_start = results_cell_ends
- results_cell_ends += MICRO
- curr_summ += curr_val
-
- assert len(result) == (end - begin) // MICRO
- return result
+ yield Menu1st.per_job, fjob.summary, HTMLBlock(html.img(fpath))
class ResourceUsage:
@@ -824,50 +1122,65 @@
# Cluster load over test time
-class ClusterLoad(Reporter):
+class ClusterLoad(JobReporter):
"""IOPS/latency during test"""
+ # TODO: units should come from the sensor
storage_sensors = [
- ('block-io', 'reads_completed', "Read ops"),
- ('block-io', 'writes_completed', "Write ops"),
- ('block-io', 'sectors_read', "Read kb"),
- ('block-io', 'sectors_written', "Write kb"),
+ ('block-io', 'reads_completed', "Read ops", 'iops'),
+ ('block-io', 'writes_completed', "Write ops", 'iops'),
+ ('block-io', 'sectors_read', "Read kb", 'kb'),
+ ('block-io', 'sectors_written', "Write kb", 'kb'),
]
- def get_divs(self, suite: TestSuiteConfig, rstorage: ResultStorage) -> Iterator[Tuple[str, str, HTMLBlock]]:
+ def get_divs(self,
+ suite: SuiteConfig,
+ job: JobConfig,
+ rstorage: ResultStorage) -> Iterator[Tuple[str, str, HTMLBlock]]:
# split nodes on test and other
storage = rstorage.storage
nodes = storage.load_list(NodeInfo, "all_nodes") # type: List[NodeInfo]
+ yield Menu1st.per_job, job.summary, HTMLBlock(html.H2(html.center("Cluster load")))
test_nodes = {node.node_id for node in nodes if 'testnode' in node.roles}
cluster_nodes = {node.node_id for node in nodes if 'testnode' not in node.roles}
- for job in rstorage.iter_job(suite):
- # convert ms to s
- time_range = (job.reliable_info_starts_at // MS2S, job.reliable_info_stops_at // MS2S)
- len = time_range[1] - time_range[0]
+ # convert ms to s
+ time_range = (job.reliable_info_range[0] // MS2S, job.reliable_info_range[1] // MS2S)
+ len = time_range[1] - time_range[0]
+ for sensor, metric, sensor_title, units in self.storage_sensors:
+ sum_testnode = numpy.zeros((len,))
+ sum_other = numpy.zeros((len,))
+ for path, groups in rstorage.iter_sensors(sensor=sensor, metric=metric):
+ # todo: should return sensor units
+ data = get_sensor_for_time_range(rstorage,
+ groups['node_id'],
+ sensor,
+ groups['dev'],
+ metric, time_range)
+ if groups['node_id'] in test_nodes:
+ sum_testnode += data
+ else:
+ sum_other += data
- for sensor, metric, sensor_title in self.storage_sensors:
- sum_testnode = numpy.zeros((len,))
- sum_other = numpy.zeros((len,))
+ ds = DataSource(suite_id=suite.storage_id,
+ job_id=job.storage_id,
+ node_id="test_nodes",
+ sensor=sensor,
+ dev=AGG_TAG,
+ metric=metric,
+ tag="ts.svg")
- for path, groups in iter_sensors(rstorage.storage, sensor=sensor, metric=metric):
- data = get_sensor(rstorage.storage, groups['node'], sensor, groups['dev'], metric, time_range)
- if groups['node'] in test_nodes:
- sum_testnode += data
- else:
- sum_other += data
-
- ds = DataSource(suite_id=suite.storage_id, job_id=job.summary, node_id="cluster",
- dev=sensor, sensor=metric, tag="ts.svg")
-
- # s to ms
- ts = TimeSeries(name="", times=numpy.arange(*time_range) * MS2S, data=sum_testnode, raw=None)
- fpath = plot_v_over_time(rstorage, ds, "{}.{}".format(sensor, metric), sensor_title, ts=ts)
- yield Menu1st.engineering, Menu2ndEng.iops_time, html.img(fpath)
-
- if DEBUG:
- return
+ # s to ms
+ ts = TimeSeries(name="",
+ times=numpy.arange(*time_range) * MS2S,
+ data=sum_testnode,
+ raw=None,
+ units=units,
+ time_units="us",
+ source=ds)
+ fpath = plot_v_over_time(rstorage, ds, sensor_title, sensor_title, ts=ts) # type: str
+ yield Menu1st.per_job, job.summary, HTMLBlock(html.img(fpath))
# Ceph cluster summary
@@ -897,7 +1210,12 @@
def run(self, ctx: TestRun) -> None:
rstorage = ResultStorage(ctx.storage)
- reporters = [ClusterLoad()] # IO_QD(), IOTime(), IOHist()] # type: List[Reporter]
+
+ job_reporters = [StatInfo(), IOTime(), IOHist(), ClusterLoad()] # type: List[JobReporter]
+ reporters = [IO_QD()] # type: List[Reporter]
+
+ # job_reporters = [ClusterLoad()]
+ # reporters = []
root_dir = os.path.dirname(os.path.dirname(wally.__file__))
doc_templ_path = os.path.join(root_dir, "report_templates/index.html")
@@ -909,14 +1227,30 @@
content_block = []
link_idx = 0
- matplotlib.rcParams.update({'font.size': 10})
+ # matplotlib.rcParams.update(ctx.config.reporting.matplotlib_params.raw())
+ # ColorProfile.__dict__.update(ctx.config.reporting.colors.raw())
+ # StyleProfile.__dict__.update(ctx.config.reporting.style.raw())
- items = defaultdict(lambda: defaultdict(list)) # type: Dict[str, Dict[str, list]]
+ items = defaultdict(lambda: defaultdict(list)) # type: Dict[str, Dict[str, List[HTMLBlock]]]
+
+ # TODO: filter reporters
for suite in rstorage.iter_suite(FioTest.name):
+ all_jobs = list(rstorage.iter_job(suite))
+ all_jobs.sort(key=lambda job: job.params)
+ for job in all_jobs:
+ for reporter in job_reporters:
+ for block, item, html in reporter.get_divs(suite, job, rstorage):
+ items[block][item].append(html)
+ if DEBUG:
+ break
+
for reporter in reporters:
for block, item, html in reporter.get_divs(suite, rstorage):
items[block][item].append(html)
+ if DEBUG:
+ break
+
for idx_1st, menu_1st in enumerate(sorted(items, key=lambda x: menu_1st_order.index(x))):
menu_block.append(
'<a href="#item{}" class="nav-group" data-toggle="collapse" data-parent="#MainMenu">{}</a>'
@@ -927,7 +1261,7 @@
menu_block.append(' <a href="#content{}" class="nav-group-item">{}</a>'
.format(link_idx, menu_2nd))
content_block.append('<div id="content{}">'.format(link_idx))
- content_block.extend(" " + x for x in items[menu_1st][menu_2nd])
+ content_block.extend(" " + x.data for x in items[menu_1st][menu_2nd])
content_block.append('</div>')
link_idx += 1
menu_block.append('</div>')
@@ -946,1504 +1280,3 @@
def run(self, ctx: TestRun) -> None:
# TODO(koder): load data from storage
raise NotImplementedError("...")
-
-
-# --------------------------- LEGASY --------------------------------------------------------------------------------
-
-
-# # disk_info = None
-# # base = None
-# # linearity = None
-#
-#
-# def group_by_name(test_data):
-# name_map = collections.defaultdict(lambda: [])
-#
-# for data in test_data:
-# name_map[(data.name, data.summary())].append(data)
-#
-# return name_map
-#
-#
-# def report(name, required_fields):
-# def closure(func):
-# report_funcs.append((required_fields.split(","), name, func))
-# return func
-# return closure
-#
-#
-# def get_test_lcheck_params(pinfo):
-# res = [{
-# 's': 'sync',
-# 'd': 'direct',
-# 'a': 'async',
-# 'x': 'sync direct'
-# }[pinfo.sync_mode]]
-#
-# res.append(pinfo.p.rw)
-#
-# return " ".join(res)
-#
-#
-# def get_emb_data_svg(plt):
-# sio = StringIO()
-# plt.savefig(sio, format='svg')
-# img_start = "<!-- Created with matplotlib (http://matplotlib.org/) -->"
-# return sio.getvalue().split(img_start, 1)[1]
-#
-#
-# def get_template(templ_name):
-# very_root_dir = os.path.dirname(os.path.dirname(wally.__file__))
-# templ_dir = os.path.join(very_root_dir, 'report_templates')
-# templ_file = os.path.join(templ_dir, templ_name)
-# return open(templ_file, 'r').read()
-#
-#
-# def group_by(data, func):
-# if len(data) < 2:
-# yield data
-# return
-#
-# ndata = [(func(dt), dt) for dt in data]
-# ndata.sort(key=func)
-# pkey, dt = ndata[0]
-# curr_list = [dt]
-#
-# for key, val in ndata[1:]:
-# if pkey != key:
-# yield curr_list
-# curr_list = [val]
-# else:
-# curr_list.append(val)
-# pkey = key
-#
-# yield curr_list
-#
-#
-# @report('linearity', 'linearity_test')
-# def linearity_report(processed_results, lab_info, comment):
-# labels_and_data_mp = collections.defaultdict(lambda: [])
-# vls = {}
-#
-# # plot io_time = func(bsize)
-# for res in processed_results.values():
-# if res.name.startswith('linearity_test'):
-# iotimes = [1000. / val for val in res.iops.raw]
-#
-# op_summ = get_test_summary(res.params)[:3]
-#
-# labels_and_data_mp[op_summ].append(
-# [res.p.blocksize, res.iops.raw, iotimes])
-#
-# cvls = res.params.vals.copy()
-# del cvls['blocksize']
-# del cvls['rw']
-#
-# cvls.pop('sync', None)
-# cvls.pop('direct', None)
-# cvls.pop('buffered', None)
-#
-# if op_summ not in vls:
-# vls[op_summ] = cvls
-# else:
-# assert cvls == vls[op_summ]
-#
-# all_labels = None
-# _, ax1 = plt.subplots()
-# for name, labels_and_data in labels_and_data_mp.items():
-# labels_and_data.sort(key=lambda x: ssize2b(x[0]))
-#
-# labels, _, iotimes = zip(*labels_and_data)
-#
-# if all_labels is None:
-# all_labels = labels
-# else:
-# assert all_labels == labels
-#
-# plt.boxplot(iotimes)
-# if len(labels_and_data) > 2 and \
-# ssize2b(labels_and_data[-2][0]) >= 4096:
-#
-# xt = range(1, len(labels) + 1)
-#
-# def io_time(sz, bw, initial_lat):
-# return sz / bw + initial_lat
-#
-# x = numpy.array(map(ssize2b, labels))
-# y = numpy.array([sum(dt) / len(dt) for dt in iotimes])
-# popt, _ = scipy.optimize.curve_fit(io_time, x, y, p0=(100., 1.))
-#
-# y1 = io_time(x, *popt)
-# plt.plot(xt, y1, linestyle='--',
-# label=name + ' LS linear approx')
-#
-# for idx, (sz, _, _) in enumerate(labels_and_data):
-# if ssize2b(sz) >= 4096:
-# break
-#
-# bw = (x[-1] - x[idx]) / (y[-1] - y[idx])
-# lat = y[-1] - x[-1] / bw
-# y2 = io_time(x, bw, lat)
-# plt.plot(xt, y2, linestyle='--',
-# label=abbv_name_to_full(name) +
-# ' (4k & max) linear approx')
-#
-# plt.setp(ax1, xticklabels=labels)
-#
-# plt.xlabel("Block size")
-# plt.ylabel("IO time, ms")
-#
-# plt.subplots_adjust(top=0.85)
-# plt.legend(bbox_to_anchor=(0.5, 1.15),
-# loc='upper center',
-# prop={'size': 10}, ncol=2)
-# plt.grid()
-# iotime_plot = get_emb_data_svg(plt)
-# plt.clf()
-#
-# # plot IOPS = func(bsize)
-# _, ax1 = plt.subplots()
-#
-# for name, labels_and_data in labels_and_data_mp.items():
-# labels_and_data.sort(key=lambda x: ssize2b(x[0]))
-# _, data, _ = zip(*labels_and_data)
-# plt.boxplot(data)
-# avg = [float(sum(arr)) / len(arr) for arr in data]
-# xt = range(1, len(data) + 1)
-# plt.plot(xt, avg, linestyle='--',
-# label=abbv_name_to_full(name) + " avg")
-#
-# plt.setp(ax1, xticklabels=labels)
-# plt.xlabel("Block size")
-# plt.ylabel("IOPS")
-# plt.legend(bbox_to_anchor=(0.5, 1.15),
-# loc='upper center',
-# prop={'size': 10}, ncol=2)
-# plt.grid()
-# plt.subplots_adjust(top=0.85)
-#
-# iops_plot = get_emb_data_svg(plt)
-#
-# res = set(get_test_lcheck_params(res) for res in processed_results.values())
-# ncount = list(set(res.testnodes_count for res in processed_results.values()))
-# conc = list(set(res.concurence for res in processed_results.values()))
-#
-# assert len(conc) == 1
-# assert len(ncount) == 1
-#
-# descr = {
-# 'vm_count': ncount[0],
-# 'concurence': conc[0],
-# 'oper_descr': ", ".join(res).capitalize()
-# }
-#
-# params_map = {'iotime_vs_size': iotime_plot,
-# 'iops_vs_size': iops_plot,
-# 'descr': descr}
-#
-# return get_template('report_linearity.html').format(**params_map)
-#
-#
-# @report('lat_vs_iops', 'lat_vs_iops')
-# def lat_vs_iops(processed_results, lab_info, comment):
-# lat_iops = collections.defaultdict(lambda: [])
-# requsted_vs_real = collections.defaultdict(lambda: {})
-#
-# for res in processed_results.values():
-# if res.name.startswith('lat_vs_iops'):
-# lat_iops[res.concurence].append((res.lat,
-# 0,
-# res.iops.average,
-# res.iops.deviation))
-# # lat_iops[res.concurence].append((res.lat.average / 1000.0,
-# # res.lat.deviation / 1000.0,
-# # res.iops.average,
-# # res.iops.deviation))
-# requested_iops = res.p.rate_iops * res.concurence
-# requsted_vs_real[res.concurence][requested_iops] = \
-# (res.iops.average, res.iops.deviation)
-#
-# colors = ['red', 'green', 'blue', 'orange', 'magenta', "teal"]
-# colors_it = iter(colors)
-# for conc, lat_iops in sorted(lat_iops.items()):
-# lat, dev, iops, iops_dev = zip(*lat_iops)
-# plt.errorbar(iops, lat, xerr=iops_dev, yerr=dev, fmt='ro',
-# label=str(conc) + " threads",
-# color=next(colors_it))
-#
-# plt.xlabel("IOPS")
-# plt.ylabel("Latency, ms")
-# plt.grid()
-# plt.legend(loc=0)
-# plt_iops_vs_lat = get_emb_data_svg(plt)
-# plt.clf()
-#
-# colors_it = iter(colors)
-# for conc, req_vs_real in sorted(requsted_vs_real.items()):
-# req, real = zip(*sorted(req_vs_real.items()))
-# iops, dev = zip(*real)
-# plt.errorbar(req, iops, yerr=dev, fmt='ro',
-# label=str(conc) + " threads",
-# color=next(colors_it))
-# plt.xlabel("Requested IOPS")
-# plt.ylabel("Get IOPS")
-# plt.grid()
-# plt.legend(loc=0)
-# plt_iops_vs_requested = get_emb_data_svg(plt)
-#
-# res1 = processed_results.values()[0]
-# params_map = {'iops_vs_lat': plt_iops_vs_lat,
-# 'iops_vs_requested': plt_iops_vs_requested,
-# 'oper_descr': get_test_lcheck_params(res1).capitalize()}
-#
-# return get_template('report_iops_vs_lat.html').format(**params_map)
-#
-#
-# def render_all_html(comment, info, lab_description, images, templ_name):
-# data = info.__dict__.copy()
-# for name, val in data.items():
-# if not name.startswith('__'):
-# if val is None:
-# if name in ('direct_iops_w64_max', 'direct_iops_w_max'):
-# data[name] = ('-', '-', '-')
-# else:
-# data[name] = '-'
-# elif isinstance(val, (int, float, long)):
-# data[name] = round_3_digit(val)
-#
-# data['bw_read_max'] = (data['bw_read_max'][0] // 1024,
-# data['bw_read_max'][1],
-# data['bw_read_max'][2])
-#
-# data['bw_write_max'] = (data['bw_write_max'][0] // 1024,
-# data['bw_write_max'][1],
-# data['bw_write_max'][2])
-#
-# images.update(data)
-# templ = get_template(templ_name)
-# return templ.format(lab_info=lab_description,
-# comment=comment,
-# **images)
-#
-#
-# def io_chart(title, concurence,
-# latv, latv_min, latv_max,
-# iops_or_bw, iops_or_bw_err,
-# legend,
-# log_iops=False,
-# log_lat=False,
-# boxplots=False,
-# latv_50=None,
-# latv_95=None,
-# error2=None):
-#
-# matplotlib.rcParams.update({'font.size': 10})
-# points = " MiBps" if legend == 'BW' else ""
-# lc = len(concurence)
-# width = 0.35
-# xt = range(1, lc + 1)
-#
-# op_per_vm = [v / (vm * th) for v, (vm, th) in zip(iops_or_bw, concurence)]
-# fig, p1 = plt.subplots()
-# xpos = [i - width / 2 for i in xt]
-#
-# p1.bar(xpos, iops_or_bw,
-# width=width,
-# color='y',
-# label=legend)
-#
-# err1_leg = None
-# for pos, y, err in zip(xpos, iops_or_bw, iops_or_bw_err):
-# err1_leg = p1.errorbar(pos + width / 2,
-# y,
-# err,
-# color='magenta')
-#
-# err2_leg = None
-# if error2 is not None:
-# for pos, y, err in zip(xpos, iops_or_bw, error2):
-# err2_leg = p1.errorbar(pos + width / 2 + 0.08,
-# y,
-# err,
-# lw=2,
-# alpha=0.5,
-# color='teal')
-#
-# p1.grid(True)
-# p1.plot(xt, op_per_vm, '--', label=legend + "/thread", color='black')
-# handles1, labels1 = p1.get_legend_handles_labels()
-#
-# handles1 += [err1_leg]
-# labels1 += ["95% conf"]
-#
-# if err2_leg is not None:
-# handles1 += [err2_leg]
-# labels1 += ["95% dev"]
-#
-# p2 = p1.twinx()
-#
-# if latv_50 is None:
-# p2.plot(xt, latv_max, label="lat max")
-# p2.plot(xt, latv, label="lat avg")
-# p2.plot(xt, latv_min, label="lat min")
-# else:
-# p2.plot(xt, latv_50, label="lat med")
-# p2.plot(xt, latv_95, label="lat 95%")
-#
-# plt.xlim(0.5, lc + 0.5)
-# plt.xticks(xt, ["{0} * {1}".format(vm, th) for (vm, th) in concurence])
-# p1.set_xlabel("VM Count * Thread per VM")
-# p1.set_ylabel(legend + points)
-# p2.set_ylabel("Latency ms")
-# plt.title(title)
-# handles2, labels2 = p2.get_legend_handles_labels()
-#
-# plt.legend(handles1 + handles2, labels1 + labels2,
-# loc='center left', bbox_to_anchor=(1.1, 0.81))
-#
-# if log_iops:
-# p1.set_yscale('log')
-#
-# if log_lat:
-# p2.set_yscale('log')
-#
-# plt.subplots_adjust(right=0.68)
-#
-# return get_emb_data_svg(plt)
-#
-#
-# def make_plots(processed_results, plots):
-# """
-# processed_results: [PerfInfo]
-# plots = [(test_name_prefix:str, fname:str, description:str)]
-# """
-# files = {}
-# for name_pref, fname, desc in plots:
-# chart_data = []
-#
-# for res in processed_results:
-# summ = res.name + "_" + res.summary
-# if summ.startswith(name_pref):
-# chart_data.append(res)
-#
-# if len(chart_data) == 0:
-# raise ValueError("Can't found any date for " + name_pref)
-#
-# use_bw = ssize2b(chart_data[0].p.blocksize) > 16 * 1024
-#
-# chart_data.sort(key=lambda x: x.params['vals']['numjobs'])
-#
-# lat = None
-# lat_min = None
-# lat_max = None
-#
-# lat_50 = [x.lat_50 for x in chart_data]
-# lat_95 = [x.lat_95 for x in chart_data]
-#
-# lat_diff_max = max(x.lat_95 / x.lat_50 for x in chart_data)
-# lat_log_scale = (lat_diff_max > 10)
-#
-# testnodes_count = x.testnodes_count
-# concurence = [(testnodes_count, x.concurence)
-# for x in chart_data]
-#
-# if use_bw:
-# data = [x.bw.average / 1000 for x in chart_data]
-# data_conf = [x.bw.confidence / 1000 for x in chart_data]
-# data_dev = [x.bw.deviation * 2.5 / 1000 for x in chart_data]
-# name = "BW"
-# else:
-# data = [x.iops.average for x in chart_data]
-# data_conf = [x.iops.confidence for x in chart_data]
-# data_dev = [x.iops.deviation * 2 for x in chart_data]
-# name = "IOPS"
-#
-# fc = io_chart(title=desc,
-# concurence=concurence,
-#
-# latv=lat,
-# latv_min=lat_min,
-# latv_max=lat_max,
-#
-# iops_or_bw=data,
-# iops_or_bw_err=data_conf,
-#
-# legend=name,
-# log_lat=lat_log_scale,
-#
-# latv_50=lat_50,
-# latv_95=lat_95,
-#
-# error2=data_dev)
-# files[fname] = fc
-#
-# return files
-#
-#
-# def find_max_where(processed_results, sync_mode, blocksize, rw, iops=True):
-# result = None
-# attr = 'iops' if iops else 'bw'
-# for measurement in processed_results:
-# ok = measurement.sync_mode == sync_mode
-# ok = ok and (measurement.p.blocksize == blocksize)
-# ok = ok and (measurement.p.rw == rw)
-#
-# if ok:
-# field = getattr(measurement, attr)
-#
-# if result is None:
-# result = field
-# elif field.average > result.average:
-# result = field
-#
-# return result
-#
-#
-# def get_disk_info(processed_results):
-# di = DiskInfo()
-# di.direct_iops_w_max = find_max_where(processed_results,
-# 'd', '4k', 'randwrite')
-# di.direct_iops_r_max = find_max_where(processed_results,
-# 'd', '4k', 'randread')
-#
-# di.direct_iops_w64_max = find_max_where(processed_results,
-# 'd', '64k', 'randwrite')
-#
-# for sz in ('16m', '64m'):
-# di.bw_write_max = find_max_where(processed_results,
-# 'd', sz, 'randwrite', False)
-# if di.bw_write_max is not None:
-# break
-#
-# if di.bw_write_max is None:
-# for sz in ('1m', '2m', '4m', '8m'):
-# di.bw_write_max = find_max_where(processed_results,
-# 'd', sz, 'write', False)
-# if di.bw_write_max is not None:
-# break
-#
-# for sz in ('16m', '64m'):
-# di.bw_read_max = find_max_where(processed_results,
-# 'd', sz, 'randread', False)
-# if di.bw_read_max is not None:
-# break
-#
-# if di.bw_read_max is None:
-# di.bw_read_max = find_max_where(processed_results,
-# 'd', '1m', 'read', False)
-#
-# rws4k_iops_lat_th = []
-# for res in processed_results:
-# if res.sync_mode in 'xs' and res.p.blocksize == '4k':
-# if res.p.rw != 'randwrite':
-# continue
-# rws4k_iops_lat_th.append((res.iops.average,
-# res.lat,
-# # res.lat.average,
-# res.concurence))
-#
-# rws4k_iops_lat_th.sort(key=lambda x: x[2])
-#
-# latv = [lat for _, lat, _ in rws4k_iops_lat_th]
-#
-# for tlat in [10, 30, 100]:
-# pos = bisect.bisect_left(latv, tlat)
-# if 0 == pos:
-# setattr(di, 'rws4k_{}ms'.format(tlat), 0)
-# elif pos == len(latv):
-# iops3, _, _ = rws4k_iops_lat_th[-1]
-# iops3 = int(round_3_digit(iops3))
-# setattr(di, 'rws4k_{}ms'.format(tlat), ">=" + str(iops3))
-# else:
-# lat1 = latv[pos - 1]
-# lat2 = latv[pos]
-#
-# iops1, _, th1 = rws4k_iops_lat_th[pos - 1]
-# iops2, _, th2 = rws4k_iops_lat_th[pos]
-#
-# th_lat_coef = (th2 - th1) / (lat2 - lat1)
-# th3 = th_lat_coef * (tlat - lat1) + th1
-#
-# th_iops_coef = (iops2 - iops1) / (th2 - th1)
-# iops3 = th_iops_coef * (th3 - th1) + iops1
-# iops3 = int(round_3_digit(iops3))
-# setattr(di, 'rws4k_{}ms'.format(tlat), iops3)
-#
-# hdi = DiskInfo()
-#
-# def pp(x):
-# med, conf = x.rounded_average_conf()
-# conf_perc = int(float(conf) / med * 100)
-# dev_perc = int(float(x.deviation) / med * 100)
-# return (round_3_digit(med), conf_perc, dev_perc)
-#
-# hdi.direct_iops_r_max = pp(di.direct_iops_r_max)
-#
-# if di.direct_iops_w_max is not None:
-# hdi.direct_iops_w_max = pp(di.direct_iops_w_max)
-# else:
-# hdi.direct_iops_w_max = None
-#
-# if di.direct_iops_w64_max is not None:
-# hdi.direct_iops_w64_max = pp(di.direct_iops_w64_max)
-# else:
-# hdi.direct_iops_w64_max = None
-#
-# hdi.bw_write_max = pp(di.bw_write_max)
-# hdi.bw_read_max = pp(di.bw_read_max)
-#
-# hdi.rws4k_10ms = di.rws4k_10ms if 0 != di.rws4k_10ms else None
-# hdi.rws4k_30ms = di.rws4k_30ms if 0 != di.rws4k_30ms else None
-# hdi.rws4k_100ms = di.rws4k_100ms if 0 != di.rws4k_100ms else None
-# return hdi
-#
-#
-# @report('hdd', 'hdd')
-# def make_hdd_report(processed_results, lab_info, comment):
-# plots = [
-# ('hdd_rrd4k', 'rand_read_4k', 'Random read 4k direct IOPS'),
-# ('hdd_rwx4k', 'rand_write_4k', 'Random write 4k sync IOPS')
-# ]
-# perf_infos = [res.disk_perf_info() for res in processed_results]
-# images = make_plots(perf_infos, plots)
-# di = get_disk_info(perf_infos)
-# return render_all_html(comment, di, lab_info, images, "report_hdd.html")
-#
-#
-# @report('cinder_iscsi', 'cinder_iscsi')
-# def make_cinder_iscsi_report(processed_results, lab_info, comment):
-# plots = [
-# ('cinder_iscsi_rrd4k', 'rand_read_4k', 'Random read 4k direct IOPS'),
-# ('cinder_iscsi_rwx4k', 'rand_write_4k', 'Random write 4k sync IOPS')
-# ]
-# perf_infos = [res.disk_perf_info() for res in processed_results]
-# try:
-# images = make_plots(perf_infos, plots)
-# except ValueError:
-# plots = [
-# ('cinder_iscsi_rrd4k', 'rand_read_4k', 'Random read 4k direct IOPS'),
-# ('cinder_iscsi_rws4k', 'rand_write_4k', 'Random write 4k sync IOPS')
-# ]
-# images = make_plots(perf_infos, plots)
-# di = get_disk_info(perf_infos)
-#
-# return render_all_html(comment, di, lab_info, images, "report_cinder_iscsi.html")
-#
-#
-# @report('ceph', 'ceph')
-# def make_ceph_report(processed_results, lab_info, comment):
-# plots = [
-# ('ceph_rrd4k', 'rand_read_4k', 'Random read 4k direct IOPS'),
-# ('ceph_rws4k', 'rand_write_4k', 'Random write 4k sync IOPS'),
-# ('ceph_rrd16m', 'rand_read_16m', 'Random read 16m direct MiBps'),
-# ('ceph_rwd16m', 'rand_write_16m',
-# 'Random write 16m direct MiBps'),
-# ]
-#
-# perf_infos = [res.disk_perf_info() for res in processed_results]
-# images = make_plots(perf_infos, plots)
-# di = get_disk_info(perf_infos)
-# return render_all_html(comment, di, lab_info, images, "report_ceph.html")
-#
-#
-# @report('mixed', 'mixed')
-# def make_mixed_report(processed_results, lab_info, comment):
-# #
-# # IOPS(X% read) = 100 / ( X / IOPS_W + (100 - X) / IOPS_R )
-# #
-#
-# perf_infos = [res.disk_perf_info() for res in processed_results]
-# mixed = collections.defaultdict(lambda: [])
-#
-# is_ssd = False
-# for res in perf_infos:
-# if res.name.startswith('mixed'):
-# if res.name.startswith('mixed-ssd'):
-# is_ssd = True
-# mixed[res.concurence].append((res.p.rwmixread,
-# res.lat,
-# 0,
-# # res.lat.average / 1000.0,
-# # res.lat.deviation / 1000.0,
-# res.iops.average,
-# res.iops.deviation))
-#
-# if len(mixed) == 0:
-# raise ValueError("No mixed load found")
-#
-# fig, p1 = plt.subplots()
-# p2 = p1.twinx()
-#
-# colors = ['red', 'green', 'blue', 'orange', 'magenta', "teal"]
-# colors_it = iter(colors)
-# for conc, mix_lat_iops in sorted(mixed.items()):
-# mix_lat_iops = sorted(mix_lat_iops)
-# read_perc, lat, dev, iops, iops_dev = zip(*mix_lat_iops)
-# p1.errorbar(read_perc, iops, color=next(colors_it),
-# yerr=iops_dev, label=str(conc) + " th")
-#
-# p2.errorbar(read_perc, lat, color=next(colors_it),
-# ls='--', yerr=dev, label=str(conc) + " th lat")
-#
-# if is_ssd:
-# p1.set_yscale('log')
-# p2.set_yscale('log')
-#
-# p1.set_xlim(-5, 105)
-#
-# read_perc = set(read_perc)
-# read_perc.add(0)
-# read_perc.add(100)
-# read_perc = sorted(read_perc)
-#
-# plt.xticks(read_perc, map(str, read_perc))
-#
-# p1.grid(True)
-# p1.set_xlabel("% of reads")
-# p1.set_ylabel("Mixed IOPS")
-# p2.set_ylabel("Latency, ms")
-#
-# handles1, labels1 = p1.get_legend_handles_labels()
-# handles2, labels2 = p2.get_legend_handles_labels()
-# plt.subplots_adjust(top=0.85)
-# plt.legend(handles1 + handles2, labels1 + labels2,
-# bbox_to_anchor=(0.5, 1.15),
-# loc='upper center',
-# prop={'size': 12}, ncol=3)
-# plt.show()
-#
-#
-# def make_load_report(idx, results_dir, fname):
-# dpath = os.path.join(results_dir, "io_" + str(idx))
-# files = sorted(os.listdir(dpath))
-# gf = lambda x: "_".join(x.rsplit(".", 1)[0].split('_')[:3])
-#
-# for key, group in itertools.groupby(files, gf):
-# fname = os.path.join(dpath, key + ".fio")
-#
-# cfgs = list(parse_all_in_1(open(fname).read(), fname))
-#
-# fname = os.path.join(dpath, key + "_lat.log")
-#
-# curr = []
-# arrays = []
-#
-# with open(fname) as fd:
-# for offset, lat, _, _ in csv.reader(fd):
-# offset = int(offset)
-# lat = int(lat)
-# if len(curr) > 0 and curr[-1][0] > offset:
-# arrays.append(curr)
-# curr = []
-# curr.append((offset, lat))
-# arrays.append(curr)
-# conc = int(cfgs[0].vals.get('numjobs', 1))
-#
-# if conc != 5:
-# continue
-#
-# assert len(arrays) == len(cfgs) * conc
-#
-# garrays = [[(0, 0)] for _ in range(conc)]
-#
-# for offset in range(len(cfgs)):
-# for acc, new_arr in zip(garrays, arrays[offset * conc:(offset + 1) * conc]):
-# last = acc[-1][0]
-# for off, lat in new_arr:
-# acc.append((off / 1000. + last, lat / 1000.))
-#
-# for cfg, arr in zip(cfgs, garrays):
-# plt.plot(*zip(*arr[1:]))
-# plt.show()
-# exit(1)
-#
-#
-# def make_io_report(dinfo, comment, path, lab_info=None):
-# lab_info = {
-# "total_disk": "None",
-# "total_memory": "None",
-# "nodes_count": "None",
-# "processor_count": "None"
-# }
-#
-# try:
-# res_fields = sorted(v.name for v in dinfo)
-#
-# found = False
-# for fields, name, func in report_funcs:
-# for field in fields:
-# pos = bisect.bisect_left(res_fields, field)
-#
-# if pos == len(res_fields):
-# break
-#
-# if not res_fields[pos].startswith(field):
-# break
-# else:
-# found = True
-# hpath = path.format(name)
-#
-# try:
-# report = func(dinfo, lab_info, comment)
-# except:
-# logger.exception("Diring {0} report generation".format(name))
-# continue
-#
-# if report is not None:
-# try:
-# with open(hpath, "w") as fd:
-# fd.write(report)
-# except:
-# logger.exception("Diring saving {0} report".format(name))
-# continue
-# logger.info("Report {0} saved into {1}".format(name, hpath))
-# else:
-# logger.warning("No report produced by {0!r}".format(name))
-#
-# if not found:
-# logger.warning("No report generator found for this load")
-#
-# except Exception as exc:
-# import traceback
-# traceback.print_exc()
-# logger.error("Failed to generate html report:" + str(exc))
-#
-#
-# # @classmethod
-# # def prepare_data(cls, results) -> List[Dict[str, Any]]:
-# # """create a table with io performance report for console"""
-# #
-# # def key_func(data: FioRunResult) -> Tuple[str, str, str, str, int]:
-# # tpl = data.summary_tpl()
-# # return (data.name,
-# # tpl.oper,
-# # tpl.mode,
-# # ssize2b(tpl.bsize),
-# # int(tpl.th_count) * int(tpl.vm_count))
-# # res = []
-# #
-# # for item in sorted(results, key=key_func):
-# # test_dinfo = item.disk_perf_info()
-# # testnodes_count = len(item.config.nodes)
-# #
-# # iops, _ = test_dinfo.iops.rounded_average_conf()
-# #
-# # if test_dinfo.iops_sys is not None:
-# # iops_sys, iops_sys_conf = test_dinfo.iops_sys.rounded_average_conf()
-# # _, iops_sys_dev = test_dinfo.iops_sys.rounded_average_dev()
-# # iops_sys_per_vm = round_3_digit(iops_sys / testnodes_count)
-# # iops_sys = round_3_digit(iops_sys)
-# # else:
-# # iops_sys = None
-# # iops_sys_per_vm = None
-# # iops_sys_dev = None
-# # iops_sys_conf = None
-# #
-# # bw, bw_conf = test_dinfo.bw.rounded_average_conf()
-# # _, bw_dev = test_dinfo.bw.rounded_average_dev()
-# # conf_perc = int(round(bw_conf * 100 / bw))
-# # dev_perc = int(round(bw_dev * 100 / bw))
-# #
-# # lat_50 = round_3_digit(int(test_dinfo.lat_50))
-# # lat_95 = round_3_digit(int(test_dinfo.lat_95))
-# # lat_avg = round_3_digit(int(test_dinfo.lat_avg))
-# #
-# # iops_per_vm = round_3_digit(iops / testnodes_count)
-# # bw_per_vm = round_3_digit(bw / testnodes_count)
-# #
-# # iops = round_3_digit(iops)
-# # bw = round_3_digit(bw)
-# #
-# # summ = "{0.oper}{0.mode} {0.bsize:>4} {0.th_count:>3}th {0.vm_count:>2}vm".format(item.summary_tpl())
-# #
-# # res.append({"name": key_func(item)[0],
-# # "key": key_func(item)[:4],
-# # "summ": summ,
-# # "iops": int(iops),
-# # "bw": int(bw),
-# # "conf": str(conf_perc),
-# # "dev": str(dev_perc),
-# # "iops_per_vm": int(iops_per_vm),
-# # "bw_per_vm": int(bw_per_vm),
-# # "lat_50": lat_50,
-# # "lat_95": lat_95,
-# # "lat_avg": lat_avg,
-# #
-# # "iops_sys": iops_sys,
-# # "iops_sys_per_vm": iops_sys_per_vm,
-# # "sys_conf": iops_sys_conf,
-# # "sys_dev": iops_sys_dev})
-# #
-# # return res
-# #
-# # Field = collections.namedtuple("Field", ("header", "attr", "allign", "size"))
-# # fiels_and_header = [
-# # Field("Name", "name", "l", 7),
-# # Field("Description", "summ", "l", 19),
-# # Field("IOPS\ncum", "iops", "r", 3),
-# # # Field("IOPS_sys\ncum", "iops_sys", "r", 3),
-# # Field("KiBps\ncum", "bw", "r", 6),
-# # Field("Cnf %\n95%", "conf", "r", 3),
-# # Field("Dev%", "dev", "r", 3),
-# # Field("iops\n/vm", "iops_per_vm", "r", 3),
-# # Field("KiBps\n/vm", "bw_per_vm", "r", 6),
-# # Field("lat ms\nmedian", "lat_50", "r", 3),
-# # Field("lat ms\n95%", "lat_95", "r", 3),
-# # Field("lat\navg", "lat_avg", "r", 3),
-# # ]
-# #
-# # fiels_and_header_dct = dict((item.attr, item) for item in fiels_and_header)
-# #
-# # @classmethod
-# # def format_for_console(cls, results) -> str:
-# # """create a table with io performance report for console"""
-# #
-# # tab = texttable.Texttable(max_width=120)
-# # tab.set_deco(tab.HEADER | tab.VLINES | tab.BORDER)
-# # tab.set_cols_align([f.allign for f in cls.fiels_and_header])
-# # sep = ["-" * f.size for f in cls.fiels_and_header]
-# # tab.header([f.header for f in cls.fiels_and_header])
-# # prev_k = None
-# # for item in cls.prepare_data(results):
-# # if prev_k is not None:
-# # if prev_k != item["key"]:
-# # tab.add_row(sep)
-# #
-# # prev_k = item["key"]
-# # tab.add_row([item[f.attr] for f in cls.fiels_and_header])
-# #
-# # return tab.draw()
-# #
-# # @classmethod
-# # def format_diff_for_console(cls, list_of_results: List[Any]) -> str:
-# # """create a table with io performance report for console"""
-# #
-# # tab = texttable.Texttable(max_width=200)
-# # tab.set_deco(tab.HEADER | tab.VLINES | tab.BORDER)
-# #
-# # header = [
-# # cls.fiels_and_header_dct["name"].header,
-# # cls.fiels_and_header_dct["summ"].header,
-# # ]
-# # allign = ["l", "l"]
-# #
-# # header.append("IOPS ~ Cnf% ~ Dev%")
-# # allign.extend(["r"] * len(list_of_results))
-# # header.extend(
-# # "IOPS_{0} %".format(i + 2) for i in range(len(list_of_results[1:]))
-# # )
-# #
-# # header.append("BW")
-# # allign.extend(["r"] * len(list_of_results))
-# # header.extend(
-# # "BW_{0} %".format(i + 2) for i in range(len(list_of_results[1:]))
-# # )
-# #
-# # header.append("LAT")
-# # allign.extend(["r"] * len(list_of_results))
-# # header.extend(
-# # "LAT_{0}".format(i + 2) for i in range(len(list_of_results[1:]))
-# # )
-# #
-# # tab.header(header)
-# # sep = ["-" * 3] * len(header)
-# # processed_results = map(cls.prepare_data, list_of_results)
-# #
-# # key2results = []
-# # for res in processed_results:
-# # key2results.append(dict(
-# # ((item["name"], item["summ"]), item) for item in res
-# # ))
-# #
-# # prev_k = None
-# # iops_frmt = "{0[iops]} ~ {0[conf]:>2} ~ {0[dev]:>2}"
-# # for item in processed_results[0]:
-# # if prev_k is not None:
-# # if prev_k != item["key"]:
-# # tab.add_row(sep)
-# #
-# # prev_k = item["key"]
-# #
-# # key = (item['name'], item['summ'])
-# # line = list(key)
-# # base = key2results[0][key]
-# #
-# # line.append(iops_frmt.format(base))
-# #
-# # for test_results in key2results[1:]:
-# # val = test_results.get(key)
-# # if val is None:
-# # line.append("-")
-# # elif base['iops'] == 0:
-# # line.append("Nan")
-# # else:
-# # prc_val = {'dev': val['dev'], 'conf': val['conf']}
-# # prc_val['iops'] = int(100 * val['iops'] / base['iops'])
-# # line.append(iops_frmt.format(prc_val))
-# #
-# # line.append(base['bw'])
-# #
-# # for test_results in key2results[1:]:
-# # val = test_results.get(key)
-# # if val is None:
-# # line.append("-")
-# # elif base['bw'] == 0:
-# # line.append("Nan")
-# # else:
-# # line.append(int(100 * val['bw'] / base['bw']))
-# #
-# # for test_results in key2results:
-# # val = test_results.get(key)
-# # if val is None:
-# # line.append("-")
-# # else:
-# # line.append("{0[lat_50]} - {0[lat_95]}".format(val))
-# #
-# # tab.add_row(line)
-# #
-# # tab.set_cols_align(allign)
-# # return tab.draw()
-#
-#
-# # READ_IOPS_DISCSTAT_POS = 3
-# # WRITE_IOPS_DISCSTAT_POS = 7
-# #
-# #
-# # def load_sys_log_file(ftype: str, fname: str) -> TimeSeriesValue:
-# # assert ftype == 'iops'
-# # pval = None
-# # with open(fname) as fd:
-# # iops = []
-# # for ln in fd:
-# # params = ln.split()
-# # cval = int(params[WRITE_IOPS_DISCSTAT_POS]) + \
-# # int(params[READ_IOPS_DISCSTAT_POS])
-# # if pval is not None:
-# # iops.append(cval - pval)
-# # pval = cval
-# #
-# # vals = [(idx * 1000, val) for idx, val in enumerate(iops)]
-# # return TimeSeriesValue(vals)
-# #
-# #
-# # def load_test_results(folder: str, run_num: int) -> 'FioRunResult':
-# # res = {}
-# # params = None
-# #
-# # fn = os.path.join(folder, str(run_num) + '_params.yaml')
-# # params = yaml.load(open(fn).read())
-# #
-# # conn_ids_set = set()
-# # rr = r"{}_(?P<conn_id>.*?)_(?P<type>[^_.]*)\.\d+\.log$".format(run_num)
-# # for fname in os.listdir(folder):
-# # rm = re.match(rr, fname)
-# # if rm is None:
-# # continue
-# #
-# # conn_id_s = rm.group('conn_id')
-# # conn_id = conn_id_s.replace('_', ':')
-# # ftype = rm.group('type')
-# #
-# # if ftype not in ('iops', 'bw', 'lat'):
-# # continue
-# #
-# # ts = load_fio_log_file(os.path.join(folder, fname))
-# # res.setdefault(ftype, {}).setdefault(conn_id, []).append(ts)
-# #
-# # conn_ids_set.add(conn_id)
-# #
-# # rr = r"{}_(?P<conn_id>.*?)_(?P<type>[^_.]*)\.sys\.log$".format(run_num)
-# # for fname in os.listdir(folder):
-# # rm = re.match(rr, fname)
-# # if rm is None:
-# # continue
-# #
-# # conn_id_s = rm.group('conn_id')
-# # conn_id = conn_id_s.replace('_', ':')
-# # ftype = rm.group('type')
-# #
-# # if ftype not in ('iops', 'bw', 'lat'):
-# # continue
-# #
-# # ts = load_sys_log_file(ftype, os.path.join(folder, fname))
-# # res.setdefault(ftype + ":sys", {}).setdefault(conn_id, []).append(ts)
-# #
-# # conn_ids_set.add(conn_id)
-# #
-# # mm_res = {}
-# #
-# # if len(res) == 0:
-# # raise ValueError("No data was found")
-# #
-# # for key, data in res.items():
-# # conn_ids = sorted(conn_ids_set)
-# # awail_ids = [conn_id for conn_id in conn_ids if conn_id in data]
-# # matr = [data[conn_id] for conn_id in awail_ids]
-# # mm_res[key] = MeasurementMatrix(matr, awail_ids)
-# #
-# # raw_res = {}
-# # for conn_id in conn_ids:
-# # fn = os.path.join(folder, "{0}_{1}_rawres.json".format(run_num, conn_id_s))
-# #
-# # # remove message hack
-# # fc = "{" + open(fn).read().split('{', 1)[1]
-# # raw_res[conn_id] = json.loads(fc)
-# #
-# # fio_task = FioJobSection(params['name'])
-# # fio_task.vals.update(params['vals'])
-# #
-# # config = TestConfig('io', params, None, params['nodes'], folder, None)
-# # return FioRunResult(config, fio_task, mm_res, raw_res, params['intervals'], run_num)
-# #
-#
-# # class DiskPerfInfo:
-# # def __init__(self, name: str, summary: str, params: Dict[str, Any], testnodes_count: int) -> None:
-# # self.name = name
-# # self.bw = None
-# # self.iops = None
-# # self.lat = None
-# # self.lat_50 = None
-# # self.lat_95 = None
-# # self.lat_avg = None
-# #
-# # self.raw_bw = []
-# # self.raw_iops = []
-# # self.raw_lat = []
-# #
-# # self.params = params
-# # self.testnodes_count = testnodes_count
-# # self.summary = summary
-# #
-# # self.sync_mode = get_test_sync_mode(self.params['vals'])
-# # self.concurence = self.params['vals'].get('numjobs', 1)
-# #
-# #
-# # class IOTestResults:
-# # def __init__(self, suite_name: str, fio_results: 'FioRunResult', log_directory: str):
-# # self.suite_name = suite_name
-# # self.fio_results = fio_results
-# # self.log_directory = log_directory
-# #
-# # def __iter__(self):
-# # return iter(self.fio_results)
-# #
-# # def __len__(self):
-# # return len(self.fio_results)
-# #
-# # def get_yamable(self) -> Dict[str, List[str]]:
-# # items = [(fio_res.summary(), fio_res.idx) for fio_res in self]
-# # return {self.suite_name: [self.log_directory] + items}
-#
-#
-# # class FioRunResult(TestResults):
-# # """
-# # Fio run results
-# # config: TestConfig
-# # fio_task: FioJobSection
-# # ts_results: {str: MeasurementMatrix[TimeSeriesValue]}
-# # raw_result: ????
-# # run_interval:(float, float) - test tun time, used for sensors
-# # """
-# # def __init__(self, config, fio_task, ts_results, raw_result, run_interval, idx):
-# #
-# # self.name = fio_task.name.rsplit("_", 1)[0]
-# # self.fio_task = fio_task
-# # self.idx = idx
-# #
-# # self.bw = ts_results['bw']
-# # self.lat = ts_results['lat']
-# # self.iops = ts_results['iops']
-# #
-# # if 'iops:sys' in ts_results:
-# # self.iops_sys = ts_results['iops:sys']
-# # else:
-# # self.iops_sys = None
-# #
-# # res = {"bw": self.bw,
-# # "lat": self.lat,
-# # "iops": self.iops,
-# # "iops:sys": self.iops_sys}
-# #
-# # self.sensors_data = None
-# # self._pinfo = None
-# # TestResults.__init__(self, config, res, raw_result, run_interval)
-# #
-# # def get_params_from_fio_report(self):
-# # nodes = self.bw.connections_ids
-# #
-# # iops = [self.raw_result[node]['jobs'][0]['mixed']['iops'] for node in nodes]
-# # total_ios = [self.raw_result[node]['jobs'][0]['mixed']['total_ios'] for node in nodes]
-# # runtime = [self.raw_result[node]['jobs'][0]['mixed']['runtime'] / 1000 for node in nodes]
-# # flt_iops = [float(ios) / rtime for ios, rtime in zip(total_ios, runtime)]
-# #
-# # bw = [self.raw_result[node]['jobs'][0]['mixed']['bw'] for node in nodes]
-# # total_bytes = [self.raw_result[node]['jobs'][0]['mixed']['io_bytes'] for node in nodes]
-# # flt_bw = [float(tbytes) / rtime for tbytes, rtime in zip(total_bytes, runtime)]
-# #
-# # return {'iops': iops,
-# # 'flt_iops': flt_iops,
-# # 'bw': bw,
-# # 'flt_bw': flt_bw}
-# #
-# # def summary(self):
-# # return get_test_summary(self.fio_task, len(self.config.nodes))
-# #
-# # def summary_tpl(self):
-# # return get_test_summary_tuple(self.fio_task, len(self.config.nodes))
-# #
-# # def get_lat_perc_50_95_multy(self):
-# # lat_mks = collections.defaultdict(lambda: 0)
-# # num_res = 0
-# #
-# # for result in self.raw_result.values():
-# # num_res += len(result['jobs'])
-# # for job_info in result['jobs']:
-# # for k, v in job_info['latency_ms'].items():
-# # if isinstance(k, basestring) and k.startswith('>='):
-# # lat_mks[int(k[2:]) * 1000] += v
-# # else:
-# # lat_mks[int(k) * 1000] += v
-# #
-# # for k, v in job_info['latency_us'].items():
-# # lat_mks[int(k)] += v
-# #
-# # for k, v in lat_mks.items():
-# # lat_mks[k] = float(v) / num_res
-# # return get_lat_perc_50_95(lat_mks)
-# #
-# # def disk_perf_info(self, avg_interval=2.0):
-# #
-# # if self._pinfo is not None:
-# # return self._pinfo
-# #
-# # testnodes_count = len(self.config.nodes)
-# #
-# # pinfo = DiskPerfInfo(self.name,
-# # self.summary(),
-# # self.params,
-# # testnodes_count)
-# #
-# # def prepare(data, drop=1):
-# # if data is None:
-# # return data
-# #
-# # res = []
-# # for ts_data in data:
-# # if ts_data.average_interval() < avg_interval:
-# # ts_data = ts_data.derived(avg_interval)
-# #
-# # # drop last value on bounds
-# # # as they may contains ranges without activities
-# # assert len(ts_data.values) >= drop + 1, str(drop) + " " + str(ts_data.values)
-# #
-# # if drop > 0:
-# # res.append(ts_data.values[:-drop])
-# # else:
-# # res.append(ts_data.values)
-# #
-# # return res
-# #
-# # def agg_data(matr):
-# # arr = sum(matr, [])
-# # min_len = min(map(len, arr))
-# # res = []
-# # for idx in range(min_len):
-# # res.append(sum(dt[idx] for dt in arr))
-# # return res
-# #
-# # pinfo.raw_lat = map(prepare, self.lat.per_vm())
-# # num_th = sum(map(len, pinfo.raw_lat))
-# # lat_avg = [val / num_th for val in agg_data(pinfo.raw_lat)]
-# # pinfo.lat_avg = data_property(lat_avg).average / 1000 # us to ms
-# #
-# # pinfo.lat_50, pinfo.lat_95 = self.get_lat_perc_50_95_multy()
-# # pinfo.lat = pinfo.lat_50
-# #
-# # pinfo.raw_bw = map(prepare, self.bw.per_vm())
-# # pinfo.raw_iops = map(prepare, self.iops.per_vm())
-# #
-# # if self.iops_sys is not None:
-# # pinfo.raw_iops_sys = map(prepare, self.iops_sys.per_vm())
-# # pinfo.iops_sys = data_property(agg_data(pinfo.raw_iops_sys))
-# # else:
-# # pinfo.raw_iops_sys = None
-# # pinfo.iops_sys = None
-# #
-# # fparams = self.get_params_from_fio_report()
-# # fio_report_bw = sum(fparams['flt_bw'])
-# # fio_report_iops = sum(fparams['flt_iops'])
-# #
-# # agg_bw = agg_data(pinfo.raw_bw)
-# # agg_iops = agg_data(pinfo.raw_iops)
-# #
-# # log_bw_avg = average(agg_bw)
-# # log_iops_avg = average(agg_iops)
-# #
-# # # update values to match average from fio report
-# # coef_iops = fio_report_iops / float(log_iops_avg)
-# # coef_bw = fio_report_bw / float(log_bw_avg)
-# #
-# # bw_log = data_property([val * coef_bw for val in agg_bw])
-# # iops_log = data_property([val * coef_iops for val in agg_iops])
-# #
-# # bw_report = data_property([fio_report_bw])
-# # iops_report = data_property([fio_report_iops])
-# #
-# # # When IOPS/BW per thread is too low
-# # # data from logs is rounded to match
-# # iops_per_th = sum(sum(pinfo.raw_iops, []), [])
-# # if average(iops_per_th) > 10:
-# # pinfo.iops = iops_log
-# # pinfo.iops2 = iops_report
-# # else:
-# # pinfo.iops = iops_report
-# # pinfo.iops2 = iops_log
-# #
-# # bw_per_th = sum(sum(pinfo.raw_bw, []), [])
-# # if average(bw_per_th) > 10:
-# # pinfo.bw = bw_log
-# # pinfo.bw2 = bw_report
-# # else:
-# # pinfo.bw = bw_report
-# # pinfo.bw2 = bw_log
-# #
-# # self._pinfo = pinfo
-# #
-# # return pinfo
-#
-# # class TestResult:
-# # """Hold all information for a given test - test info,
-# # sensors data and performance results for test period from all nodes"""
-# # run_id = None # type: int
-# # test_info = None # type: Any
-# # begin_time = None # type: int
-# # end_time = None # type: int
-# # sensors = None # Dict[Tuple[str, str, str], TimeSeries]
-# # performance = None # Dict[Tuple[str, str], TimeSeries]
-# #
-# # class TestResults:
-# # """
-# # this class describe test results
-# #
-# # config:TestConfig - test config object
-# # params:dict - parameters from yaml file for this test
-# # results:{str:MeasurementMesh} - test results object
-# # raw_result:Any - opaque object to store raw results
-# # run_interval:(float, float) - test tun time, used for sensors
-# # """
-# #
-# # def __init__(self,
-# # config: TestConfig,
-# # results: Dict[str, Any],
-# # raw_result: Any,
-# # run_interval: Tuple[float, float]) -> None:
-# # self.config = config
-# # self.params = config.params
-# # self.results = results
-# # self.raw_result = raw_result
-# # self.run_interval = run_interval
-# #
-# # def __str__(self) -> str:
-# # res = "{0}({1}):\n results:\n".format(
-# # self.__class__.__name__,
-# # self.summary())
-# #
-# # for name, val in self.results.items():
-# # res += " {0}={1}\n".format(name, val)
-# #
-# # res += " params:\n"
-# #
-# # for name, val in self.params.items():
-# # res += " {0}={1}\n".format(name, val)
-# #
-# # return res
-# #
-# # def summary(self) -> str:
-# # raise NotImplementedError()
-# # return ""
-# #
-# # def get_yamable(self) -> Any:
-# # raise NotImplementedError()
-# # return None
-#
-#
-#
-# # class MeasurementMatrix:
-# # """
-# # data:[[MeasurementResult]] - VM_COUNT x TH_COUNT matrix of MeasurementResult
-# # """
-# # def __init__(self, data, connections_ids):
-# # self.data = data
-# # self.connections_ids = connections_ids
-# #
-# # def per_vm(self):
-# # return self.data
-# #
-# # def per_th(self):
-# # return sum(self.data, [])
-#
-#
-# # class MeasurementResults:
-# # data = None # type: List[Any]
-# #
-# # def stat(self) -> StatProps:
-# # return data_property(self.data)
-# #
-# # def __str__(self) -> str:
-# # return 'TS([' + ", ".join(map(str, self.data)) + '])'
-# #
-# #
-# # class SimpleVals(MeasurementResults):
-# # """
-# # data:[float] - list of values
-# # """
-# # def __init__(self, data: List[float]) -> None:
-# # self.data = data
-# #
-# #
-# # class TimeSeriesValue(MeasurementResults):
-# # """
-# # data:[(float, float, float)] - list of (start_time, lenght, average_value_for_interval)
-# # odata: original values
-# # """
-# # def __init__(self, data: List[Tuple[float, float]]) -> None:
-# # assert len(data) > 0
-# # self.odata = data[:]
-# # self.data = [] # type: List[Tuple[float, float, float]]
-# #
-# # cstart = 0.0
-# # for nstart, nval in data:
-# # self.data.append((cstart, nstart - cstart, nval))
-# # cstart = nstart
-# #
-# # @property
-# # def values(self) -> List[float]:
-# # return [val[2] for val in self.data]
-# #
-# # def average_interval(self) -> float:
-# # return float(sum([val[1] for val in self.data])) / len(self.data)
-# #
-# # def skip(self, seconds) -> 'TimeSeriesValue':
-# # nres = []
-# # for start, ln, val in self.data:
-# # nstart = start + ln - seconds
-# # if nstart > 0:
-# # nres.append([nstart, val])
-# # return self.__class__(nres)
-# #
-# # def derived(self, tdelta) -> 'TimeSeriesValue':
-# # end = self.data[-1][0] + self.data[-1][1]
-# # tdelta = float(tdelta)
-# #
-# # ln = end / tdelta
-# #
-# # if ln - int(ln) > 0:
-# # ln += 1
-# #
-# # res = [[tdelta * i, 0.0] for i in range(int(ln))]
-# #
-# # for start, lenght, val in self.data:
-# # start_idx = int(start / tdelta)
-# # end_idx = int((start + lenght) / tdelta)
-# #
-# # for idx in range(start_idx, end_idx + 1):
-# # rstart = tdelta * idx
-# # rend = tdelta * (idx + 1)
-# #
-# # intersection_ln = min(rend, start + lenght) - max(start, rstart)
-# # if intersection_ln > 0:
-# # try:
-# # res[idx][1] += val * intersection_ln / tdelta
-# # except IndexError:
-# # raise
-# #
-# # return self.__class__(res)
-#
-#
-# def console_report_stage(ctx: TestRun) -> None:
-# # TODO(koder): load data from storage
-# raise NotImplementedError("...")
-# # first_report = True
-# # text_rep_fname = ctx.config.text_report_file
-# #
-# # with open(text_rep_fname, "w") as fd:
-# # for tp, data in ctx.results.items():
-# # if 'io' == tp and data is not None:
-# # rep_lst = []
-# # for result in data:
-# # rep_lst.append(
-# # IOPerfTest.format_for_console(list(result)))
-# # rep = "\n\n".join(rep_lst)
-# # elif tp in ['mysql', 'pgbench'] and data is not None:
-# # rep = MysqlTest.format_for_console(data)
-# # elif tp == 'omg':
-# # rep = OmgTest.format_for_console(data)
-# # else:
-# # logger.warning("Can't generate text report for " + tp)
-# # continue
-# #
-# # fd.write(rep)
-# # fd.write("\n")
-# #
-# # if first_report:
-# # logger.info("Text report were stored in " + text_rep_fname)
-# # first_report = False
-# #
-# # print("\n" + rep + "\n")
-#
-#
-# # def test_load_report_stage(cfg: Config, ctx: TestRun) -> None:
-# # load_rep_fname = cfg.load_report_file
-# # found = False
-# # for idx, (tp, data) in enumerate(ctx.results.items()):
-# # if 'io' == tp and data is not None:
-# # if found:
-# # logger.error("Making reports for more than one " +
-# # "io block isn't supported! All " +
-# # "report, except first are skipped")
-# # continue
-# # found = True
-# # report.make_load_report(idx, cfg['results'], load_rep_fname)
-# #
-# #
-#
-# # def html_report_stage(ctx: TestRun) -> None:
-# # TODO(koder): load data from storage
-# # raise NotImplementedError("...")
-# # html_rep_fname = cfg.html_report_file
-# # found = False
-# # for tp, data in ctx.results.items():
-# # if 'io' == tp and data is not None:
-# # if found or len(data) > 1:
-# # logger.error("Making reports for more than one " +
-# # "io block isn't supported! All " +
-# # "report, except first are skipped")
-# # continue
-# # found = True
-# # report.make_io_report(list(data[0]),
-# # cfg.get('comment', ''),
-# # html_rep_fname,
-# # lab_info=ctx.nodes)
-#
-# #
-# # def load_data_from_path(test_res_dir: str) -> Mapping[str, List[Any]]:
-# # files = get_test_files(test_res_dir)
-# # raw_res = yaml_load(open(files['raw_results']).read())
-# # res = collections.defaultdict(list)
-# #
-# # for tp, test_lists in raw_res:
-# # for tests in test_lists:
-# # for suite_name, suite_data in tests.items():
-# # result_folder = suite_data[0]
-# # res[tp].append(TOOL_TYPE_MAPPER[tp].load(suite_name, result_folder))
-# #
-# # return res
-# #
-# #
-# # def load_data_from_path_stage(var_dir: str, _, ctx: TestRun) -> None:
-# # for tp, vals in load_data_from_path(var_dir).items():
-# # ctx.results.setdefault(tp, []).extend(vals)
-# #
-# #
-# # def load_data_from(var_dir: str) -> Callable[[TestRun], None]:
-# # return functools.partial(load_data_from_path_stage, var_dir)
diff --git a/wally/result_classes.py b/wally/result_classes.py
index 62e74f0..43ae721 100644
--- a/wally/result_classes.py
+++ b/wally/result_classes.py
@@ -1,16 +1,13 @@
import abc
-import array
from typing import Dict, List, Any, Optional, Tuple, cast, Type, Iterator
-from collections import OrderedDict
import numpy
from scipy.stats.mstats_basic import NormaltestResult
-
from .suits.job import JobConfig
from .node_interfaces import IRPCNode
-from .common_types import Storable, IStorable
+from .common_types import Storable
from .utils import round_digits, Number
@@ -32,14 +29,20 @@
run_uuid: str,
nodes: List[IRPCNode],
remote_dir: str,
- idx: int) -> None:
+ idx: int,
+ keep_raw_files: bool) -> None:
self.test_type = test_type
self.params = params
self.run_uuid = run_uuid
self.nodes = nodes
self.nodes_ids = [node.node_id for node in nodes]
self.remote_dir = remote_dir
- self.storage_id = "{}_{}".format(self.test_type, idx)
+ self.keep_raw_files = keep_raw_files
+
+ if 'load' in self.params:
+ self.storage_id = "{}_{}_{}".format(self.test_type, self.params['load'], idx)
+ else:
+ self.storage_id = "{}_{}".format(self.test_type, idx)
def __eq__(self, o: object) -> bool:
if type(o) is not self.__class__:
@@ -57,27 +60,45 @@
suite_id: str = None,
job_id: str = None,
node_id: str = None,
- dev: str = None,
sensor: str = None,
+ dev: str = None,
+ metric: str = None,
tag: str = None) -> None:
self.suite_id = suite_id
self.job_id = job_id
self.node_id = node_id
- self.dev = dev
self.sensor = sensor
+ self.dev = dev
+ self.metric = metric
self.tag = tag
+ @property
+ def metric_fqdn(self) -> str:
+ return "{0.sensor}.{0.dev}.{0.metric}".format(self)
+
def __call__(self, **kwargs) -> 'DataSource':
dct = self.__dict__.copy()
dct.update(kwargs)
return self.__class__(**dct)
def __str__(self) -> str:
- return "{0.suite_id}.{0.job_id}/{0.node_id}/{0.dev}.{0.sensor}.{0.tag}".format(self)
+ return ("suite={0.suite_id},job={0.job_id},node={0.node_id}," +
+ "path={0.sensor}.{0.dev}.{0.metric},tag={0.tag}").format(self)
def __repr__(self) -> str:
return str(self)
+ @property
+ def tpl(self) -> Tuple[Optional[str], Optional[str], Optional[str], Optional[str],
+ Optional[str], Optional[str], Optional[str]]:
+ return self.suite_id, self.job_id, self.node_id, self.sensor, self.dev, self.metric, self.tag
+
+ def __eq__(self, o: object) -> bool:
+ return self.tpl == cast(DataSource, o).tpl
+
+ def __hash__(self) -> int:
+ return hash(self.tpl)
+
class TimeSeries:
"""Data series from sensor - either system sensor or from load generator tool (e.g. fio)"""
@@ -88,9 +109,9 @@
data: numpy.array,
times: numpy.array,
units: str,
+ source: DataSource,
time_units: str = 'us',
- second_axis_size: int = 1,
- source: DataSource = None) -> None:
+ raw_tag: str = 'txt') -> None:
# Sensor name. Typically DEV_NAME.METRIC
self.name = name
@@ -105,20 +126,16 @@
self.times = times
self.data = data
- # Not equal to 1 in case of 2d sensors, like latency, when each measurement is a histogram.
- self.second_axis_size = second_axis_size
-
# Raw sensor data (is provided). Like log file for fio iops/bw/lat.
self.raw = raw
-
+ self.raw_tag = raw_tag
self.source = source
def __str__(self) -> str:
res = "TS({}):\n".format(self.name)
res += " source={}\n".format(self.source)
res += " times_size={}\n".format(len(self.times))
- res += " data_size={}\n".format(len(self.data))
- res += " data_shape={}x{}\n".format(len(self.data) // self.second_axis_size, self.second_axis_size)
+ res += " data_shape={}\n".format(*self.data.shape)
return res
def __repr__(self) -> str:
@@ -139,18 +156,25 @@
self.perc_95 = None # type: float
self.perc_90 = None # type: float
self.perc_50 = None # type: float
+ self.perc_10 = None # type: float
+ self.perc_5 = None # type: float
+ self.perc_1 = None # type: float
self.min = None # type: Number
self.max = None # type: Number
# bin_center: bin_count
+ self.log_bins = False
self.bins_populations = None # type: numpy.array
- self.bins_mids = None # type: numpy.array
+
+ # bin edges, one more element than in bins_populations
+ self.bins_edges = None # type: numpy.array
+
self.data = data
def __str__(self) -> str:
res = ["{}(size = {}):".format(self.__class__.__name__, len(self.data))]
- for name in ["perc_50", "perc_90", "perc_95", "perc_99"]:
+ for name in ["perc_1", "perc_5", "perc_10", "perc_50", "perc_90", "perc_95", "perc_99"]:
res.append(" {} = {}".format(name, round_digits(getattr(self, name))))
res.append(" range {} {}".format(round_digits(self.min), round_digits(self.max)))
return "\n".join(res)
@@ -174,8 +198,7 @@
class HistoStatProps(StatProps):
"""Statistic properties for 2D timeseries with unknown data distribution and histogram as input value.
Used for latency"""
- def __init__(self, data: numpy.array, second_axis_size: int) -> None:
- self.second_axis_size = second_axis_size
+ def __init__(self, data: numpy.array) -> None:
StatProps.__init__(self, data)
@@ -196,6 +219,9 @@
res = ["NormStatProps(size = {}):".format(len(self.data)),
" distr = {} ~ {}".format(round_digits(self.average), round_digits(self.deviation)),
" confidence({0.confidence_level}) = {1}".format(self, round_digits(self.confidence)),
+ " perc_1 = {}".format(round_digits(self.perc_1)),
+ " perc_5 = {}".format(round_digits(self.perc_5)),
+ " perc_10 = {}".format(round_digits(self.perc_10)),
" perc_50 = {}".format(round_digits(self.perc_50)),
" perc_90 = {}".format(round_digits(self.perc_90)),
" perc_95 = {}".format(round_digits(self.perc_95)),
@@ -240,6 +266,10 @@
pass
@abc.abstractmethod
+ def load_sensor(self, ds: DataSource) -> TimeSeries:
+ pass
+
+ @abc.abstractmethod
def put_or_check_suite(self, suite: SuiteConfig) -> None:
pass
diff --git a/wally/run_test.py b/wally/run_test.py
index d3c68b6..3fd8e64 100755
--- a/wally/run_test.py
+++ b/wally/run_test.py
@@ -13,7 +13,7 @@
from .suits.all_suits import all_suits
from .test_run_class import TestRun
from .utils import StopTestError
-from .result_classes import TestSuiteConfig
+from .result_classes import SuiteConfig
from .hlstorage import ResultStorage
@@ -72,7 +72,7 @@
for node in ctx.nodes:
if node.rpc_log_file is not None:
nid = node.node_id
- path = "rpc_logs/" + nid
+ path = "rpc_logs/{}.txt".format(nid)
node.conn.server.flush_logs()
log = node.get_file_content(node.rpc_log_file)
if path in ctx.storage:
@@ -245,12 +245,13 @@
test_cls = all_suits[name]
remote_dir = ctx.config.default_test_local_folder.format(name=name, uuid=ctx.config.run_uuid)
- suite = TestSuiteConfig(test_cls.name,
- params=params,
- run_uuid=ctx.config.run_uuid,
- nodes=test_nodes,
- remote_dir=remote_dir,
- idx=suite_idx)
+ suite = SuiteConfig(test_cls.name,
+ params=params,
+ run_uuid=ctx.config.run_uuid,
+ nodes=test_nodes,
+ remote_dir=remote_dir,
+ idx=suite_idx,
+ keep_raw_files=ctx.config.keep_raw_files)
test_cls(storage=ResultStorage(ctx.storage),
suite=suite,
diff --git a/wally/sensors.py b/wally/sensors.py
index 54ae1ad..bcdb4e3 100644
--- a/wally/sensors.py
+++ b/wally/sensors.py
@@ -1,12 +1,15 @@
import array
import logging
-import collections
from typing import List, Dict, Tuple
+import numpy
+
from . import utils
from .test_run_class import TestRun
+from .result_classes import DataSource
from . import sensors_rpc_plugin
from .stage import Stage, StepOrder
+from .hlstorage import ResultStorage
plugin_fname = sensors_rpc_plugin.__file__.rsplit(".", 1)[0] + ".py"
SENSORS_PLUGIN_CODE = open(plugin_fname, "rb").read() # type: bytes
@@ -15,6 +18,28 @@
logger = logging.getLogger("wally")
+sensor_units = {
+ "system-cpu.idle_time": "ms",
+ "system-cpu.nice_processes": "",
+ "system-cpu.procs_blocked": "",
+ "system-cpu.procs_queue_x10": "",
+ "system-cpu.system_processes": "",
+ "system-cpu.user_processes": "",
+ "net-io.recv_bytes": "B",
+ "net-io.recv_packets": "",
+ "net-io.send_bytes": "B",
+ "net-io.send_packets": "",
+ "block-io.io_queue": "",
+ "block-io.io_time": "ms",
+ "block-io.reads_completed": "",
+ "block-io.rtime": "ms",
+ "block-io.sectors_read": "B",
+ "block-io.sectors_written": "B",
+ "block-io.writes_completed": "",
+ "block-io.wtime": "ms"
+}
+
+
# TODO(koder): in case if node has more than one role sensor settings might be incorrect
class StartSensorsStage(Stage):
priority = StepOrder.START_SENSORS
@@ -80,6 +105,7 @@
def collect_sensors_data(ctx: TestRun, stop: bool = False):
+ rstorage = ResultStorage(ctx.storage)
for node in ctx.nodes:
node_id = node.node_id
if node_id in ctx.sensors_run_on:
@@ -89,9 +115,16 @@
else:
func = node.conn.sensors.get_updates
- # TODO: data is unpacked/repacked here with no reason
+ # TODO: units should come along with the data
for path, value in sensors_rpc_plugin.unpack_rpc_updates(func()):
- ctx.storage.append(value, "sensors/{}_{}".format(node_id, path))
+ if path == 'collected_at':
+ ds = DataSource(node_id=node_id, metric='collected_at')
+ units = 'us'
+ else:
+ sensor, dev, metric = path.split(".")
+ ds = DataSource(node_id=node_id, metric=metric, dev=dev, sensor=sensor)
+ units = sensor_units["{}.{}".format(sensor, metric)]
+ rstorage.append_sensor(numpy.array(value), ds, units)
class CollectSensorsStage(Stage):
diff --git a/wally/sensors_rpc_plugin.py b/wally/sensors_rpc_plugin.py
index 7400ab5..ffb0abd 100644
--- a/wally/sensors_rpc_plugin.py
+++ b/wally/sensors_rpc_plugin.py
@@ -40,7 +40,7 @@
pass
@classmethod
- def unpack_results(cls, device, metrics, data, typecode):
+ def unpack_results(cls, device, metric, data, typecode):
pass
def init(self):
@@ -74,7 +74,7 @@
return {key: (arr.typecode, arr.tostring()) for key, arr in res.items()}
@classmethod
- def unpack_results(cls, device, metrics, packed, typecode):
+ def unpack_results(cls, device, metric, packed, typecode):
arr = array.array(typecode)
if sys.version_info >= (3, 0, 0):
arr.frombytes(packed)
@@ -155,15 +155,17 @@
# 13 - time spent doing I/Os (ms)
# 14 - weighted time spent doing I/Os (ms)
+ SECTOR_SIZE = 512
+
io_values_pos = [
- (3, 'reads_completed', True),
- (5, 'sectors_read', True),
- (6, 'rtime', True),
- (7, 'writes_completed', True),
- (9, 'sectors_written', True),
- (10, 'wtime', True),
- (11, 'io_queue', False),
- (13, 'io_time', True)
+ (3, 'reads_completed', True, 1),
+ (5, 'sectors_read', True, SECTOR_SIZE),
+ (6, 'rtime', True, 1),
+ (7, 'writes_completed', True, 1),
+ (9, 'sectors_written', True, SECTOR_SIZE),
+ (10, 'wtime', True, 1),
+ (11, 'io_queue', False, 1),
+ (13, 'io_time', True, 1)
]
def __init__(self, *args, **kwargs):
@@ -188,8 +190,8 @@
if dev_name not in self.allowed_names:
continue
- for pos, name, aggregated in self.io_values_pos:
- vl = int(vals[pos])
+ for pos, name, aggregated, coef in self.io_values_pos:
+ vl = int(vals[pos]) * coef
if aggregated:
self.add_relative(dev_name, name, vl)
elif not init_rel:
@@ -484,8 +486,8 @@
return res
@classmethod
- def unpack_results(cls, device, metrics, packed, typecode):
- if metrics in ('historic', 'in_flight'):
+ def unpack_results(cls, device, metric, packed, typecode):
+ if metric in ('historic', 'in_flight'):
assert typecode is None
return packed
@@ -650,8 +652,8 @@
offset_map = {}
for sensor_name, sensor in sdata.sensors.items():
- for (device, metrics), (typecode, val) in sensor.get_updates().items():
- offset_map["{}.{}.{}".format(sensor_name, device, metrics)] = (len(blob), len(val), typecode)
+ for (device, metric), (typecode, val) in sensor.get_updates().items():
+ offset_map["{}.{}.{}".format(sensor_name, device, metric)] = (len(blob), len(val), typecode)
blob += val
collected_at = sdata.collected_at
diff --git a/wally/statistic.py b/wally/statistic.py
index b80fb22..4ebfccc 100644
--- a/wally/statistic.py
+++ b/wally/statistic.py
@@ -1,8 +1,7 @@
import math
import logging
import itertools
-import statistics
-from typing import List, Callable, Iterable, cast
+from typing import List, Callable, Iterable, cast, Tuple
import numpy
from scipy import stats, optimize
@@ -40,7 +39,8 @@
res.max = data[-1]
res.min = data[0]
- res.perc_50, res.perc_90, res.perc_99, res.perc_99 = numpy.percentile(data, q=[50., 90., 95., 99.])
+ pcs = numpy.percentile(data, q=[1.0, 5.0, 10., 50., 90., 95., 99.])
+ res.perc_1, res.perc_5, res.perc_10, res.perc_50, res.perc_90, res.perc_95, res.perc_99 = pcs
if len(data) >= MIN_VALUES_FOR_CONFIDENCE:
res.confidence = stats.sem(data) * \
@@ -50,8 +50,8 @@
res.confidence = None
res.confidence_level = None
- res.bins_populations, bins_edges = numpy.histogram(data, bins=bins_count)
- res.bins_mids = (bins_edges[:-1] + bins_edges[1:]) / 2
+ res.bins_populations, res.bins_edges = numpy.histogram(data, bins=bins_count)
+ res.bins_edges = res.bins_edges[:-1]
try:
res.normtest = stats.mstats.normaltest(data)
@@ -64,79 +64,93 @@
return res
+# update this code
+def rebin_histogram(bins_populations: numpy.array,
+ bins_edges: numpy.array,
+ new_bins_count: int,
+ left_tail_idx: int = None,
+ right_tail_idx: int = None,
+ log_bins: bool = False) -> Tuple[numpy.array, numpy.array]:
+ # Rebin a large histogram into a smaller one with new_bins_count bins, distributed linearly
+ # (or logarithmically when log_bins is set) across the left_tail_idx:right_tail_idx range.
+
+ assert len(bins_populations.shape) == 1
+ assert len(bins_edges.shape) == 1
+ assert bins_edges.shape[0] == bins_populations.shape[0]
+
+ if left_tail_idx is None:
+ min_val = bins_edges[0]
+ else:
+ min_val = bins_edges[left_tail_idx]
+
+ if right_tail_idx is None:
+ max_val = bins_edges[-1]
+ else:
+ max_val = bins_edges[right_tail_idx]
+
+ if log_bins:
+ assert min_val > 1E-3
+ step = (max_val / min_val) ** (1 / new_bins_count)
+ new_bins_edges = min_val * (step ** numpy.arange(new_bins_count)) # type: numpy.array
+ else:
+ new_bins_edges = numpy.linspace(min_val, max_val, new_bins_count + 1, dtype='float')[:-1] # type: numpy.array
+
+ old_bins_pos = numpy.searchsorted(new_bins_edges, bins_edges, side='right')
+ new_bins = numpy.zeros(new_bins_count, dtype=int) # type: numpy.array
+
+ # last source bin can't be split
+ # TODO: need to add assert for this
+ new_bins[-1] += bins_populations[-1]
+ bin_sizes = bins_edges[1:] - bins_edges[:-1]
+
+ # correct position to get bin idx from edge idx
+ old_bins_pos -= 1
+ old_bins_pos[old_bins_pos < 0] = 0
+ new_bins_sizes = new_bins_edges[1:] - new_bins_edges[:-1]
+
+ for population, begin, end, bsize in zip(bins_populations[:-1], old_bins_pos[:-1], old_bins_pos[1:], bin_sizes):
+ if begin == end:
+ new_bins[begin] += population
+ else:
+ density = population / bsize
+ for curr_box in range(begin, end):
+ cnt = min(int(new_bins_sizes[begin] * density + 0.5), population)
+ new_bins[begin] += cnt
+ population -= cnt
+
+ return new_bins, new_bins_edges
+
+
def calc_histo_stat_props(ts: TimeSeries,
bins_edges: numpy.array,
- bins_count: int,
- min_valuable: float = 0.0001) -> HistoStatProps:
- data = numpy.array(ts.data, dtype='int')
- data.shape = [len(ts.data) // ts.second_axis_size, ts.second_axis_size] # type: ignore
-
- res = HistoStatProps(ts.data, ts.second_axis_size)
+ rebins_count: int,
+ tail: float = 0.005) -> HistoStatProps:
+ log_bins = False
+ res = HistoStatProps(ts.data)
# summ across all series
- aggregated = numpy.sum(data, axis=0, dtype='int')
- total = numpy.sum(aggregated)
-
- # minimal value used for histo
- min_val_on_histo = total * min_valuable
+ aggregated = ts.data.sum(axis=0, dtype='int')
+ total = aggregated.sum()
# percentiles levels
- expected = [total * 0.5, total * 0.9, total * 0.95, total * 0.99]
- percentiles = []
+ expected = list(numpy.array([0.01, 0.05, 0.1, 0.5, 0.9, 0.95, 0.99]) * total)
+ cumsum = numpy.cumsum(aggregated)
- # all indexes, where values greater than min_val_on_histo
- valuable_idxs = []
+ percentiles_bins = numpy.searchsorted(cumsum, expected)
+ percentiles = bins_edges[percentiles_bins]
+ res.perc_1, res.perc_5, res.perc_10, res.perc_50, res.perc_90, res.perc_95, res.perc_99 = percentiles
- curr_summ = 0
- non_zero = aggregated.nonzero()[0]
-
- # calculate percentiles and valuable_indexes
- for idx in non_zero:
- val = aggregated[idx]
- while expected and curr_summ + val >= expected[0]:
- percentiles.append(bins_edges[idx])
- del expected[0]
-
- curr_summ += val
-
- if val >= min_val_on_histo:
- valuable_idxs.append(idx)
-
- res.perc_50, res.perc_90, res.perc_95, res.perc_99 = percentiles
+ # don't show tail ranges on histogram
+ left_tail_idx, right_tail_idx = numpy.searchsorted(cumsum, [tail * total, (1 - tail) * total])
# minimax and maximal non-zero elements
+ non_zero = numpy.nonzero(aggregated)[0]
res.min = bins_edges[aggregated[non_zero[0]]]
res.max = bins_edges[non_zero[-1] + (1 if non_zero[-1] != len(bins_edges) else 0)]
- # minimal and maximal valueble evelemts
- val_idx_min = valuable_idxs[0]
- val_idx_max = valuable_idxs[-1]
-
- raw_bins_populations = aggregated[val_idx_min: val_idx_max + 1]
- raw_bins_edges = bins_edges[val_idx_min: val_idx_max + 2]
- raw_bins_mids = cast(numpy.array, (raw_bins_edges[1:] + raw_bins_edges[:-1]) / 2)
-
- step = (raw_bins_mids[-1] + raw_bins_mids[0]) / bins_count
- next = raw_bins_mids[0]
-
- # aggregate raw histogram with many bins into result histogram with bins_count bins
- cidx = 0
- bins_populations = []
- bins_mids = []
-
- while cidx < len(raw_bins_mids):
- next += step
- bin_population = 0
-
- while cidx < len(raw_bins_mids) and raw_bins_mids[cidx] <= next:
- bin_population += raw_bins_populations[cidx]
- cidx += 1
-
- bins_populations.append(bin_population)
- bins_mids.append(next - step / 2)
-
- res.bins_populations = numpy.array(bins_populations, dtype='int')
- res.bins_mids = numpy.array(bins_mids, dtype='float32')
+ res.log_bins = False
+ res.bins_populations, res.bins_edges = rebin_histogram(aggregated, bins_edges, rebins_count,
+ left_tail_idx, right_tail_idx)
return res
@@ -193,6 +207,105 @@
return func_line(tpl_final, numpy.array(xnew))
+def moving_average(data: numpy.array, window: int) -> numpy.array:
+ cumsum = numpy.cumsum(data)
+ cumsum[window:] = cumsum[window:] - cumsum[:-window]
+ return cumsum[window - 1:] / window
+
+
+def moving_dev(data: numpy.array, window: int) -> numpy.array:
+ cumsum = numpy.cumsum(data)
+ cumsum2 = numpy.cumsum(data ** 2)
+ cumsum[window:] = cumsum[window:] - cumsum[:-window]
+ cumsum2[window:] = cumsum2[window:] - cumsum2[:-window]
+ return ((cumsum2[window - 1:] - cumsum[window - 1:] ** 2 / window) / (window - 1)) ** 0.5
+
+
+def find_ouliers(data: numpy.array,
+ center_range: Tuple[int, int] = (25, 75),
+ cut_range: float = 3) -> numpy.array:
+ v1, v2 = numpy.percentile(data, center_range)
+ return numpy.abs(data - (v1 + v2) / 2) > ((v2 - v1) / 2 * cut_range)
+
+
+def find_ouliers_ts(data: numpy.array,
+ windows_size: int = 30,
+ center_range: Tuple[int, int] = (25, 75),
+ cut_range: float = 3) -> numpy.array:
+ outliers = numpy.empty(data.shape, dtype=bool)
+
+ if len(data) < windows_size:
+ outliers[:] = False
+ return outliers
+
+ begin_idx = 0
+ if len(data) < windows_size * 2:
+ end_idx = (len(data) % windows_size) // 2 + windows_size
+ else:
+ end_idx = len(data)
+
+ while True:
+ cdata = data[begin_idx: end_idx]
+ outliers[begin_idx: end_idx] = find_ouliers(cdata, center_range, cut_range)
+ begin_idx = end_idx
+
+ if end_idx == len(data):
+ break
+
+ end_idx += windows_size
+ if len(data) - end_idx < windows_size:
+ end_idx = len(data)
+
+ return outliers
+
+
+def hist_outliers_nd(bin_populations: numpy.array,
+ bin_centers: numpy.array,
+ center_range: Tuple[int, int] = (25, 75),
+ cut_range: float = 3.0) -> Tuple[int, int]:
+ assert len(bin_populations) == len(bin_centers)
+ total_count = bin_populations.sum()
+
+ perc25 = total_count / 100.0 * center_range[0]
+ perc75 = total_count / 100.0 * center_range[1]
+
+ perc25_idx, perc75_idx = numpy.searchsorted(numpy.cumsum(bin_populations), [perc25, perc75])
+ middle = (bin_centers[perc75_idx] + bin_centers[perc25_idx]) / 2
+ r = (bin_centers[perc75_idx] - bin_centers[perc25_idx]) / 2
+
+ lower_bound = middle - r * cut_range
+ upper_bound = middle + r * cut_range
+
+ lower_cut_idx, upper_cut_idx = numpy.searchsorted(bin_centers, [lower_bound, upper_bound])
+ return lower_cut_idx, upper_cut_idx
+
+
+def hist_outliers_perc(bin_populations: numpy.array,
+ bounds_perc: Tuple[float, float] = (0.01, 0.99)) -> Tuple[int, int]:
+ assert len(bin_populations.shape) == 1
+ total_count = bin_populations.sum()
+ lower_perc = total_count * bounds_perc[0]
+ upper_perc = total_count * bounds_perc[1]
+ return numpy.searchsorted(numpy.cumsum(bin_populations), [lower_perc, upper_perc])
+
+
+def ts_hist_outliers_perc(bin_populations: numpy.array,
+ window_size: int = 10,
+ bounds_perc: Tuple[float, float] = (0.01, 0.99)) -> Tuple[int, int]:
+ assert len(bin_populations.shape) == 2
+
+ points = list(range(0, len(bin_populations), window_size))
+ if len(bin_populations) % window_size != 0:
+ points.append(points[-1] + window_size)
+
+ ranges = []
+ for begin, end in zip(points[:-1], points[1:]):
+ window_hist = bin_populations[begin:end].sum(axis=0)
+ ranges.append(hist_outliers_perc(window_hist, bounds_perc=bounds_perc))
+
+ return min(i[0] for i in ranges), max(i[1] for i in ranges)
+
+
# TODO: revise next
# def difference(y, ynew):
# """returns average and maximum relative and
diff --git a/wally/storage.py b/wally/storage.py
index 3e8bbab..c8edf5d 100644
--- a/wally/storage.py
+++ b/wally/storage.py
@@ -5,7 +5,6 @@
import os
import re
import abc
-import array
import shutil
import sqlite3
import logging
@@ -16,9 +15,10 @@
from yaml import CLoader as Loader, CDumper as Dumper # type: ignore
except ImportError:
from yaml import Loader, Dumper # type: ignore
-
+import numpy
from .common_types import IStorable
+from .utils import shape2str, str2shape
logger = logging.getLogger("wally")
@@ -214,6 +214,10 @@
create_on_fail = True
mode = "rb+"
os.makedirs(os.path.dirname(jpath), exist_ok=True)
+ elif "ct" == mode:
+ create_on_fail = True
+ mode = "rt+"
+ os.makedirs(os.path.dirname(jpath), exist_ok=True)
else:
create_on_fail = False
@@ -222,7 +226,11 @@
except IOError:
if not create_on_fail:
raise
- fd = open(jpath, "wb")
+
+ if 't' in mode:
+ fd = open(jpath, "wt")
+ else:
+ fd = open(jpath, "wb")
return cast(IO[bytes], fd)
@@ -280,12 +288,12 @@
pass
+csv_file_encoding = 'ascii'
+
+
class Storage:
"""interface for storage"""
- typechar_pad_size = 16
- typepad = bytes(0 for i in range(typechar_pad_size - 1))
-
def __init__(self, fs_storage: ISimpleStorage, db_storage: ISimpleStorage, serializer: ISerializer) -> None:
self.fs = fs_storage
self.db = db_storage
@@ -346,43 +354,71 @@
def get_fd(self, path: str, mode: str = "r") -> IO:
return self.fs.get_fd(path, mode)
- def put_array(self, value: array.array, *path: str) -> None:
- typechar = value.typecode.encode('ascii')
- assert len(typechar) == 1
- with self.get_fd("/".join(path), "wb") as fd:
- fd.write(typechar + self.typepad)
- value.tofile(fd) # type: ignore
+ def put_array(self, header: List[str], value: numpy.array, *path: str) -> None:
+ for val in header:
+ assert isinstance(val, str) and ',' not in val, \
+ "Can't convert {!r} to array header, as it's values contains comma".format(header)
- def get_array(self, *path: str) -> array.array:
+ fpath = "/".join(path)
+ with self.get_fd(fpath, "wb") as fd:
+ self.do_append(fd, header, value, fpath)
+
+ def get_array(self, *path: str) -> Tuple[List[str], numpy.array]:
path_s = "/".join(path)
with self.get_fd(path_s, "rb") as fd:
- fd.seek(0, os.SEEK_END)
- size = fd.tell() - self.typechar_pad_size
- fd.seek(0, os.SEEK_SET)
- typecode = chr(fd.read(self.typechar_pad_size)[0])
- res = array.array(typecode)
- assert size % res.itemsize == 0, "Storage object at path {} contains no array of {} or corrupted."\
- .format(path_s, typecode)
- res.fromfile(fd, size // res.itemsize) # type: ignore
- return res
+ header = fd.readline().decode(csv_file_encoding).rstrip().split(",")
+ type_code, second_axis = header[-2:]
+ res = numpy.genfromtxt(fd, dtype=type_code, delimiter=',')
- def append(self, value: array.array, *path: str) -> None:
- typechar = value.typecode.encode('ascii')
- assert len(typechar) == 1
- expected_typeheader = typechar + self.typepad
- with self.get_fd("/".join(path), "cb") as fd:
+ if '0' == second_axis:
+ res.shape = (len(res),)
+
+ return header[:-2], res
+
+ def append(self, header: List[str], value: numpy.array, *path: str) -> None:
+ for val in header:
+ assert isinstance(val, str) and ',' not in val, \
+ "Can't convert {!r} to array header, as it's values contains comma".format(header)
+
+ fpath = "/".join(path)
+ with self.get_fd(fpath, "cb") as fd:
+ self.do_append(fd, header, value, fpath, maybe_append=True)
+
+ def do_append(self, fd, header: List[str], value: numpy.array, path: str, fmt="%lu",
+ maybe_append: bool = False) -> None:
+
+ if len(value.shape) == 1:
+ second_axis = 0
+ else:
+ second_axis = value.shape[1]
+ header += [value.dtype.name, str(second_axis)]
+
+ write_header = False
+
+ if maybe_append:
fd.seek(0, os.SEEK_END)
if fd.tell() != 0:
fd.seek(0, os.SEEK_SET)
- real_typecode = fd.read(self.typechar_pad_size)
- if real_typecode[0] != expected_typeheader[0]:
- logger.error("Try to append array with typechar %r to array with typechar %r at path %r",
- value.typecode, typechar, "/".join(path))
- raise StopIteration()
+ # check header match
+ curr_header = fd.readline().decode(csv_file_encoding).rstrip().split(",")
+ assert header == curr_header, \
+ "Path {!r}. Expected header ({!r}) and current header ({!r}) don't match"\
+ .format(path, header, curr_header)
fd.seek(0, os.SEEK_END)
else:
- fd.write(expected_typeheader)
- value.tofile(fd) # type: ignore
+ write_header = True
+ else:
+ write_header = True
+
+ if write_header:
+ fd.write((",".join(header) + "\n").encode(csv_file_encoding))
+
+ if len(value.shape) == 1:
+ # make array vertical to simplify reading
+ vw = value.view().reshape((value.shape[0], 1))
+ else:
+ vw = value
+ numpy.savetxt(fd, vw, delimiter=',', newline="\n", fmt=fmt)
def load_list(self, obj_class: Type[ObjClass], *path: str) -> List[ObjClass]:
path_s = "/".join(path)
diff --git a/wally/storage_structure.yaml b/wally/storage_structure.yaml
index 8493e29..db632c0 100644
--- a/wally/storage_structure.yaml
+++ b/wally/storage_structure.yaml
@@ -15,24 +15,8 @@
fuel_version: List[int] # FUEL master node version
fuel_os_creds: OSCreds # openstack creds, discovered from fuel (or None)
openstack_openrc: OSCreds # openrc used for openstack cluster
-
-info:
- comment : str # run comment
- run_uuid : str # run uuid
- run_time : float # unix time when test first starts
-
-'results/{suite}_{profile}_{id}':
- config: TestSuiteConfig # test job(iteration) input config, {id} is id of first job in suite
- '{descr}_{id}':
- config: TestJobConfig # test job(iteration) input config
-
- # dev in next line is tool name - fio/vdbench/....
- '{node}_{dev}.{metric_name}:raw' : bytes # raw log, where name from {'bw', 'iops', 'lat', ..}
- '{node}_{dev}.{metric_name}:stat' : StatProps # type of props detected by content
- '{node}_{dev}.{metric_name}': List[uint64] # measurements data concatenated with collect times in
- # microseconds from unix epoch and typechars
-'sensors/{node}_{sensor}.{dev}.{metric_name}': typechar + array[uint64] # sensor values
-'sensors/{node}_{sensor}.{dev}.{metric_name}:stat': StatProps # statistic data
-'sensors/{node}_collected_at': typechar + array[uint64] # collection time
-
-'rpc_logs/{node}' : bytes # rpc server log from node
+'results/{suite}.info.yaml': SuiteConfig # test suite input config
+'results/{suite}.{job}/{node}.{loader}.{metric}.{tag}':
+'sensors/{node}_{sensor}.{dev}.{metric_name}.{tag}': # sensor values
+'sensors/{node}_collected_at.csv':
+'rpc_logs/{node}.txt' : bytes # rpc server log from node
diff --git a/wally/suits/io/fio.py b/wally/suits/io/fio.py
index bf2e6b3..77d7a75 100644
--- a/wally/suits/io/fio.py
+++ b/wally/suits/io/fio.py
@@ -1,10 +1,10 @@
-import array
import os.path
import logging
-from typing import cast, Any, Tuple, List
+from typing import cast, Any, List, Union
+
+import numpy
import wally
-
from ...utils import StopTestError, ssize2b, b2ssize
from ...node_interfaces import IRPCNode
from ...node_utils import get_os
@@ -36,7 +36,7 @@
self.use_system_fio = get('use_system_fio', False) # type: bool
self.use_sudo = get("use_sudo", True) # type: bool
self.force_prefill = get('force_prefill', False) # type: bool
-
+ self.skip_prefill = get('skip_prefill', False) # type: bool
self.load_profile_name = self.suite.params['load'] # type: str
if os.path.isfile(self.load_profile_name):
@@ -71,6 +71,11 @@
self.file_size = list(sizes)[0]
logger.info("Detected test file size is %sB", b2ssize(self.file_size))
+ if self.file_size % (4 * 1024 ** 2) != 0:
+ tail = self.file_size % (4 * 1024 ** 2)
+ logger.warning("File size is not proportional to 4M, %sb at the end will not be used for test",
+ str(tail // 1024) + "Kb" if tail > 1024 else str(tail) + "b")
+ self.file_size -= self.file_size % (4 * 1024 ** 2)
self.load_params['FILESIZE'] = self.file_size
else:
self.file_size = ssize2b(self.load_params['FILESIZE'])
@@ -107,16 +112,18 @@
self.install_utils(node)
- mb = int(self.file_size / 1024 ** 2)
- logger.info("Filling test file %s on node %s with %sMiB of random data", self.file_name, node.info, mb)
- is_prefilled, fill_bw = node.conn.fio.fill_file(self.file_name, mb,
- force=self.force_prefill,
- fio_path=self.fio_path)
-
- if not is_prefilled:
- logger.info("Test file on node %s is already prefilled", node.info)
- elif fill_bw is not None:
- logger.info("Initial fio fill bw is %s MiBps for %s", fill_bw, node.info)
+ if self.skip_prefill:
+ logger.info("Prefill is skipped due to 'skip_prefill' set to true")
+ else:
+ mb = int(self.file_size / 1024 ** 2)
+ logger.info("Filling test file %s on node %s with %sMiB of random data", self.file_name, node.info, mb)
+ is_prefilled, fill_bw = node.conn.fio.fill_file(self.file_name, mb,
+ force=self.force_prefill,
+ fio_path=self.fio_path)
+ if not is_prefilled:
+ logger.info("Test file on node %s is already prefilled", node.info)
+ elif fill_bw is not None:
+ logger.info("Initial fio fill bw is %s MiBps for %s", fill_bw, node.info)
def install_utils(self, node: IRPCNode) -> None:
os_info = get_os(node)
@@ -170,16 +177,16 @@
path = DataSource(suite_id=self.suite.storage_id,
job_id=job.storage_id,
node_id=node.node_id,
- dev='fio',
- sensor='stdout',
+ sensor='fio',
+ dev=None,
+ metric='stdout',
tag='json')
-
self.storage.put_extra(fio_out, path)
node.conn.fs.unlink(self.remote_output_file)
files = [name for name in node.conn.fs.listdir(self.exec_folder)]
result = []
- for name, file_path in get_log_files(cast(FioJobConfig, job)):
+ for name, file_path, units in get_log_files(cast(FioJobConfig, job)):
log_files = [fname for fname in files if fname.startswith(file_path)]
if len(log_files) != 1:
logger.error("Found %s files, match log pattern %s(%s) - %s",
@@ -196,8 +203,10 @@
logger.exception("Error during parse %s fio log file - can't decode usint UTF8", name)
raise StopTestError()
- parsed = array.array('L' if name == 'lat' else 'Q')
- times = array.array('Q')
+ # TODO: fix units, need to get array type from stream
+
+ parsed = [] # type: List[Union[List[int], int]]
+ times = []
for idx, line in enumerate(log_data):
line = line.strip()
@@ -214,19 +223,23 @@
.format(expected_lat_bins, len(vals), time_ms_s))
raise StopTestError()
- parsed.extend(vals)
+ parsed.append(vals)
else:
parsed.append(int(val_s.strip()))
except ValueError:
logger.exception("Error during parse %s fio log file in line %s: %r", name, idx, line)
raise StopTestError()
+ if not self.suite.keep_raw_files:
+ raw_result = None
+
result.append(TimeSeries(name=name,
raw=raw_result,
- second_axis_size=expected_lat_bins if name == 'lat' else 1,
- data=parsed,
- times=times,
- source=path(sensor=name, tag=None)))
+ data=numpy.array(parsed, dtype='uint64'),
+ units=units,
+ times=numpy.array(times, dtype='uint64'),
+ time_units='ms',
+ source=path(metric=name, tag='csv')))
return result
def format_for_console(self, data: Any) -> str:
diff --git a/wally/suits/io/fio_job.py b/wally/suits/io/fio_job.py
index 0f55e91..2d8d78a 100644
--- a/wally/suits/io/fio_job.py
+++ b/wally/suits/io/fio_job.py
@@ -53,15 +53,26 @@
@property
def long_summary(self) -> str:
"""Readable long summary for management and deployment engineers"""
- res = "{0[sync_mode_long]} {0[oper]} {1}".format(self, b2ssize(self['bsize'] * 1024))
+ res = "{0[oper]}, {0.sync_mode_long}, block size {1}B".format(self, b2ssize(self['bsize'] * 1024))
if self['qd'] is not None:
- res += " QD = " + str(self['qd'])
+ res += ", QD = " + str(self['qd'])
if self['thcount'] not in (1, None):
- res += " threads={0[thcount]}".format(self)
+ res += ", threads={0[thcount]}".format(self)
if self['write_perc'] is not None:
- res += " write_perc={0[write_perc]}%".format(self)
+ res += ", write_perc={0[write_perc]}%".format(self)
return res
+ def copy(self, **kwargs: Dict[str, Any]) -> 'FioJobParams':
+ np = self.params.copy()
+ np.update(kwargs)
+ return self.__class__(**np)
+
+ @property
+ def char_tpl(self) -> Tuple[Union[str, int], ...]:
+ mint = lambda x: -10000000000 if x is None else int(x)
+ return self['oper'], mint(self['bsize']), self['sync_mode'], \
+ mint(self['thcount']), mint(self['qd']), mint(self['write_perc'])
+
class FioJobConfig(JobConfig):
"""Fio job configuration"""
@@ -157,7 +168,7 @@
return len(list(self.required_vars())) == 0
def __str__(self) -> str:
- res = "[{0}]\n".format(self.params.summary)
+ res = "[{0}]\n".format(self.summary)
for name, val in self.vals.items():
if name.startswith('_') or name == name.upper():
@@ -180,4 +191,6 @@
@classmethod
def fromraw(cls, data: Dict[str, Any]) -> 'FioJobConfig':
data['vals'] = OrderedDict(data['vals'])
+ data['_sync_mode'] = None
+ data['_params'] = None
return cast(FioJobConfig, super().fromraw(data))
diff --git a/wally/suits/io/fio_task_parser.py b/wally/suits/io/fio_task_parser.py
index c1b4bc3..bdcec23 100644
--- a/wally/suits/io/fio_task_parser.py
+++ b/wally/suits/io/fio_task_parser.py
@@ -288,19 +288,18 @@
return fio_config_parse(fio_config_lexer(source, fname))
-def get_log_files(sec: FioJobConfig, iops: bool = False) -> List[Tuple[str, str]]:
- res = [] # type: List[Tuple[str, str]]
+def get_log_files(sec: FioJobConfig, iops: bool = False) -> Iterator[Tuple[str, str, str]]:
+ res = [] # type: List[Tuple[str, str, str]]
- keys = [('write_bw_log', 'bw'), ('write_hist_log', 'lat')]
+ keys = [('write_bw_log', 'bw', 'kibps'),
+ ('write_hist_log', 'lat', 'us')]
if iops:
- keys.append(('write_iops_log', 'iops'))
+ keys.append(('write_iops_log', 'iops', 'iops'))
- for key, name in keys:
+ for key, name, units in keys:
log = sec.vals.get(key)
if log is not None:
- res.append((name, log))
-
- return res
+ yield (name, log, units)
def fio_cfg_compile(source: str, fname: str, test_params: FioParams) -> Iterator[FioJobConfig]:
diff --git a/wally/suits/io/hdd.cfg b/wally/suits/io/hdd.cfg
index 95c8cec..eff64cd 100644
--- a/wally/suits/io/hdd.cfg
+++ b/wally/suits/io/hdd.cfg
@@ -1,44 +1,23 @@
[global]
-include defaults.cfg
+include defaults_qd.cfg
-# NUMJOBS={% 1, 5, 10, 15, 20, 30, 40, 80 %}
-
-NUMJOBS={% 1, 5, 10, 15, 25, 40 %}
-
-ramp_time=30
-runtime=120
-
+QD={% 1, 2, 4, 8, 16, 32, 64 %}
+runtime=300
direct=1
# ---------------------------------------------------------------------
-# check different thread count, sync mode. (latency, iops) = func(th_count)
+# check different queue depths, direct random read/write mode. (latency, iops) = func(QD)
# ---------------------------------------------------------------------
[hdd_{TEST_SUMM}]
blocksize=4k
-rw=randwrite
-sync=1
-numjobs={NUMJOBS}
+rw={% randread, randwrite %}
+iodepth={QD}
# ---------------------------------------------------------------------
-# check different thread count, direct read mode. (latency, iops) = func(th_count)
-# also check iops for randread
-# ---------------------------------------------------------------------
-[hdd_{TEST_SUMM}]
-blocksize=4k
-rw=randread
-numjobs={NUMJOBS}
-
-# ---------------------------------------------------------------------
-# No reason for th count > 1 in case of sequantial operations
+# No reason for QD > 1 in case of sequential operations
# ot they became random
# ---------------------------------------------------------------------
[hdd_{TEST_SUMM}]
blocksize=1m
rw={% read, write %}
-
-# ---------------------------------------------------------------------
-# check IOPS randwrite.
-# ---------------------------------------------------------------------
-[hdd_{TEST_SUMM}]
-blocksize=4k
-rw=randwrite
+iodepth=1
diff --git a/wally/suits/itest.py b/wally/suits/itest.py
index ac9e1c1..9aa4ce6 100644
--- a/wally/suits/itest.py
+++ b/wally/suits/itest.py
@@ -2,13 +2,13 @@
import time
import logging
import os.path
-from typing import Any, List, Optional, Callable, Tuple, Iterable, cast
+from typing import Any, List, Optional, Callable, Iterable, cast
-from concurrent.futures import ThreadPoolExecutor, wait, Future
+from concurrent.futures import ThreadPoolExecutor, wait
from ..utils import StopTestError, get_time_interval_printable_info
from ..node_interfaces import IRPCNode
-from ..result_classes import TestSuiteConfig, TestJobConfig, JobMetrics, TimeSeries, IResultStorage
+from ..result_classes import SuiteConfig, JobConfig, TimeSeries, IResultStorage
logger = logging.getLogger("wally")
@@ -24,7 +24,7 @@
retry_time = 30
job_config_cls = None # type: type
- def __init__(self, storage: IResultStorage, suite: TestSuiteConfig, on_idle: Callable[[], None] = None) -> None:
+ def __init__(self, storage: IResultStorage, suite: SuiteConfig, on_idle: Callable[[], None] = None) -> None:
self.suite = suite
self.stop_requested = False
self.sorted_nodes_ids = sorted(node.node_id for node in self.suite.nodes)
@@ -57,16 +57,16 @@
def __init__(self, *args, **kwargs) -> None:
PerfTest.__init__(self, *args, **kwargs)
- self.job_configs = None # type: List[TestJobConfig]
+ self.job_configs = None # type: List[JobConfig]
@abc.abstractmethod
- def get_expected_runtime(self, iter_cfg: TestJobConfig) -> Optional[int]:
+ def get_expected_runtime(self, iter_cfg: JobConfig) -> Optional[int]:
pass
- def get_not_done_jobs(self) -> Iterable[TestJobConfig]:
+ def get_not_done_jobs(self) -> Iterable[JobConfig]:
jobs_map = {job.storage_id: job for job in self.job_configs}
already_in_storage = set()
- for db_config in cast(List[TestJobConfig], self.storage.iter_job(self.suite)):
+ for db_config in cast(List[JobConfig], self.storage.iter_job(self.suite)):
if db_config.storage_id in jobs_map:
job = jobs_map[db_config.storage_id]
if job != db_config:
@@ -113,7 +113,7 @@
for job in not_in_storage:
results = [] # type: List[TimeSeries]
for retry_idx in range(self.max_retry):
- logger.debug("Prepare job %s", job.summary)
+ logger.debug("Prepare job %s", job.params.summary)
# prepare nodes for new iterations
wait([pool.submit(self.prepare_iteration, node, job) for node in self.suite.nodes])
@@ -172,8 +172,7 @@
self.name, job.summary,
max_start_time - min_start_time, self.max_time_diff)
- job.reliable_info_starts_at = max_start_time
- job.reliable_info_stops_at = min_stop_time
+ job.reliable_info_range = (int(max_start_time), int(min_stop_time))
self.storage.put_job(self.suite, job)
self.storage.sync()
@@ -186,11 +185,11 @@
pass
@abc.abstractmethod
- def prepare_iteration(self, node: IRPCNode, job: TestJobConfig) -> None:
+ def prepare_iteration(self, node: IRPCNode, job: JobConfig) -> None:
pass
@abc.abstractmethod
- def run_iteration(self, node: IRPCNode, job: TestJobConfig) -> List[TimeSeries]:
+ def run_iteration(self, node: IRPCNode, job: JobConfig) -> List[TimeSeries]:
pass
@@ -204,7 +203,7 @@
# TODO: fix job_configs field
raise NotImplementedError("Fix job configs")
- def get_expected_runtime(self, job: TestJobConfig) -> Optional[int]:
+ def get_expected_runtime(self, job: JobConfig) -> Optional[int]:
return None
def config_node(self, node: IRPCNode) -> None:
@@ -215,10 +214,10 @@
cmd += ' ' + self.suite.params.get('prerun_opts', '')
node.run(cmd, timeout=self.prerun_tout)
- def prepare_iteration(self, node: IRPCNode, job: TestJobConfig) -> None:
+ def prepare_iteration(self, node: IRPCNode, job: JobConfig) -> None:
pass
- def run_iteration(self, node: IRPCNode, job: TestJobConfig) -> List[TimeSeries]:
+ def run_iteration(self, node: IRPCNode, job: JobConfig) -> List[TimeSeries]:
# TODO: have to store logs
cmd = self.join_remote(self.run_script)
cmd += ' ' + self.suite.params.get('run_opts', '')
diff --git a/wally/suits/job.py b/wally/suits/job.py
index ce32e0e..91822cb 100644
--- a/wally/suits/job.py
+++ b/wally/suits/job.py
@@ -1,5 +1,5 @@
import abc
-from typing import Dict, Any, Tuple
+from typing import Dict, Any, Tuple, cast, Union
from collections import OrderedDict
from ..common_types import Storable
@@ -24,6 +24,10 @@
"""Readable long summary for management and deployment engineers"""
pass
+ @abc.abstractmethod
+ def copy(self, **updated) -> 'JobParams':
+ pass
+
def __getitem__(self, name: str) -> Any:
return self.params[name]
@@ -31,10 +35,21 @@
self.params[name] = val
def __hash__(self) -> int:
- return hash(tuple(sorted(self.params.items())))
+ return hash(self.char_tpl)
- def __eq__(self, o: 'JobParams') -> bool:
- return sorted(self.params.items()) == sorted(o.params.items())
+ def __eq__(self, o: object) -> bool:
+ if not isinstance(o, self.__class__):
+ raise TypeError("Can't compare {!r} to {!r}".format(self.__class__.__qualname__, type(o).__qualname__))
+ return sorted(self.params.items()) == sorted(cast(JobParams, o).params.items())
+
+ def __lt__(self, o: object) -> bool:
+ if not isinstance(o, self.__class__):
+ raise TypeError("Can't compare {!r} to {!r}".format(self.__class__.__qualname__, type(o).__qualname__))
+ return self.char_tpl < cast(JobParams, o).char_tpl
+
+ @abc.abstractproperty
+ def char_tpl(self) -> Tuple[Union[str, int, float, bool], ...]:
+ pass
class JobConfig(Storable, metaclass=abc.ABCMeta):
@@ -45,7 +60,7 @@
self.idx = idx
# time interval, in seconds, when test was running on all nodes
- self.reliable_info_time_range = None # type: Tuple[int, int]
+ self.reliable_info_range = None # type: Tuple[int, int]
# all job parameters, both from suite file and config file
self.vals = OrderedDict() # type: Dict[str, Any]
@@ -53,9 +68,13 @@
@property
def storage_id(self) -> str:
"""unique string, used as key in storage"""
- return "{}_{}".format(self.params.summary, self.idx)
+ return "{}_{}".format(self.summary, self.idx)
@abc.abstractproperty
def params(self) -> JobParams:
"""Should return a copy"""
pass
+
+ @property
+ def summary(self) -> str:
+ return self.params.summary
diff --git a/wally/utils.py b/wally/utils.py
index 4551952..5904aa7 100644
--- a/wally/utils.py
+++ b/wally/utils.py
@@ -33,6 +33,9 @@
Number = Union[int, float]
+STORAGE_ROLES = {'ceph-osd'}
+
+
class StopTestError(RuntimeError):
pass
@@ -164,49 +167,83 @@
('T', 1024 ** 4)]
-def b2ssize(size: int) -> str:
- if size < 1024:
- return str(size)
+def b2ssize(value: Union[int, float]) -> str:
+ if isinstance(value, float) and value < 100:
+ return b2ssize_10(value)
+
+ value = int(value)
+ if value < 1024:
+ return str(value) + " "
# make mypy happy
scale = 1
name = ""
for name, scale in RSMAP:
- if size < 1024 * scale:
- if size % scale == 0:
- return "{} {}i".format(size // scale, name)
+ if value < 1024 * scale:
+ if value % scale == 0:
+ return "{} {}i".format(value // scale, name)
else:
- return "{:.1f} {}i".format(float(size) / scale, name)
+ return "{:.1f} {}i".format(float(value) / scale, name)
- return "{}{}i".format(size // scale, name)
+ return "{}{}i".format(value // scale, name)
-RSMAP_10 = [('k', 1000),
- ('m', 1000 ** 2),
- ('g', 1000 ** 3),
- ('t', 1000 ** 4)]
+RSMAP_10 = [(' f', 0.001 ** 4),
+ (' n', 0.001 ** 3),
+ (' u', 0.001 ** 2),
+ (' m', 0.001),
+ (' ', 1),
+ (' K', 1000),
+ (' M', 1000 ** 2),
+ (' G', 1000 ** 3),
+ (' T', 1000 ** 4),
+ (' P', 1000 ** 5),
+ (' E', 1000 ** 6)]
-def b2ssize_10(size: int) -> str:
- if size < 1000:
- return str(size)
+def has_next_digit_after_coma(x: float) -> bool:
+ return x * 10 - int(x * 10) > 1
+
+def has_second_digit_after_coma(x: float) -> bool:
+ return (x * 10 - int(x * 10)) * 10 > 1
+
+
+def b2ssize_10(value: Union[int, float]) -> str:
# make mypy happy
scale = 1
- name = ""
+ name = " "
+
+ if value == 0.0:
+ return "0 "
+
+ if value / RSMAP_10[0][1] < 1.0:
+ return "{:.2e} ".format(value)
for name, scale in RSMAP_10:
- if size < 1000 * scale:
- if size % scale == 0:
- return "{} {}".format(size // scale, name)
- else:
- return "{:.1f} {}".format(float(size) / scale, name)
+ cval = value / scale
+ if cval < 1000:
+ # detect how many digits after dot to show
+ if cval > 100:
+ return "{}{}".format(int(cval), name)
+ if cval > 10:
+ if has_next_digit_after_coma(cval):
+ return "{:.1f}{}".format(cval, name)
+ else:
+ return "{}{}".format(int(cval), name)
+ if cval >= 1:
+ if has_second_digit_after_coma(cval):
+ return "{:.2f}{}".format(cval, name)
+ elif has_next_digit_after_coma(cval):
+ return "{:.1f}{}".format(cval, name)
+ return "{}{}".format(int(cval), name)
+ raise AssertionError("Can't get here")
- return "{}{}".format(size // scale, name)
+ return "{}{}".format(int(value // scale), name)
-def run_locally(cmd: Union[str, List[str]], input_data: str="", timeout:int =20) -> str:
+def run_locally(cmd: Union[str, List[str]], input_data: str = "", timeout: int = 20) -> str:
if isinstance(cmd, str):
shell = True
cmd_str = cmd
@@ -434,7 +471,15 @@
assert u1 == u2, "Can't convert {!r} to {!r}".format(from_unit, to_unit)
- if isinstance(int, f1) and isinstance(int, f2) and f1 % f2 != 0:
+ if isinstance(f1, int) and isinstance(f2, int) and f1 % f2 != 0:
return Fraction(f1, f2)
return f1 // f2
+
+
+def shape2str(shape: Iterable[int]) -> str:
+ return "*".join(map(str, shape))
+
+
+def str2shape(shape: str) -> Tuple[int, ...]:
+ return tuple(map(int, shape.split('*')))