koder aka kdanilov | bc2c898 | 2015-06-13 02:50:43 +0300 | [diff] [blame] | 1 | import re |
| 2 | import time |
| 3 | import json |
koder aka kdanilov | f236b9c | 2015-06-24 18:17:22 +0300 | [diff] [blame] | 4 | import stat |
koder aka kdanilov | 6ab4d43 | 2015-06-22 00:26:28 +0300 | [diff] [blame] | 5 | import random |
koder aka kdanilov | bc2c898 | 2015-06-13 02:50:43 +0300 | [diff] [blame] | 6 | import os.path |
| 7 | import logging |
| 8 | import datetime |
| 9 | import functools |
| 10 | import subprocess |
| 11 | import collections |
| 12 | |
| 13 | import yaml |
| 14 | import paramiko |
| 15 | import texttable |
| 16 | from paramiko.ssh_exception import SSHException |
| 17 | from concurrent.futures import ThreadPoolExecutor |
| 18 | |
koder aka kdanilov | 6ab4d43 | 2015-06-22 00:26:28 +0300 | [diff] [blame] | 19 | import wally |
koder aka kdanilov | bc2c898 | 2015-06-13 02:50:43 +0300 | [diff] [blame] | 20 | from wally.pretty_yaml import dumps |
koder aka kdanilov | bb6d6cd | 2015-06-20 02:55:07 +0300 | [diff] [blame] | 21 | from wally.statistic import round_3_digit, data_property, average |
koder aka kdanilov | bc2c898 | 2015-06-13 02:50:43 +0300 | [diff] [blame] | 22 | from wally.utils import ssize2b, sec_to_str, StopTestError, Barrier, get_os |
| 23 | from wally.ssh_utils import (save_to_remote, read_from_remote, BGSSHTask, reconnect) |
| 24 | |
| 25 | from .fio_task_parser import (execution_time, fio_cfg_compile, |
koder aka kdanilov | f236b9c | 2015-06-24 18:17:22 +0300 | [diff] [blame] | 26 | get_test_summary, get_test_summary_tuple, |
| 27 | get_test_sync_mode, FioJobSection) |
| 28 | |
koder aka kdanilov | bb6d6cd | 2015-06-20 02:55:07 +0300 | [diff] [blame] | 29 | from ..itest import (TimeSeriesValue, PerfTest, TestResults, |
| 30 | run_on_node, TestConfig, MeasurementMatrix) |
koder aka kdanilov | bc2c898 | 2015-06-13 02:50:43 +0300 | [diff] [blame] | 31 | |
| 32 | logger = logging.getLogger("wally") |
| 33 | |
| 34 | |
| 35 | # Results folder structure |
| 36 | # results/ |
| 37 | # {loadtype}_{num}/ |
| 38 | # config.yaml |
| 39 | # ...... |
| 40 | |
| 41 | |
class NoData(object):
    """Sentinel class marking a cached property whose value is not computed yet."""


def cached_prop(func):
    """Turn ``func`` into a read-only property computed once per instance.

    The computed value is stored on the instance under the attribute
    ``"_" + func.__name__``.  While that attribute is missing or is the
    ``NoData`` sentinel, accessing the property calls ``func(self)`` and
    stores the result; later accesses return the stored value.
    """
    attr_name = "_" + func.__name__

    @property
    @functools.wraps(func)
    def closure(self):
        # A missing backing attribute is treated like NoData, so classes
        # are not forced to pre-initialise it in __init__.
        val = getattr(self, attr_name, NoData)
        if val is NoData:
            val = func(self)
            setattr(self, attr_name, val)
        return val
    return closure
| 56 | |
| 57 | |
def load_fio_log_file(fname):
    """Parse a fio log file into a ``TimeSeriesValue``.

    Only the first two comma-separated fields of every line are used: the
    first is divided by 1000, the second is shifted by +0.5.  Lines that
    contain only whitespace are skipped.

    fname: path to the log file
    returns: TimeSeriesValue built from the list of (offset, value) pairs
    """
    vals = []
    with open(fname) as fd:
        for ln in fd:
            if not ln.strip():
                # tolerate blank / trailing empty lines
                continue

            off, val = ln.split(',')[:2]

            # Original note: "convert us to ms".
            # NOTE(review): confirm the unit of the fio time column.
            offset = float(off) / 1000

            # add 0.5 to compensate the average value,
            # as fio trims all values in the log to integers
            vals.append((offset, float(val.strip()) + 0.5))

    return TimeSeriesValue(vals)
| 68 | |
| 69 | |
def load_test_results(folder, run_num):
    """Load the stored results of one fio run from ``folder``.

    Reads ``<run_num>_params.yaml``, every
    ``<run_num>_<conn_id>_<type>.<N>.log`` file whose type is one of
    iops/bw/lat, and one ``<run_num>_<conn_id>_rawres.json`` file per
    connection id found in the log file names.

    folder: directory with the result files
    run_num: number of the run, used as the file name prefix
    returns: FioRunResult
    """
    fn = os.path.join(folder, str(run_num) + '_params.yaml')

    # NOTE(review): yaml.load without an explicit Loader can construct
    # arbitrary objects; switch to yaml.safe_load if the params files are
    # known to contain plain data only.
    with open(fn) as fd:
        params = yaml.load(fd.read())

    res = {}
    conn_ids_set = set()
    rr = r"{0}_(?P<conn_id>.*?)_(?P<type>[^_.]*)\.\d+\.log$".format(run_num)
    for fname in os.listdir(folder):
        rm = re.match(rr, fname)
        if rm is None:
            continue

        ftype = rm.group('type')
        if ftype not in ('iops', 'bw', 'lat'):
            continue

        # file names carry '_' where the connection id has ':'
        conn_id = rm.group('conn_id').replace('_', ':')

        ts = load_fio_log_file(os.path.join(folder, fname))
        res.setdefault(ftype, {}).setdefault(conn_id, []).append(ts)

        conn_ids_set.add(conn_id)

    # computed once, outside of the loops, so it is defined even when
    # no log file matched
    conn_ids = sorted(conn_ids_set)

    mm_res = {}
    for key, data in res.items():
        matr = [data[conn_id] for conn_id in conn_ids]
        mm_res[key] = MeasurementMatrix(matr, conn_ids)

    raw_res = {}
    for conn_id in conn_ids:
        # rebuild the file-name form of *this* connection id instead of
        # reusing whatever name the listing loop happened to see last
        conn_id_s = conn_id.replace(':', '_')
        fn = os.path.join(folder, "{0}_{1}_rawres.json".format(run_num, conn_id_s))

        with open(fn) as fd:
            content = fd.read()

        # remove message hack: drop everything before the first '{'
        fc = "{" + content.split('{', 1)[1]
        raw_res[conn_id] = json.loads(fc)

    fio_task = FioJobSection(params['name'])
    fio_task.vals.update(params['vals'])

    config = TestConfig('io', params, None, params['nodes'], folder, None)
    return FioRunResult(config, fio_task, mm_res, raw_res, params['intervals'], run_num)
koder aka kdanilov | bc2c898 | 2015-06-13 02:50:43 +0300 | [diff] [blame] | 117 | |
| 118 | |
class Attrmapper(object):
    """Expose the keys of a dict as attributes.

    ``Attrmapper(dct).name`` returns ``dct[name]`` and raises
    AttributeError when the key is missing.
    """

    def __init__(self, dct):
        self.__dct = dct

    def __getattr__(self, name):
        # Go through the instance __dict__ instead of ``self.__dct``: when
        # the mapping is not set yet (e.g. while copy/pickle rebuild the
        # object) ``self.__dct`` would re-enter __getattr__ and recurse
        # without end.
        try:
            return self.__dict__['_Attrmapper__dct'][name]
        except KeyError:
            raise AttributeError(name)
| 128 | |
| 129 | |
class DiskPerfInfo(object):
    """Aggregated performance figures of one test.

    name: test name
    summary: test summary, stored as-is
    params: test parameters; ``params['vals']`` must be a dict
    testnodes_count: number of test nodes, stored as-is
    """

    def __init__(self, name, summary, params, testnodes_count):
        self.name = name

        # aggregated values, filled in by the code that builds this object
        self.bw = None
        self.iops = None
        self.lat = None
        self.lat_50 = None
        self.lat_95 = None
        self.lat_avg = None

        # secondary bw/iops estimations; declared here so the attributes
        # exist before they are assigned from outside
        self.bw2 = None
        self.iops2 = None

        self.raw_bw = []
        self.raw_iops = []
        self.raw_lat = []

        self.params = params
        self.testnodes_count = testnodes_count
        self.summary = summary

        # attribute-style access to params['vals']
        self.p = Attrmapper(self.params['vals'])

        self.sync_mode = get_test_sync_mode(self.params['vals'])
        self.concurence = self.params['vals'].get('numjobs', 1)
| 151 | |
| 152 | |
def get_lat_perc_50_95(lat_mks):
    """Estimate the 50th and 95th latency percentiles from a histogram.

    lat_mks: dict {bucket_key: percent_of_requests}.  Keys are walked in
        ascending order and the percents are accumulated.
    returns: (perc_50, perc_95), each divided by 1000 relative to the
        bucket keys.

    The percentile is interpolated linearly between the previous and the
    current bucket key, except for the first bucket and for buckets
    holding less than 1 percent, where the bucket key itself is used.
    If the accumulated percentage never reaches a threshold (e.g. the
    percents sum up to slightly less than 100 after rounding) the largest
    bucket key is used for it.

    raises: ValueError if the histogram is empty
    """
    if not lat_mks:
        raise ValueError("Latency histogram is empty")

    def interpolate(target, curr_perc, val, key, pkey):
        if pkey is None or val < 1.:
            return key
        return (target - curr_perc) / val * (key - pkey) + pkey

    curr_perc = 0
    perc_50 = None
    perc_95 = None
    pkey = None

    for key, val in sorted(lat_mks.items()):
        if perc_50 is None and curr_perc + val >= 50:
            perc_50 = interpolate(50., curr_perc, val, key, pkey)

        if curr_perc + val >= 95:
            perc_95 = interpolate(95., curr_perc, val, key, pkey)
            break

        pkey = key
        curr_perc += val

    # A threshold can only be missing when the loop ran to the end,
    # in which case pkey holds the largest bucket key.
    if perc_50 is None:
        perc_50 = pkey

    if perc_95 is None:
        perc_95 = pkey

    return perc_50 / 1000., perc_95 / 1000.
| 182 | |
| 183 | |
class IOTestResults(object):
    """Results of all fio runs of one suite plus the folder they live in.

    Iterating over the object yields the stored fio results; ``len()``
    returns their number.
    """

    def __init__(self, suite_name, fio_results, log_directory):
        self.log_directory = log_directory
        self.fio_results = fio_results
        self.suite_name = suite_name

    def __len__(self):
        return len(self.fio_results)

    def __iter__(self):
        return iter(self.fio_results)

    def get_yamable(self):
        """Return {suite_name: [log_directory, (summary, idx), ...]}."""
        payload = [self.log_directory]
        for fio_res in self:
            payload.append((fio_res.summary(), fio_res.idx))
        return {self.suite_name: payload}
| 199 | |
| 200 | |
class FioRunResult(TestResults):
    """
    Fio run results
    config: TestConfig
    fio_task: FioJobSection
    ts_results: {str: MeasurementMatrix[TimeSeriesValue]}
    raw_result: indexed by the methods below as
                raw_result[conn_id]['jobs'][N][...]
    run_interval:(float, float) - test run time, used for sensors
    idx: index of this run, stored as-is
    """
    def __init__(self, config, fio_task, ts_results, raw_result, run_interval, idx):

        # task name without its last '_'-separated component
        self.name = fio_task.name.rsplit("_", 1)[0]
        self.fio_task = fio_task
        self.idx = idx

        self.bw = ts_results.get('bw')
        self.lat = ts_results.get('lat')
        self.iops = ts_results.get('iops')

        res = {"bw": self.bw, "lat": self.lat, "iops": self.iops}

        self.sensors_data = None

        # cache for disk_perf_info()
        self._pinfo = None
        TestResults.__init__(self, config, res, raw_result, run_interval)

    def get_params_from_fio_report(self):
        """Collect per-node iops/bw figures from the raw fio reports.

        Only the 'mixed' section of the first job of every node is used.

        returns: dict with per-node lists:
            'iops', 'bw'         - values taken from the report as-is
            'flt_iops', 'flt_bw' - total_ios / io_bytes divided by
                                   runtime / 1000
        """
        nodes = self.bw.connections_ids
        reports = [self.raw_result[node]['jobs'][0]['mixed'] for node in nodes]

        # Divide by a float: integer division would truncate the runtime
        # to whole units and distort (or zero out) the rates below.
        runtime = [rep['runtime'] / 1000.0 for rep in reports]

        iops = [rep['iops'] for rep in reports]
        total_ios = [rep['total_ios'] for rep in reports]
        flt_iops = [float(ios) / rtime for ios, rtime in zip(total_ios, runtime)]

        bw = [rep['bw'] for rep in reports]
        total_bytes = [rep['io_bytes'] for rep in reports]
        flt_bw = [float(tbytes) / rtime for tbytes, rtime in zip(total_bytes, runtime)]

        return {'iops': iops,
                'flt_iops': flt_iops,
                'bw': bw,
                'flt_bw': flt_bw}

    def summary(self):
        return get_test_summary(self.fio_task, len(self.config.nodes))

    def summary_tpl(self):
        return get_test_summary_tuple(self.fio_task, len(self.config.nodes))

    def get_lat_perc_50_95_multy(self):
        """Merge the latency histograms of all jobs of all nodes and
        return the result of get_lat_perc_50_95 for the merged histogram.

        'latency_ms' keys are multiplied by 1000 to share one scale with
        the 'latency_us' keys; a leading '>=' in a key is dropped.
        """
        try:
            string_types = basestring  # Python 2
        except NameError:
            string_types = str  # Python 3

        lat_mks = collections.defaultdict(lambda: 0)
        num_res = 0

        for result in self.raw_result.values():
            num_res += len(result['jobs'])
            for job_info in result['jobs']:
                for k, v in job_info['latency_ms'].items():
                    if isinstance(k, string_types) and k.startswith('>='):
                        lat_mks[int(k[2:]) * 1000] += v
                    else:
                        lat_mks[int(k) * 1000] += v

                for k, v in job_info['latency_us'].items():
                    lat_mks[int(k)] += v

        # average over all jobs; build a new dict instead of rewriting
        # the one being iterated
        averaged = dict((k, float(v) / num_res) for k, v in lat_mks.items())
        return get_lat_perc_50_95(averaged)

    def disk_perf_info(self, avg_interval=2.0):
        """Build (once) and return the DiskPerfInfo for this run.

        avg_interval: time series with a smaller average interval are
            re-sampled with ``derived(avg_interval)`` first.

        NOTE: the result is cached in ``self._pinfo``; later calls return
        the cached object whatever ``avg_interval`` they pass.
        """

        if self._pinfo is not None:
            return self._pinfo

        testnodes_count = len(self.config.nodes)

        pinfo = DiskPerfInfo(self.name,
                             self.summary(),
                             self.params,
                             testnodes_count)

        def prepare(data, drop=1):
            if data is None:
                return data

            res = []
            for ts_data in data:
                if ts_data.average_interval() < avg_interval:
                    ts_data = ts_data.derived(avg_interval)

                # drop last value on bounds
                # as they may contains ranges without activities
                assert len(ts_data.values) >= drop + 1, str(drop) + " " + str(ts_data.values)

                if drop > 0:
                    res.append(ts_data.values[:-drop])
                else:
                    res.append(ts_data.values)

            return res

        def agg_data(matr):
            # flatten per-vm lists, then sum position-wise over the
            # common (shortest) length
            arr = sum(matr, [])
            min_len = min(map(len, arr))
            res = []
            for idx in range(min_len):
                res.append(sum(dt[idx] for dt in arr))
            return res

        # real lists (not map objects): they are consumed by len()/sum()
        # several times below
        pinfo.raw_lat = [prepare(data) for data in self.lat.per_vm()]
        num_th = sum(map(len, pinfo.raw_lat))
        lat_avg = [val / float(num_th) for val in agg_data(pinfo.raw_lat)]
        pinfo.lat_avg = data_property(lat_avg).average / 1000.0  # us to ms

        pinfo.lat_50, pinfo.lat_95 = self.get_lat_perc_50_95_multy()
        pinfo.lat = pinfo.lat_50

        pinfo.raw_bw = [prepare(data) for data in self.bw.per_vm()]
        pinfo.raw_iops = [prepare(data) for data in self.iops.per_vm()]

        fparams = self.get_params_from_fio_report()
        fio_report_bw = sum(fparams['flt_bw'])
        fio_report_iops = sum(fparams['flt_iops'])

        agg_bw = agg_data(pinfo.raw_bw)
        agg_iops = agg_data(pinfo.raw_iops)

        log_bw_avg = average(agg_bw)
        log_iops_avg = average(agg_iops)

        # update values to match average from fio report
        coef_iops = fio_report_iops / float(log_iops_avg)
        coef_bw = fio_report_bw / float(log_bw_avg)

        bw_log = data_property([val * coef_bw for val in agg_bw])
        iops_log = data_property([val * coef_iops for val in agg_iops])

        bw_report = data_property([fio_report_bw])
        iops_report = data_property([fio_report_iops])

        # When IOPS/BW per thread is too low
        # data from logs is rounded to match
        iops_per_th = sum(sum(pinfo.raw_iops, []), [])
        if average(iops_per_th) > 10:
            pinfo.iops = iops_log
            pinfo.iops2 = iops_report
        else:
            pinfo.iops = iops_report
            pinfo.iops2 = iops_log

        bw_per_th = sum(sum(pinfo.raw_bw, []), [])
        if average(bw_per_th) > 10:
            pinfo.bw = bw_log
            pinfo.bw2 = bw_report
        else:
            pinfo.bw = bw_report
            pinfo.bw2 = bw_log

        self._pinfo = pinfo

        return pinfo
| 361 | |
| 362 | |
| 363 | class IOPerfTest(PerfTest): |
| 364 | tcp_conn_timeout = 30 |
| 365 | max_pig_timeout = 5 |
| 366 | soft_runcycle = 5 * 60 |
| 367 | |
| 368 | def __init__(self, config): |
| 369 | PerfTest.__init__(self, config) |
| 370 | |
| 371 | get = self.config.params.get |
| 372 | do_get = self.config.params.__getitem__ |
| 373 | |
| 374 | self.config_fname = do_get('cfg') |
| 375 | |
| 376 | if '/' not in self.config_fname and '.' not in self.config_fname: |
| 377 | cfgs_dir = os.path.dirname(__file__) |
| 378 | self.config_fname = os.path.join(cfgs_dir, |
| 379 | self.config_fname + '.cfg') |
| 380 | |
| 381 | self.alive_check_interval = get('alive_check_interval') |
| 382 | self.use_system_fio = get('use_system_fio', False) |
| 383 | |
koder aka kdanilov | 6ab4d43 | 2015-06-22 00:26:28 +0300 | [diff] [blame] | 384 | if get('prefill_files') is not None: |
| 385 | logger.warning("prefill_files option is depricated. Use force_prefill instead") |
| 386 | |
| 387 | self.force_prefill = get('force_prefill', False) |
koder aka kdanilov | bc2c898 | 2015-06-13 02:50:43 +0300 | [diff] [blame] | 388 | self.config_params = get('params', {}).copy() |
| 389 | |
| 390 | self.io_py_remote = self.join_remote("agent.py") |
| 391 | self.results_file = self.join_remote("results.json") |
| 392 | self.pid_file = self.join_remote("pid") |
| 393 | self.task_file = self.join_remote("task.cfg") |
| 394 | self.sh_file = self.join_remote("cmd.sh") |
| 395 | self.err_out_file = self.join_remote("fio_err_out") |
| 396 | self.exit_code_file = self.join_remote("exit_code") |
| 397 | |
koder aka kdanilov | bb6d6cd | 2015-06-20 02:55:07 +0300 | [diff] [blame] | 398 | self.max_latency = get("max_lat", None) |
koder aka kdanilov | 6ab4d43 | 2015-06-22 00:26:28 +0300 | [diff] [blame] | 399 | self.min_bw_per_thread = get("min_bw", None) |
koder aka kdanilov | bb6d6cd | 2015-06-20 02:55:07 +0300 | [diff] [blame] | 400 | |
koder aka kdanilov | bc2c898 | 2015-06-13 02:50:43 +0300 | [diff] [blame] | 401 | self.use_sudo = get("use_sudo", True) |
koder aka kdanilov | bc2c898 | 2015-06-13 02:50:43 +0300 | [diff] [blame] | 402 | |
| 403 | self.raw_cfg = open(self.config_fname).read() |
| 404 | self.fio_configs = fio_cfg_compile(self.raw_cfg, |
| 405 | self.config_fname, |
koder aka kdanilov | f236b9c | 2015-06-24 18:17:22 +0300 | [diff] [blame] | 406 | self.config_params) |
koder aka kdanilov | bc2c898 | 2015-06-13 02:50:43 +0300 | [diff] [blame] | 407 | self.fio_configs = list(self.fio_configs) |
| 408 | |
| 409 | @classmethod |
koder aka kdanilov | 0fdaaee | 2015-06-30 11:10:48 +0300 | [diff] [blame] | 410 | def load(cls, suite_name, folder): |
| 411 | res = [] |
koder aka kdanilov | bc2c898 | 2015-06-13 02:50:43 +0300 | [diff] [blame] | 412 | for fname in os.listdir(folder): |
| 413 | if re.match("\d+_params.yaml$", fname): |
| 414 | num = int(fname.split('_')[0]) |
koder aka kdanilov | 0fdaaee | 2015-06-30 11:10:48 +0300 | [diff] [blame] | 415 | res.append(load_test_results(folder, num)) |
| 416 | return IOTestResults(suite_name, res, folder) |
koder aka kdanilov | bc2c898 | 2015-06-13 02:50:43 +0300 | [diff] [blame] | 417 | |
    def cleanup(self):
        """Do nothing: removal of the remote temporary files is not implemented."""
        # delete_file(conn, self.io_py_remote)
        # Need to remove temporary files, used for testing
        pass
| 422 | |
koder aka kdanilov | f236b9c | 2015-06-24 18:17:22 +0300 | [diff] [blame] | 423 | # size is megabytes |
koder aka kdanilov | 6ab4d43 | 2015-06-22 00:26:28 +0300 | [diff] [blame] | 424 | def check_prefill_required(self, rossh, fname, size, num_blocks=16): |
koder aka kdanilov | 170936a | 2015-06-27 22:51:17 +0300 | [diff] [blame] | 425 | try: |
| 426 | with rossh.connection.open_sftp() as sftp: |
| 427 | fstats = sftp.stat(fname) |
koder aka kdanilov | f236b9c | 2015-06-24 18:17:22 +0300 | [diff] [blame] | 428 | |
koder aka kdanilov | 170936a | 2015-06-27 22:51:17 +0300 | [diff] [blame] | 429 | if stat.S_ISREG(fstats.st_mode) and fstats.st_size < size * 1024 ** 2: |
| 430 | return True |
| 431 | except EnvironmentError: |
koder aka kdanilov | f95cfc1 | 2015-06-23 03:33:19 +0300 | [diff] [blame] | 432 | return True |
| 433 | |
koder aka kdanilov | f236b9c | 2015-06-24 18:17:22 +0300 | [diff] [blame] | 434 | cmd = 'python -c "' + \ |
| 435 | "import sys;" + \ |
| 436 | "fd = open('{0}', 'rb');" + \ |
| 437 | "fd.seek({1});" + \ |
| 438 | "data = fd.read(1024); " + \ |
| 439 | "sys.stdout.write(data + ' ' * ( 1024 - len(data)))\" | md5sum" |
koder aka kdanilov | 6ab4d43 | 2015-06-22 00:26:28 +0300 | [diff] [blame] | 440 | |
| 441 | if self.use_sudo: |
| 442 | cmd = "sudo " + cmd |
| 443 | |
koder aka kdanilov | f95cfc1 | 2015-06-23 03:33:19 +0300 | [diff] [blame] | 444 | zero_md5 = '0f343b0931126a20f133d67c2b018a3b' |
koder aka kdanilov | 8fbb27f | 2015-07-17 22:23:31 +0300 | [diff] [blame^] | 445 | bsize = size * (1024 ** 2) |
| 446 | offsets = [random.randrange(bsize - 1024) for _ in range(num_blocks)] |
| 447 | offsets.append(bsize - 1024) |
| 448 | offsets.append(0) |
koder aka kdanilov | 6ab4d43 | 2015-06-22 00:26:28 +0300 | [diff] [blame] | 449 | |
koder aka kdanilov | f95cfc1 | 2015-06-23 03:33:19 +0300 | [diff] [blame] | 450 | for offset in offsets: |
koder aka kdanilov | 6ab4d43 | 2015-06-22 00:26:28 +0300 | [diff] [blame] | 451 | data = rossh(cmd.format(fname, offset), nolog=True) |
koder aka kdanilov | f236b9c | 2015-06-24 18:17:22 +0300 | [diff] [blame] | 452 | |
| 453 | md = "" |
| 454 | for line in data.split("\n"): |
| 455 | if "unable to resolve" not in line: |
| 456 | md = line.split()[0].strip() |
| 457 | break |
koder aka kdanilov | f95cfc1 | 2015-06-23 03:33:19 +0300 | [diff] [blame] | 458 | |
| 459 | if len(md) != 32: |
| 460 | logger.error("File data check is failed - " + data) |
koder aka kdanilov | 6ab4d43 | 2015-06-22 00:26:28 +0300 | [diff] [blame] | 461 | return True |
koder aka kdanilov | f95cfc1 | 2015-06-23 03:33:19 +0300 | [diff] [blame] | 462 | |
| 463 | if zero_md5 == md: |
| 464 | return True |
| 465 | |
koder aka kdanilov | 6ab4d43 | 2015-06-22 00:26:28 +0300 | [diff] [blame] | 466 | return False |
| 467 | |
| 468 | def prefill_test_files(self, rossh, files, force=False): |
| 469 | if self.use_system_fio: |
| 470 | cmd_templ = "fio " |
| 471 | else: |
| 472 | cmd_templ = "{0}/fio ".format(self.config.remote_dir) |
koder aka kdanilov | bc2c898 | 2015-06-13 02:50:43 +0300 | [diff] [blame] | 473 | |
| 474 | if self.use_sudo: |
| 475 | cmd_templ = "sudo " + cmd_templ |
| 476 | |
koder aka kdanilov | 6ab4d43 | 2015-06-22 00:26:28 +0300 | [diff] [blame] | 477 | cmd_templ += "--name=xxx --filename={0} --direct=1" + \ |
| 478 | " --bs=4m --size={1}m --rw=write" |
koder aka kdanilov | bc2c898 | 2015-06-13 02:50:43 +0300 | [diff] [blame] | 479 | |
koder aka kdanilov | 6ab4d43 | 2015-06-22 00:26:28 +0300 | [diff] [blame] | 480 | ssize = 0 |
| 481 | |
| 482 | if force: |
| 483 | logger.info("File prefilling is forced") |
| 484 | |
| 485 | ddtime = 0 |
koder aka kdanilov | bc2c898 | 2015-06-13 02:50:43 +0300 | [diff] [blame] | 486 | for fname, curr_sz in files.items(): |
koder aka kdanilov | 6ab4d43 | 2015-06-22 00:26:28 +0300 | [diff] [blame] | 487 | if not force: |
| 488 | if not self.check_prefill_required(rossh, fname, curr_sz): |
koder aka kdanilov | f236b9c | 2015-06-24 18:17:22 +0300 | [diff] [blame] | 489 | logger.debug("prefill is skipped") |
koder aka kdanilov | 6ab4d43 | 2015-06-22 00:26:28 +0300 | [diff] [blame] | 490 | continue |
| 491 | |
| 492 | logger.info("Prefilling file {0}".format(fname)) |
koder aka kdanilov | bc2c898 | 2015-06-13 02:50:43 +0300 | [diff] [blame] | 493 | cmd = cmd_templ.format(fname, curr_sz) |
| 494 | ssize += curr_sz |
| 495 | |
koder aka kdanilov | 6ab4d43 | 2015-06-22 00:26:28 +0300 | [diff] [blame] | 496 | stime = time.time() |
koder aka kdanilov | bc2c898 | 2015-06-13 02:50:43 +0300 | [diff] [blame] | 497 | rossh(cmd, timeout=curr_sz) |
koder aka kdanilov | 6ab4d43 | 2015-06-22 00:26:28 +0300 | [diff] [blame] | 498 | ddtime += time.time() - stime |
koder aka kdanilov | bc2c898 | 2015-06-13 02:50:43 +0300 | [diff] [blame] | 499 | |
koder aka kdanilov | 6ab4d43 | 2015-06-22 00:26:28 +0300 | [diff] [blame] | 500 | if ddtime > 1.0: |
koder aka kdanilov | bc2c898 | 2015-06-13 02:50:43 +0300 | [diff] [blame] | 501 | fill_bw = int(ssize / ddtime) |
koder aka kdanilov | 6ab4d43 | 2015-06-22 00:26:28 +0300 | [diff] [blame] | 502 | mess = "Initiall fio fill bw is {0} MiBps for this vm" |
koder aka kdanilov | bc2c898 | 2015-06-13 02:50:43 +0300 | [diff] [blame] | 503 | logger.info(mess.format(fill_bw)) |
koder aka kdanilov | bc2c898 | 2015-06-13 02:50:43 +0300 | [diff] [blame] | 504 | |
koder aka kdanilov | 6ab4d43 | 2015-06-22 00:26:28 +0300 | [diff] [blame] | 505 | def install_utils(self, node, rossh, max_retry=3, timeout=5): |
koder aka kdanilov | bc2c898 | 2015-06-13 02:50:43 +0300 | [diff] [blame] | 506 | need_install = [] |
| 507 | packs = [('screen', 'screen')] |
koder aka kdanilov | 6ab4d43 | 2015-06-22 00:26:28 +0300 | [diff] [blame] | 508 | os_info = get_os(rossh) |
koder aka kdanilov | bc2c898 | 2015-06-13 02:50:43 +0300 | [diff] [blame] | 509 | |
| 510 | if self.use_system_fio: |
| 511 | packs.append(('fio', 'fio')) |
| 512 | else: |
koder aka kdanilov | 6ab4d43 | 2015-06-22 00:26:28 +0300 | [diff] [blame] | 513 | packs.append(('bzip2', 'bzip2')) |
koder aka kdanilov | bc2c898 | 2015-06-13 02:50:43 +0300 | [diff] [blame] | 514 | |
| 515 | for bin_name, package in packs: |
| 516 | if bin_name is None: |
| 517 | need_install.append(package) |
| 518 | continue |
| 519 | |
| 520 | try: |
| 521 | rossh('which ' + bin_name, nolog=True) |
| 522 | except OSError: |
| 523 | need_install.append(package) |
| 524 | |
koder aka kdanilov | 6ab4d43 | 2015-06-22 00:26:28 +0300 | [diff] [blame] | 525 | if len(need_install) != 0: |
| 526 | if 'redhat' == os_info.distro: |
| 527 | cmd = "sudo yum -y install " + " ".join(need_install) |
| 528 | else: |
| 529 | cmd = "sudo apt-get -y install " + " ".join(need_install) |
koder aka kdanilov | bc2c898 | 2015-06-13 02:50:43 +0300 | [diff] [blame] | 530 | |
koder aka kdanilov | 6ab4d43 | 2015-06-22 00:26:28 +0300 | [diff] [blame] | 531 | for _ in range(max_retry): |
| 532 | try: |
| 533 | rossh(cmd) |
| 534 | break |
| 535 | except OSError as err: |
| 536 | time.sleep(timeout) |
| 537 | else: |
| 538 | raise OSError("Can't install - " + str(err)) |
koder aka kdanilov | bc2c898 | 2015-06-13 02:50:43 +0300 | [diff] [blame] | 539 | |
koder aka kdanilov | 6ab4d43 | 2015-06-22 00:26:28 +0300 | [diff] [blame] | 540 | if not self.use_system_fio: |
| 541 | fio_dir = os.path.dirname(os.path.dirname(wally.__file__)) |
| 542 | fio_dir = os.path.join(os.getcwd(), fio_dir) |
| 543 | fio_dir = os.path.join(fio_dir, 'fio_binaries') |
| 544 | fname = 'fio_{0.release}_{0.arch}.bz2'.format(os_info) |
| 545 | fio_path = os.path.join(fio_dir, fname) |
| 546 | |
| 547 | if not os.path.exists(fio_path): |
| 548 | raise RuntimeError("No prebuild fio available for {0}".format(os_info)) |
| 549 | |
| 550 | bz_dest = self.join_remote('fio.bz2') |
| 551 | with node.connection.open_sftp() as sftp: |
| 552 | sftp.put(fio_path, bz_dest) |
| 553 | |
| 554 | rossh("bzip2 --decompress " + bz_dest, nolog=True) |
| 555 | rossh("chmod a+x " + self.join_remote("fio"), nolog=True) |
koder aka kdanilov | bc2c898 | 2015-06-13 02:50:43 +0300 | [diff] [blame] | 556 | |
| 557 | def pre_run(self): |
koder aka kdanilov | 6ab4d43 | 2015-06-22 00:26:28 +0300 | [diff] [blame] | 558 | files = {} |
| 559 | for section in self.fio_configs: |
| 560 | sz = ssize2b(section.vals['size']) |
| 561 | msz = sz / (1024 ** 2) |
koder aka kdanilov | bc2c898 | 2015-06-13 02:50:43 +0300 | [diff] [blame] | 562 | |
koder aka kdanilov | 6ab4d43 | 2015-06-22 00:26:28 +0300 | [diff] [blame] | 563 | if sz % (1024 ** 2) != 0: |
| 564 | msz += 1 |
koder aka kdanilov | bc2c898 | 2015-06-13 02:50:43 +0300 | [diff] [blame] | 565 | |
koder aka kdanilov | 6ab4d43 | 2015-06-22 00:26:28 +0300 | [diff] [blame] | 566 | fname = section.vals['filename'] |
koder aka kdanilov | bc2c898 | 2015-06-13 02:50:43 +0300 | [diff] [blame] | 567 | |
koder aka kdanilov | 6ab4d43 | 2015-06-22 00:26:28 +0300 | [diff] [blame] | 568 | # if already has other test with the same file name |
| 569 | # take largest size |
| 570 | files[fname] = max(files.get(fname, 0), msz) |
koder aka kdanilov | bc2c898 | 2015-06-13 02:50:43 +0300 | [diff] [blame] | 571 | |
| 572 | with ThreadPoolExecutor(len(self.config.nodes)) as pool: |
koder aka kdanilov | 6ab4d43 | 2015-06-22 00:26:28 +0300 | [diff] [blame] | 573 | fc = functools.partial(self.pre_run_th, |
| 574 | files=files, |
| 575 | force=self.force_prefill) |
koder aka kdanilov | bc2c898 | 2015-06-13 02:50:43 +0300 | [diff] [blame] | 576 | list(pool.map(fc, self.config.nodes)) |
| 577 | |
def pre_run_th(self, node, files, force):
    """
    Prepare a single test node for the fio run.

    Creates the remote working directory (chown-ing it to the node user
    when sudo is in use), removes everything inside it, then calls
    self.install_utils and self.prefill_test_files for the node.

    node  - test node; must provide .connection, get_user() and
            get_conn_id()
    files - passed through unchanged to self.prefill_test_files
    force - passed through unchanged to self.prefill_test_files

    Raises StopTestError if the remote directory can't be prepared.
    Any other failure is logged and re-raised unchanged.
    """
    try:
        rossh = run_on_node(node)
        rossh.connection = node.connection

        try:
            remote_dir = self.config.remote_dir

            # Explicit check instead of ``assert``: asserts are stripped
            # under ``python -O`` and "rm -rf /*" must never be sent.
            # Performed before any remote command is issued.
            if remote_dir == "" or remote_dir == "/":
                raise ValueError(
                    "Refusing to use {0!r} as remote directory".format(remote_dir))

            cmd = 'mkdir -p "{0}"'.format(remote_dir)
            if self.use_sudo:
                # directory is created by root, hand it over to the test user
                cmd = "sudo " + cmd
                cmd += " ; sudo chown {0} {1}".format(node.get_user(),
                                                      remote_dir)
            rossh(cmd, nolog=True)

            # start from an empty working directory
            rossh("rm -rf {0}/*".format(remote_dir), nolog=True)

        except Exception as exc:
            msg = "Failed to create folder {0} on remote {1}. Error: {2!s}"
            msg = msg.format(self.config.remote_dir, node.get_conn_id(), exc)
            logger.exception(msg)
            raise StopTestError(msg, exc)

        self.install_utils(node, rossh)
        self.prefill_test_files(rossh, files, force)
    except Exception:
        # this code runs in a pool thread - make sure the traceback
        # reaches the log before the error propagates to the caller
        logger.exception("Node preparation before test run failed")
        raise
koder aka kdanilov | bc2c898 | 2015-06-13 02:50:43 +0300 | [diff] [blame] | 606 | |
def show_test_execution_time(self):
    """
    Log a rough estimate of the total run time and the expected
    finish time for all fio configs.

    Nothing is logged unless more than one fio config is scheduled.
    """
    if len(self.fio_configs) <= 1:
        return

    # +10% - rough allowance for auxiliary operations (sftp, etc)
    total_sec = int(sum(execution_time(cfg) for cfg in self.fio_configs) * 1.1)
    total_str = sec_to_str(total_sec)
    finish_dt = datetime.datetime.now() + datetime.timedelta(seconds=total_sec)

    logger.info("Entire test should takes aroud: {0} and finished at {1}".format(
        total_str, finish_dt.strftime("%H:%M:%S")))
| 618 | |
def run(self):
    """
    Run every fio config from self.fio_configs on all nodes and
    collect the results.

    For each config the job is executed on all nodes in parallel
    (self.do_run in a thread pool), the job text and run parameters
    are stored in self.config.log_directory and the results are loaded
    back with load_test_results.

    Configs whose summary prefix (text before "th") is already marked
    as too slow - by self.max_latency or self.min_bw_per_thread - are
    skipped.

    Returns IOTestResults built from self.config.params['cfg'], the
    list of loaded results and the log directory.
    Raises StopTestError when a fio run keeps failing after all retries.
    """
    logger.debug("Run preparation")
    self.pre_run()
    self.show_test_execution_time()

    # NOTE(review): tname is computed but not used anywhere below
    tname = os.path.basename(self.config_fname)
    if tname.endswith('.cfg'):
        tname = tname[:-4]

    # one barrier party per node, handed to every do_run call
    barrier = Barrier(len(self.config.nodes))
    results = []

    # set of Operation_Mode_BlockSize strings
    # which should not be tested anymore, as
    # they were already too slow with a previous thread count
    lat_bw_limit_reached = set()

    with ThreadPoolExecutor(len(self.config.nodes)) as pool:
        for pos, fio_cfg in enumerate(self.fio_configs):
            # summary with the thread-count part cut off
            test_descr = get_test_summary(fio_cfg.vals).split("th")[0]
            if test_descr in lat_bw_limit_reached:
                continue
            else:
                logger.info("Will run {0} test".format(fio_cfg.name))

            templ = "Test should takes about {0}." + \
                    " Should finish at {1}," + \
                    " will wait at most till {2}"
            exec_time = execution_time(fio_cfg)
            exec_time_str = sec_to_str(exec_time)
            # hard limit: expected time plus at least 300 seconds
            timeout = int(exec_time + max(300, exec_time))

            now_dt = datetime.datetime.now()
            end_dt = now_dt + datetime.timedelta(0, exec_time)
            wait_till = now_dt + datetime.timedelta(0, timeout)

            logger.info(templ.format(exec_time_str,
                                     end_dt.strftime("%H:%M:%S"),
                                     wait_till.strftime("%H:%M:%S")))

            func = functools.partial(self.do_run,
                                     barrier=barrier,
                                     fio_cfg=fio_cfg,
                                     pos=pos)

            # up to max_retr attempts, 30 seconds apart; the last
            # failure is converted into StopTestError
            max_retr = 3
            for idx in range(max_retr):
                try:
                    intervals = list(pool.map(func, self.config.nodes))
                    break
                except (EnvironmentError, SSHException) as exc:
                    logger.exception("During fio run")
                    if idx == max_retr - 1:
                        raise StopTestError("Fio failed", exc)

                logger.info("Sleeping 30s and retrying")
                time.sleep(30)

            # keep the exact job text next to the results
            fname = "{0}_task.fio".format(pos)
            with open(os.path.join(self.config.log_directory, fname), "w") as fd:
                fd.write(str(fio_cfg))

            # run parameters, stored as <pos>_params.yaml
            params = {'vm_count': len(self.config.nodes)}
            params['name'] = fio_cfg.name
            params['vals'] = dict(fio_cfg.vals.items())
            params['intervals'] = intervals
            params['nodes'] = [node.get_conn_id() for node in self.config.nodes]

            fname = "{0}_params.yaml".format(pos)
            with open(os.path.join(self.config.log_directory, fname), "w") as fd:
                fd.write(dumps(params))

            res = load_test_results(self.config.log_directory, pos)
            results.append(res)

            if self.max_latency is not None:
                lat_50, _ = res.get_lat_perc_50_95_multy()

                # NOTE(review): the original comment mentioned a us -> ms
                # conversion, but the values are compared as-is here
                if self.max_latency < lat_50:
                    logger.info(("Will skip all subsequent tests of {0} " +
                                 "due to lat/bw limits").format(fio_cfg.name))
                    lat_bw_limit_reached.add(test_descr)

            test_res = res.get_params_from_fio_report()
            if self.min_bw_per_thread is not None:
                # average bandwidth below the limit - stop testing this mode
                if self.min_bw_per_thread > average(test_res['bw']):
                    lat_bw_limit_reached.add(test_descr)

    return IOTestResults(self.config.params['cfg'],
                         results, self.config.log_directory)
koder aka kdanilov | bc2c898 | 2015-06-13 02:50:43 +0300 | [diff] [blame] | 710 | |
def do_run(self, node, barrier, fio_cfg, pos, nolog=False):
    """
    Run one fio job on one node and fetch its results into
    self.config.log_directory.

    node    - test node (uses .connection, .conn_url, get_conn_id())
    barrier - barrier waited on right before fio is started
    fio_cfg - fio job; str(fio_cfg) is uploaded as the job file
    pos     - job index, used as a prefix for local result file names
    nolog   - when true, the "test finished" debug message is skipped

    Returns a (begin, end) tuple of time.time() stamps taken before the
    remote directory snapshot and after the task wait loop finished.
    Raises StopTestError if the stored fio exit code is not '0'.
    """
    if self.use_sudo:
        sudo = "sudo "
    else:
        sudo = ""

    # wrapper script: run fio with json output, keep stdout/stderr
    # and the exit code in files so they can be fetched afterwards
    bash_file = "#!/bin/bash\n" + \
                "cd {exec_folder}\n" + \
                "{fio_path}fio --output-format=json --output={out_file} " + \
                "--alloc-size=262144 {job_file} " + \
                " >{err_out_file} 2>&1 \n" + \
                "echo $? >{res_code_file}\n"

    exec_folder = self.config.remote_dir

    # with a system-wide fio rely on PATH, otherwise call the
    # binary located in the remote working directory
    if self.use_system_fio:
        fio_path = ""
    else:
        if not exec_folder.endswith("/"):
            fio_path = exec_folder + "/"
        else:
            fio_path = exec_folder

    bash_file = bash_file.format(out_file=self.results_file,
                                 job_file=self.task_file,
                                 err_out_file=self.err_out_file,
                                 res_code_file=self.exit_code_file,
                                 exec_folder=exec_folder,
                                 fio_path=fio_path)

    # upload the job description and the wrapper script
    with node.connection.open_sftp() as sftp:
        save_to_remote(sftp, self.task_file, str(fio_cfg))
        save_to_remote(sftp, self.sh_file, bash_file)

    exec_time = execution_time(fio_cfg)

    # hard limit: expected time plus at least 300 seconds
    timeout = int(exec_time + max(300, exec_time))
    soft_tout = exec_time

    begin = time.time()

    # directory listing before the run; compared with the listing
    # after the run to find the files fio has created
    fnames_before = run_on_node(node)("ls -1 " + exec_folder, nolog=True)

    # presumably synchronizes the start across nodes - one barrier
    # is shared by all do_run calls of a test
    barrier.wait()

    task = BGSSHTask(node, self.use_sudo)
    task.start(sudo + "bash " + self.sh_file)

    # on an ssh failure drop the connection, reconnect and wait again
    # NOTE(review): there is no retry limit in this loop
    while True:
        try:
            task.wait(soft_tout, timeout)
            break
        except paramiko.SSHException:
            pass

        try:
            node.connection.close()
        except:
            pass

        reconnect(node.connection, node.conn_url)

    end = time.time()
    rossh = run_on_node(node)
    fnames_after = rossh("ls -1 " + exec_folder, nolog=True)

    # ':' is replaced so the id can be used in local file names
    conn_id = node.get_conn_id().replace(":", "_")
    if not nolog:
        logger.debug("Test on node {0} is finished".format(conn_id))

    # file name prefixes of the logs requested in the job
    log_files_pref = []
    if 'write_lat_log' in fio_cfg.vals:
        fname = fio_cfg.vals['write_lat_log']
        log_files_pref.append(fname + '_clat')
        log_files_pref.append(fname + '_lat')
        log_files_pref.append(fname + '_slat')

    if 'write_iops_log' in fio_cfg.vals:
        fname = fio_cfg.vals['write_iops_log']
        log_files_pref.append(fname + '_iops')

    if 'write_bw_log' in fio_cfg.vals:
        fname = fio_cfg.vals['write_bw_log']
        log_files_pref.append(fname + '_bw')

    # log type -> [(index, remote file name)]
    files = collections.defaultdict(lambda: [])
    all_files = [os.path.basename(self.results_file)]
    new_files = set(fnames_after.split()) - set(fnames_before.split())

    for fname in new_files:
        if fname.endswith('.log') and fname.split('.')[0] in log_files_pref:
            name, _ = os.path.splitext(fname)
            if fname.count('.') == 1:
                # "<prefix>_<type>.log" - single log, index 0
                tp = name.split("_")[-1]
                cnt = 0
            else:
                # "<prefix>_<type>.<index>.log"
                tp_cnt = name.split("_")[-1]
                tp, cnt = tp_cnt.split('.')
            files[tp].append((int(cnt), fname))
            all_files.append(fname)

    arch_name = self.join_remote('wally_result.tar.gz')
    tmp_dir = os.path.join(self.config.log_directory, 'tmp_' + conn_id)
    os.mkdir(tmp_dir)
    loc_arch_name = os.path.join(tmp_dir, 'wally_result.{0}.tar.gz'.format(conn_id))
    file_full_names = " ".join(all_files)

    # best effort removal of a stale local archive
    try:
        os.unlink(loc_arch_name)
    except:
        pass

    with node.connection.open_sftp() as sftp:
        exit_code = read_from_remote(sftp, self.exit_code_file)
        err_out = read_from_remote(sftp, self.err_out_file)
        exit_code = exit_code.strip()

        if exit_code != '0':
            msg = "fio exit with code {0}: {1}".format(exit_code, err_out)
            logger.critical(msg.strip())
            raise StopTestError("fio failed")

        # pack all result files remotely and download a single archive
        rossh("rm -f {0}".format(arch_name), nolog=True)
        pack_files_cmd = "cd {0} ; tar zcvf {1} {2}".format(exec_folder, arch_name, file_full_names)
        rossh(pack_files_cmd, nolog=True)
        sftp.get(arch_name, loc_arch_name)

    unpack_files_cmd = "cd {0} ; tar xvzf {1} >/dev/null".format(tmp_dir, loc_arch_name)
    subprocess.check_call(unpack_files_cmd, shell=True)
    os.unlink(loc_arch_name)

    # move logs to <pos>_<conn_id>_<type>.<index>.log in the log directory
    for ftype, fls in files.items():
        for idx, fname in fls:
            cname = os.path.join(tmp_dir, fname)
            loc_fname = "{0}_{1}_{2}.{3}.log".format(pos, conn_id, ftype, idx)
            loc_path = os.path.join(self.config.log_directory, loc_fname)
            os.rename(cname, loc_path)

    # raw fio json report goes to <pos>_<conn_id>_rawres.json
    cname = os.path.join(tmp_dir,
                         os.path.basename(self.results_file))
    loc_fname = "{0}_{1}_rawres.json".format(pos, conn_id)
    loc_path = os.path.join(self.config.log_directory, loc_fname)
    os.rename(cname, loc_path)
    # os.rmdir only succeeds if every unpacked file was moved away
    os.rmdir(tmp_dir)

    # clean up the remote side: archive and result files
    remove_remote_res_files_cmd = "cd {0} ; rm -f {1} {2}".format(exec_folder,
                                                                  arch_name,
                                                                  file_full_names)
    rossh(remove_remote_res_files_cmd, nolog=True)
    return begin, end
| 861 | |
@classmethod
def prepare_data(cls, results):
    """
    Convert test results into a list of plain dicts for reporting.

    results - iterable of result objects providing name, summary_tpl(),
              disk_perf_info() and config.nodes

    Returns a list with one dict per result, sorted by
    (name, operation, mode, block size in bytes, threads * vms).
    Dict keys: name, key, summ, iops, bw, conf, dev, iops_per_vm,
    bw_per_vm, lat_50, lat_95, lat_avg. "conf" and "dev" are the
    bandwidth confidence interval and deviation as a percentage of the
    average bandwidth, stored as strings.
    """

    def key_func(data):
        tpl = data.summary_tpl()
        return (data.name,
                tpl.oper,
                tpl.mode,
                ssize2b(tpl.bsize),
                int(tpl.th_count) * int(tpl.vm_count))

    def percent_of(part, whole):
        # a zero average bandwidth would otherwise raise
        # ZeroDivisionError and break the whole report
        if whole == 0:
            return 0
        return int(round(part * 100 / whole))

    res = []

    for item in sorted(results, key=key_func):
        test_dinfo = item.disk_perf_info()

        iops, _ = test_dinfo.iops.rounded_average_conf()

        bw, bw_conf = test_dinfo.bw.rounded_average_conf()
        _, bw_dev = test_dinfo.bw.rounded_average_dev()
        conf_perc = percent_of(bw_conf, bw)
        dev_perc = percent_of(bw_dev, bw)

        lat_50 = round_3_digit(int(test_dinfo.lat_50))
        lat_95 = round_3_digit(int(test_dinfo.lat_95))
        lat_avg = round_3_digit(int(test_dinfo.lat_avg))

        testnodes_count = len(item.config.nodes)
        iops_per_vm = round_3_digit(iops / testnodes_count)
        bw_per_vm = round_3_digit(bw / testnodes_count)

        iops = round_3_digit(iops)
        bw = round_3_digit(bw)

        summ = "{0.oper}{0.mode} {0.bsize:>4} {0.th_count:>3}th {0.vm_count:>2}vm".format(item.summary_tpl())

        # compute the sort key once and reuse it for both fields
        item_key = key_func(item)

        res.append({"name": item_key[0],
                    # the total thread count is left out on purpose:
                    # rows differing only by it share the same "key"
                    "key": item_key[:4],
                    "summ": summ,
                    "iops": int(iops),
                    "bw": int(bw),
                    "conf": str(conf_perc),
                    "dev": str(dev_perc),
                    "iops_per_vm": int(iops_per_vm),
                    "bw_per_vm": int(bw_per_vm),
                    "lat_50": lat_50,
                    "lat_95": lat_95,
                    "lat_avg": lat_avg})

    return res
| 915 | |
# description of one console report column
Field = collections.namedtuple("Field", ["header", "attr", "allign", "size"])

# console report columns, in display order; "attr" is the key of the
# matching value in the dicts produced by prepare_data
fiels_and_header = [
    Field(header="Name", attr="name", allign="l", size=7),
    Field(header="Description", attr="summ", allign="l", size=19),
    Field(header="IOPS\ncum", attr="iops", allign="r", size=3),
    Field(header="KiBps\ncum", attr="bw", allign="r", size=6),
    Field(header="Cnf %\n95%", attr="conf", allign="r", size=3),
    Field(header="Dev%", attr="dev", allign="r", size=3),
    Field(header="iops\n/vm", attr="iops_per_vm", allign="r", size=3),
    Field(header="KiBps\n/vm", attr="bw_per_vm", allign="r", size=6),
    Field(header="lat ms\nmedian", attr="lat_50", allign="r", size=3),
    Field(header="lat ms\n95%", attr="lat_95", allign="r", size=3),
    Field(header="lat\navg", attr="lat_avg", allign="r", size=3),
]

# the same columns, addressable by attribute name
fiels_and_header_dct = {field.attr: field for field in fiels_and_header}
| 932 | |
@classmethod
def format_for_console(cls, results):
    """
    Render test results as a text table for console output.

    One row per entry returned by cls.prepare_data(results); a dashed
    separator row is inserted whenever the row "key" changes.
    Returns the drawn table as a string.
    """
    columns = cls.fiels_and_header

    table = texttable.Texttable(max_width=120)
    table.set_deco(table.HEADER | table.VLINES | table.BORDER)
    table.set_cols_align([col.allign for col in columns])
    table.header([col.header for col in columns])

    separator = ["-" * col.size for col in columns]
    last_key = None

    for row in cls.prepare_data(results):
        # visually split groups of rows sharing the same key
        if last_key is not None and last_key != row["key"]:
            table.add_row(separator)

        last_key = row["key"]
        table.add_row([row[col.attr] for col in columns])

    return table.draw()
| 955 | |
@classmethod
def format_diff_for_console(cls, list_of_results):
    """
    Render a text table comparing several result sets.

    list_of_results - sequence of result collections; the first one is
                      the baseline, each is passed to cls.prepare_data

    Rows follow the baseline tests. For every test the table shows:
      * baseline "iops ~ conf ~ dev", then for each other set its IOPS
        as a percentage of the baseline (with that set's conf and dev)
      * baseline bandwidth, then bandwidth of each other set as a
        percentage of the baseline
      * "lat_50 - lat_95" for every set, baseline included
    "-" marks a test missing from a set, "Nan" a zero baseline value.

    Returns the drawn table as a string.
    """

    tab = texttable.Texttable(max_width=200)
    tab.set_deco(tab.HEADER | tab.VLINES | tab.BORDER)

    header = [
        cls.fiels_and_header_dct["name"].header,
        cls.fiels_and_header_dct["summ"].header,
    ]
    allign = ["l", "l"]

    # every group: one baseline column plus one column per extra
    # result set, numbered starting from 2
    total = len(list_of_results)
    column_groups = (("IOPS ~ Cnf% ~ Dev%", "IOPS_{0} %"),
                     ("BW", "BW_{0} %"),
                     ("LAT", "LAT_{0}"))

    for base_title, other_templ in column_groups:
        header.append(base_title)
        header.extend(other_templ.format(num) for num in range(2, total + 1))
        allign.extend(["r"] * total)

    tab.header(header)
    sep = ["-" * 3] * len(header)

    # Must be a real list: it is iterated below AND indexed with [0],
    # which fails with the lazy one-shot map object of Python 3.
    processed_results = [cls.prepare_data(results)
                         for results in list_of_results]

    # per result set: (name, summ) -> prepared row
    key2results = []
    for res in processed_results:
        key2results.append(dict(
            ((item["name"], item["summ"]), item) for item in res
        ))

    prev_k = None
    iops_frmt = "{0[iops]} ~ {0[conf]:>2} ~ {0[dev]:>2}"
    for item in processed_results[0]:
        # separator row between groups of tests with a different key
        if prev_k is not None:
            if prev_k != item["key"]:
                tab.add_row(sep)

        prev_k = item["key"]

        key = (item['name'], item['summ'])
        line = list(key)
        base = key2results[0][key]

        line.append(iops_frmt.format(base))

        for test_results in key2results[1:]:
            val = test_results.get(key)
            if val is None:
                line.append("-")
            elif base['iops'] == 0:
                line.append("Nan")
            else:
                # IOPS relative to the baseline, in percent
                prc_val = {'dev': val['dev'], 'conf': val['conf']}
                prc_val['iops'] = int(100 * val['iops'] / base['iops'])
                line.append(iops_frmt.format(prc_val))

        line.append(base['bw'])

        for test_results in key2results[1:]:
            val = test_results.get(key)
            if val is None:
                line.append("-")
            elif base['bw'] == 0:
                line.append("Nan")
            else:
                line.append(int(100 * val['bw'] / base['bw']))

        # latency is shown as absolute values for all sets
        for test_results in key2results:
            val = test_results.get(key)
            if val is None:
                line.append("-")
            else:
                line.append("{0[lat_50]} - {0[lat_95]}".format(val))

        tab.add_row(line)

    tab.set_cols_align(allign)
    return tab.draw()