blob: 374e5a4491ff253054a279d06e233bc7127a3547 [file] [log] [blame]
koder aka kdanilovbc2c8982015-06-13 02:50:43 +03001import os.path
2import logging
kdanylov aka koder026e5f22017-05-15 01:04:39 +03003from typing import cast, Any, List, Union, Tuple, Optional
koder aka kdanilova732a602017-02-01 20:29:56 +02004
5import numpy
koder aka kdanilovbc2c8982015-06-13 02:50:43 +03006
kdanylov aka koder026e5f22017-05-15 01:04:39 +03007from cephlib.units import ssize2b, b2ssize
8from cephlib.node import IRPCNode, get_os
9
koder aka kdanilov6ab4d432015-06-22 00:26:28 +030010import wally
kdanylov aka koder026e5f22017-05-15 01:04:39 +030011from ...utils import StopTestError
koder aka kdanilovf2865172016-12-30 03:35:11 +020012from ..itest import ThreadedTest
koder aka kdanilovf90de852017-01-20 18:12:27 +020013from ...result_classes import TimeSeries, DataSource
14from ..job import JobConfig
koder aka kdanilovf2865172016-12-30 03:35:11 +020015from .fio_task_parser import execution_time, fio_cfg_compile, FioJobConfig, FioParams, get_log_files
koder aka kdanilovbbbe1dc2016-12-20 01:19:56 +020016from . import rpc_plugin
kdanylov aka kodercdfcdaf2017-04-29 10:03:39 +030017from .fio_hist import get_lat_vals, expected_lat_bins
koder aka kdanilov7f59d562016-12-26 01:34:23 +020018
koder aka kdanilovbc2c8982015-06-13 02:50:43 +030019
20logger = logging.getLogger("wally")
21
22
class FioTest(ThreadedTest):
    """Fio-based disk load test.

    Compiles a fio job config from a load profile, prefills the target file
    on each test node, runs fio remotely over RPC and collects the resulting
    time series.
    """
    soft_runcycle = 5 * 60  # seconds
    retry_time = 30
    configs_dir = os.path.dirname(__file__)  # type: str
    name = 'fio'
    job_config_cls = FioJobConfig

    def __init__(self, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)

        get = self.suite.params.get

        self.remote_task_file = self.join_remote("task.fio")
        self.remote_output_file = self.join_remote("fio_result.json")
        self.use_system_fio = get('use_system_fio', False)  # type: bool
        self.use_sudo = get("use_sudo", True)  # type: bool
        self.force_prefill = get('force_prefill', False)  # type: bool
        self.skip_prefill = get('skip_prefill', False)  # type: bool
        self.load_profile_name = self.suite.params['load']  # type: str

        # 'load' is either a path to a profile file or the name of a bundled one
        if os.path.isfile(self.load_profile_name):
            self.load_profile_path = self.load_profile_name  # type: str
        else:
            self.load_profile_path = os.path.join(self.configs_dir, self.load_profile_name + '.cfg')

        # was: open(...).read() with no close - leaked the file handle
        with open(self.load_profile_path, 'rt') as fd:
            self.load_profile = fd.read()  # type: str

        if self.use_system_fio:
            self.fio_path = "fio"  # type: str
        else:
            self.fio_path = os.path.join(self.suite.remote_dir, "fio")

        self.load_params = self.suite.params['params']
        self.file_name = self.load_params['FILENAME']

        if 'FILESIZE' not in self.load_params:
            # autodetect target file size; it must be identical on every node
            logger.debug("Getting test file sizes on all nodes")
            try:
                sizes = {node.conn.fs.file_stat(self.file_name)[b'size']
                         for node in self.suite.nodes}
            except Exception:
                logger.exception("FILESIZE is not set in config file and fail to detect it." +
                                 "Set FILESIZE or fix error and rerun test")
                raise StopTestError()

            if len(sizes) != 1:
                logger.error("IO target file %r has different sizes on test nodes - %r",
                             self.file_name, sizes)
                raise StopTestError()

            self.file_size = list(sizes)[0]
            logger.info("Detected test file size is %sB", b2ssize(self.file_size))
            # round size down to a 4MiB multiple; warn about the unused tail
            if self.file_size % (4 * 1024 ** 2) != 0:
                tail = self.file_size % (4 * 1024 ** 2)
                logger.warning("File size is not proportional to 4M, %sb at the end will not be used for test",
                               str(tail // 1024) + "Kb" if tail > 1024 else str(tail) + "b")
                self.file_size -= self.file_size % (4 * 1024 ** 2)
            self.load_params['FILESIZE'] = self.file_size
        else:
            self.file_size = ssize2b(self.load_params['FILESIZE'])

        self.job_configs = list(fio_cfg_compile(self.load_profile, self.load_profile_path,
                                                cast(FioParams, self.load_params)))

        if len(self.job_configs) == 0:
            logger.error("Empty fio config provided")
            raise StopTestError()

        self.exec_folder = self.suite.remote_dir
koder aka kdanilovbc2c8982015-06-13 02:50:43 +030092
koder aka kdanilov70227062016-11-26 23:23:21 +020093 def config_node(self, node: IRPCNode) -> None:
koder aka kdanilov23e6bdf2016-12-24 02:18:54 +020094 plugin_code = open(rpc_plugin.__file__.rsplit(".", 1)[0] + ".py", "rb").read() # type: bytes
95 node.upload_plugin("fio", plugin_code)
koder aka kdanilovbbbe1dc2016-12-20 01:19:56 +020096
koder aka kdanilovbc2c8982015-06-13 02:50:43 +030097 try:
koder aka kdanilov108ac362017-01-19 20:17:16 +020098 node.conn.fs.rmtree(self.suite.remote_dir)
koder aka kdanilovbbbe1dc2016-12-20 01:19:56 +020099 except Exception:
100 pass
101
102 try:
koder aka kdanilov108ac362017-01-19 20:17:16 +0200103 node.conn.fs.makedirs(self.suite.remote_dir)
koder aka kdanilovbbbe1dc2016-12-20 01:19:56 +0200104 except Exception:
koder aka kdanilov108ac362017-01-19 20:17:16 +0200105 msg = "Failed to recreate folder {} on remote {}.".format(self.suite.remote_dir, node)
koder aka kdanilov3b4da8b2016-10-17 00:17:53 +0300106 logger.exception(msg)
koder aka kdanilovbbbe1dc2016-12-20 01:19:56 +0200107 raise StopTestError()
koder aka kdanilovbc2c8982015-06-13 02:50:43 +0300108
koder aka kdanilov108ac362017-01-19 20:17:16 +0200109 # TODO: check this during config validation
110 if self.file_size % (4 * (1024 ** 2)) != 0:
111 logger.error("Test file size must be proportional to 4MiB")
112 raise StopTestError()
113
koder aka kdanilov3b4da8b2016-10-17 00:17:53 +0300114 self.install_utils(node)
koder aka kdanilovbbbe1dc2016-12-20 01:19:56 +0200115
koder aka kdanilova732a602017-02-01 20:29:56 +0200116 if self.skip_prefill:
117 logger.info("Prefill is skipped due to 'skip_prefill' set to true")
118 else:
119 mb = int(self.file_size / 1024 ** 2)
120 logger.info("Filling test file %s on node %s with %sMiB of random data", self.file_name, node.info, mb)
121 is_prefilled, fill_bw = node.conn.fio.fill_file(self.file_name, mb,
122 force=self.force_prefill,
123 fio_path=self.fio_path)
124 if not is_prefilled:
125 logger.info("Test file on node %s is already prefilled", node.info)
126 elif fill_bw is not None:
127 logger.info("Initial fio fill bw is %s MiBps for %s", fill_bw, node.info)
koder aka kdanilovbbbe1dc2016-12-20 01:19:56 +0200128
koder aka kdanilov70227062016-11-26 23:23:21 +0200129 def install_utils(self, node: IRPCNode) -> None:
koder aka kdanilovbbbe1dc2016-12-20 01:19:56 +0200130 os_info = get_os(node)
koder aka kdanilov6ab4d432015-06-22 00:26:28 +0300131 if self.use_system_fio:
koder aka kdanilovbbbe1dc2016-12-20 01:19:56 +0200132 if os_info.distro != 'ubuntu':
133 logger.error("Only ubuntu supported on test VM")
134 raise StopTestError()
135 node.conn.fio.install('fio', binary='fio')
136 else:
137 node.conn.fio.install('bzip2', binary='bzip2')
koder aka kdanilov70227062016-11-26 23:23:21 +0200138 fio_dir = os.path.dirname(os.path.dirname(wally.__file__)) # type: str
139 fio_dir = os.path.join(os.getcwd(), fio_dir)
140 fio_dir = os.path.join(fio_dir, 'fio_binaries')
141 fname = 'fio_{0.release}_{0.arch}.bz2'.format(os_info)
142 fio_path = os.path.join(fio_dir, fname) # type: str
143
144 if not os.path.exists(fio_path):
koder aka kdanilovbbbe1dc2016-12-20 01:19:56 +0200145 logger.error("No prebuild fio binary available for {0}".format(os_info))
146 raise StopTestError()
koder aka kdanilov70227062016-11-26 23:23:21 +0200147
148 bz_dest = self.join_remote('fio.bz2') # type: str
koder aka kdanilov108ac362017-01-19 20:17:16 +0200149 node.copy_file(fio_path, bz_dest, compress=False)
koder aka kdanilovbbbe1dc2016-12-20 01:19:56 +0200150 node.run("bzip2 --decompress {} ; chmod a+x {}".format(bz_dest, self.join_remote("fio")))
koder aka kdanilov70227062016-11-26 23:23:21 +0200151
koder aka kdanilovf90de852017-01-20 18:12:27 +0200152 def get_expected_runtime(self, job_config: JobConfig) -> int:
koder aka kdanilovf2865172016-12-30 03:35:11 +0200153 return execution_time(cast(FioJobConfig, job_config))
koder aka kdanilov70227062016-11-26 23:23:21 +0200154
koder aka kdanilovf90de852017-01-20 18:12:27 +0200155 def prepare_iteration(self, node: IRPCNode, job: JobConfig) -> None:
koder aka kdanilov108ac362017-01-19 20:17:16 +0200156 node.put_to_file(self.remote_task_file, str(job).encode("utf8"))
koder aka kdanilov23e6bdf2016-12-24 02:18:54 +0200157
158 # TODO: get a link to substorage as a parameter
koder aka kdanilovf90de852017-01-20 18:12:27 +0200159 def run_iteration(self, node: IRPCNode, job: JobConfig) -> List[TimeSeries]:
koder aka kdanilov108ac362017-01-19 20:17:16 +0200160 exec_time = execution_time(cast(FioJobConfig, job))
161
koder aka kdanilov23e6bdf2016-12-24 02:18:54 +0200162
koder aka kdanilovbbbe1dc2016-12-20 01:19:56 +0200163 fio_cmd_templ = "cd {exec_folder}; " + \
164 "{fio_path} --output-format=json --output={out_file} --alloc-size=262144 {job_file}"
165
koder aka kdanilovbbbe1dc2016-12-20 01:19:56 +0200166 cmd = fio_cmd_templ.format(exec_folder=self.exec_folder,
167 fio_path=self.fio_path,
168 out_file=self.remote_output_file,
169 job_file=self.remote_task_file)
koder aka kdanilov23e6bdf2016-12-24 02:18:54 +0200170 must_be_empty = node.run(cmd, timeout=exec_time + max(300, exec_time), check_timeout=1).strip()
koder aka kdanilovbbbe1dc2016-12-20 01:19:56 +0200171
kdanylov aka koder150b2192017-04-01 16:53:01 +0300172 for line in must_be_empty.split("\n"):
173 if line.strip():
174 if 'only root may flush block devices' in line:
175 continue
176 logger.error("Unexpected fio output: %r", must_be_empty)
177 break
koder aka kdanilovbc2c8982015-06-13 02:50:43 +0300178
koder aka kdanilovf2865172016-12-30 03:35:11 +0200179 # put fio output into storage
180 fio_out = node.get_file_content(self.remote_output_file)
koder aka kdanilov108ac362017-01-19 20:17:16 +0200181
182 path = DataSource(suite_id=self.suite.storage_id,
183 job_id=job.storage_id,
184 node_id=node.node_id,
koder aka kdanilova732a602017-02-01 20:29:56 +0200185 sensor='fio',
186 dev=None,
187 metric='stdout',
koder aka kdanilov108ac362017-01-19 20:17:16 +0200188 tag='json')
koder aka kdanilov108ac362017-01-19 20:17:16 +0200189 self.storage.put_extra(fio_out, path)
koder aka kdanilov23e6bdf2016-12-24 02:18:54 +0200190 node.conn.fs.unlink(self.remote_output_file)
191
192 files = [name for name in node.conn.fs.listdir(self.exec_folder)]
kdanylov aka koder026e5f22017-05-15 01:04:39 +0300193 result = [] # type: List[TimeSeries]
koder aka kdanilova732a602017-02-01 20:29:56 +0200194 for name, file_path, units in get_log_files(cast(FioJobConfig, job)):
koder aka kdanilov108ac362017-01-19 20:17:16 +0200195 log_files = [fname for fname in files if fname.startswith(file_path)]
koder aka kdanilov23e6bdf2016-12-24 02:18:54 +0200196 if len(log_files) != 1:
197 logger.error("Found %s files, match log pattern %s(%s) - %s",
koder aka kdanilov108ac362017-01-19 20:17:16 +0200198 len(log_files), file_path, name, ",".join(log_files[10:]))
koder aka kdanilov23e6bdf2016-12-24 02:18:54 +0200199 raise StopTestError()
200
201 fname = os.path.join(self.exec_folder, log_files[0])
202 raw_result = node.get_file_content(fname) # type: bytes
koder aka kdanilov23e6bdf2016-12-24 02:18:54 +0200203 node.conn.fs.unlink(fname)
204
205 try:
206 log_data = raw_result.decode("utf8").split("\n")
207 except UnicodeEncodeError:
208 logger.exception("Error during parse %s fio log file - can't decode usint UTF8", name)
209 raise StopTestError()
210
koder aka kdanilova732a602017-02-01 20:29:56 +0200211 # TODO: fix units, need to get array type from stream
212
213 parsed = [] # type: List[Union[List[int], int]]
214 times = []
koder aka kdanilov23e6bdf2016-12-24 02:18:54 +0200215
koder aka kdanilov23e6bdf2016-12-24 02:18:54 +0200216 for idx, line in enumerate(log_data):
217 line = line.strip()
218 if line:
219 try:
220 time_ms_s, val_s, _, *rest = line.split(",")
koder aka kdanilovf2865172016-12-30 03:35:11 +0200221 times.append(int(time_ms_s.strip()))
koder aka kdanilovffaf48d2016-12-27 02:25:29 +0200222
koder aka kdanilov23e6bdf2016-12-24 02:18:54 +0200223 if name == 'lat':
224 vals = [int(i.strip()) for i in rest]
225
226 if len(vals) != expected_lat_bins:
kdanylov aka kodercdfcdaf2017-04-29 10:03:39 +0300227 msg = "Expect {} bins in latency histogram, but found {} at time {}" \
228 .format(expected_lat_bins, len(vals), time_ms_s)
229 logger.error(msg)
230 raise StopTestError(msg)
koder aka kdanilov23e6bdf2016-12-24 02:18:54 +0200231
koder aka kdanilova732a602017-02-01 20:29:56 +0200232 parsed.append(vals)
koder aka kdanilov23e6bdf2016-12-24 02:18:54 +0200233 else:
234 parsed.append(int(val_s.strip()))
235 except ValueError:
236 logger.exception("Error during parse %s fio log file in line %s: %r", name, idx, line)
237 raise StopTestError()
koder aka kdanilovffaf48d2016-12-27 02:25:29 +0200238
kdanylov aka koder026e5f22017-05-15 01:04:39 +0300239 assert not self.suite.keep_raw_files, "keep_raw_files is not supported"
koder aka kdanilova732a602017-02-01 20:29:56 +0200240
kdanylov aka kodercdfcdaf2017-04-29 10:03:39 +0300241 histo_bins = None if name != 'lat' else numpy.array(get_lat_vals())
kdanylov aka koder026e5f22017-05-15 01:04:39 +0300242 ts = TimeSeries(data=numpy.array(parsed, dtype='uint64'),
243 units=units,
244 times=numpy.array(times, dtype='uint64'),
245 time_units='ms',
246 source=path(metric=name, tag='csv'),
247 histo_bins=histo_bins)
248 result.append(ts)
koder aka kdanilov108ac362017-01-19 20:17:16 +0200249 return result
koder aka kdanilov7f59d562016-12-26 01:34:23 +0200250
    def format_for_console(self, data: Any) -> str:
        """Render test results for console output.

        Not implemented for the fio test; always raises NotImplementedError.
        """
        raise NotImplementedError()