blob: 31c6b7ac27e3275bb77dc1c03ee04404297a018a [file] [log] [blame]
Alex0989ecf2022-03-29 13:43:21 -05001# Author: Alex Savatieiev (osavatieiev@mirantis.com; a.savex@gmail.com)
2# Copyright 2019-2022 Mirantis, Inc.
Alex0bcf31b2022-03-29 17:38:58 -05003import os
4
Alex5cace3b2021-11-10 16:40:37 -06005from cfg_checker.agent.fio_runner import get_fio_options
Alex0bcf31b2022-03-29 17:38:58 -05006from cfg_checker.agent.fio_runner import seq_modes, mix_modes, rand_modes
Alexdcb792f2021-10-04 14:24:21 -05007from cfg_checker.common import logger_cli
Alex0bcf31b2022-03-29 17:38:58 -05008from cfg_checker.common.other import utils
Alexdcb792f2021-10-04 14:24:21 -05009from cfg_checker.common.settings import ENV_TYPE_KUBE
Alex0bcf31b2022-03-29 17:38:58 -050010from cfg_checker.common.exception import CheckerException
Alexdcb792f2021-10-04 14:24:21 -050011from cfg_checker.helpers import args_utils
12from cfg_checker.modules.ceph import info, bench
13
Alexeb934de2022-10-06 13:49:30 -050014
# Short description of what this module's commands do
command_help = "Ceph Storage information and benchmarks"
# Environment types this module declares support for
supported_envs = [ENV_TYPE_KUBE]
17
18
# def _selectClass(_env, strClassHint="checker"):
#     _class = None
#     if _env == ENV_TYPE_SALT:
#         if strClassHint == "info":
#             _class = info.SaltCephInfo
#         elif strClassHint == "bench":
#             _class = bench.SaltCephInfo
#     elif _env == ENV_TYPE_KUBE:
#         if strClassHint == "info":
#             _class = info.KubeCephInfo
#         elif strClassHint == "bench":
#             _class = bench.KubeCephBench
#     if not _class:
#         raise CheckerException(
#             "Unknown hint for selecting Ceph handler Class: '{}'".format(
#                 strClassHint
#             )
#         )
#     else:
#         return _class
39
Alex0bcf31b2022-03-29 17:38:58 -050040
def _validate_option_type(value, type_list):
    """Ensure the type part of an option value is one of the allowed ones.

    The value is split with ``utils.split_option_type`` and the second
    element of the result is looked up in ``type_list``.

    :param value: option value as given by user or task (e.g. '10G', '60s')
    :param type_list: list of accepted type strings
    :raises CheckerException: if the type part is not in ``type_list``
    :return: None
    """
    # Only the type part matters here, the first element is ignored
    _, suffix = utils.split_option_type(value)
    if suffix in type_list:
        return
    expected = ", ".join(type_list)
    raise CheckerException(
        "Invalid option type '{}'. Expected types: {}".format(value, expected)
    )
52
53
54def _validate_option(value, type_list):
55 if value not in type_list:
56 raise CheckerException(
57 "Invalid option '{}'. Expected one of: {}".format(
58 value,
59 ", ".join(type_list)
60 )
61 )
62 else:
63 return
64
65
def _get_param_and_log(arg, param_str):
    """Read a parameter using ``args_utils.get_arg``, log it and return it.

    :param arg: parsed arguments object passed on to ``args_utils.get_arg``
    :param param_str: name of the parameter to read
    :return: whatever ``args_utils.get_arg`` returned for this parameter
    """
    fetched = args_utils.get_arg(arg, param_str)
    message = " {}={}".format(param_str, fetched)
    logger_cli.info(message)
    return fetched
70
Alexdcb792f2021-10-04 14:24:21 -050071
def init_parser(_parser):
    """Attach the 'info' and 'bench' sub-commands with their options.

    The chosen sub-command name is stored in the ``type`` attribute of
    the parsed namespace.

    :param _parser: argparse parser (or compatible object) to extend
    :return: the same ``_parser`` object
    """
    # ceph subparsers
    ceph_subparsers = _parser.add_subparsers(dest='type')

    # --- 'info' sub-command
    ceph_info_parser = ceph_subparsers.add_parser(
        'info',
        help="Gather Ceph Cluster information"
    )

    ceph_info_parser.add_argument(
        '--detailed',
        action="store_true", default=False,
        help="Print additional details. (Not implemented yet)"
    )

    ceph_info_parser.add_argument(
        '--client-name',
        metavar='client_name',
        help="Client name for archive naming"
    )

    ceph_info_parser.add_argument(
        '--project-name',
        metavar='projectname',
        help="Project name for archive naming"
    )

    ceph_info_parser.add_argument(
        '--html',
        metavar='ceph_html_filename',
        help="HTML filename to save report"
    )

    # --- 'bench' sub-command
    ceph_bench_parser = ceph_subparsers.add_parser(
        'bench',
        help="Run ceph benchmark"
    )

    ceph_bench_parser.add_argument(
        '--agents',
        type=int, metavar='agent_count', default=5,
        help="Number of agents to use in all test runs. Default: 5"
    )
    ceph_bench_parser.add_argument(
        '--html',
        metavar='ceph_html_filename',
        help="HTML filename to save report"
    )
    ceph_bench_parser.add_argument(
        '--storage-class',
        metavar='storage_class',
        help="Storage class to be used in benchmark"
    )
    ceph_bench_parser.add_argument(
        '--task-file',
        metavar='task_file',
        help="Task file for benchmark with parameters to use"
    )
    ceph_bench_parser.add_argument(
        '--no-cleanup',
        action="store_true", default=False,
        help="Do not cleanup services, agents, pvc, and pv"
    )
    ceph_bench_parser.add_argument(
        '--cleanup-only',
        action="store_true", default=False,
        help="Cleanup resources related to benchmark"
    )
    ceph_bench_parser.add_argument(
        '--report-only',
        action="store_true", default=False,
        help="Just create report using files in '--dump-path' folder"
    )
    ceph_bench_parser.add_argument(
        '--dump-path',
        metavar="dump_results",
        help="Dump result after each test run to use them later. "
             "Default: '/tmp'"
    )
    # Options below describe a single fio run; all of them are kept
    # as strings and have defaults
    ceph_bench_parser.add_argument(
        '--name',
        metavar="name", default="cephbench",
        help="Job name to use for running fio. "
             "Can be used to grep results. Default: 'cephbench'"
    )
    ceph_bench_parser.add_argument(
        '--bs',
        metavar="blocksize", default="16k",
        help="Block size for single run. Default: '16k'"
    )
    ceph_bench_parser.add_argument(
        '--iodepth',
        metavar="iodepth", default="16",
        help="IO Depth for single run. Default: '16'"
    )
    ceph_bench_parser.add_argument(
        '--size',
        metavar="size", default="10G",
        help="Persistent volume size (M, G). Default: '10G'"
    )
    ceph_bench_parser.add_argument(
        '--readwrite',
        metavar="readwrite", default="randrw",
        help="Test mode for single run (read, write, randrw, "
             "randread, randwrite). Default: 'randrw'"
    )
    ceph_bench_parser.add_argument(
        '--rwmixread',
        metavar="rwmixread", default="50",
        help="Percent of read in random mixed mode (randrw). Default: '50'"
    )
    ceph_bench_parser.add_argument(
        '--ramp-time',
        metavar="ramp_time", default="5s",
        help="Warmup time before test. Default: '5s'"
    )
    ceph_bench_parser.add_argument(
        '--runtime',
        metavar="runtime", default="60s",
        help="How long to run test. Default: '60s'"
    )
    ceph_bench_parser.add_argument(
        '--ioengine',
        metavar="ioengine", default="libaio",
        help="IO Engine used by fio. See 'fio eng-help' output for list. "
             "Default: 'libaio'"
    )
    ceph_bench_parser.add_argument(
        '--offset-increment',
        metavar="offset_increment", default="500M",
        # Separator added: pieces used to be glued as "usedDefault: ..."
        help="Offset to be used in 'read' and 'write' modes if multiple jobs "
             "used. "
             "Default: '500M'"
    )

    return _parser
208
209
def do_info(args, config):
    """Collect Ceph cluster info, then produce HTML report and archive.

    Requires both ``--client-name`` and ``--project-name``; if either is
    missing an error is logged and nothing else is done.

    :param args: parsed command line arguments
    :param config: checker configuration object
    :return: None
    """
    # This command is for Kube environment only
    args_utils.check_supported_env(ENV_TYPE_KUBE, args, config)

    # Both names are used to build archive filename, so bail out early
    if not (args.client_name and args.project_name):
        logger_cli.error(
            "ERROR: Missing '--client-name' or '--project-name' options"
        )
        return

    ceph_info = info.KubeCephInfo(config)
    archive_name = ceph_info.get_info_archive_filename(
        args.client_name,
        args.project_name
    )
    logger_cli.info("# Archive will be generated to '{}'".format(archive_name))
    # HTML report filename
    html_name = args_utils.get_arg(args, 'html')

    logger_cli.info("# Collecting Ceph cluster information")
    ceph_info.gather_info()
    ceph_info.gather_osd_configs()

    # To debug report generation without actual data collecting
    # each time, enable these calls here:
    # ceph_info.dump_info()
    # ceph_info.load_info()

    # Prepare data and render the report
    ceph_info.get_transposed_latency_table()
    ceph_info.get_latest_health_readout()
    ceph_info.create_html_report(html_name)

    # CLI part: archive and summary
    ceph_info.generate_archive(archive_name)
    ceph_info.print_summary()
250
251
def _log_fio_options(_opts):
    """Print all fio options with their values on debug level."""
    logger_cli.debug("... default/selected options for fio:")
    for _k, _v in _opts.items():
        # TODO: Update options for single run
        logger_cli.debug(" {} = {}".format(_k, _v))


def do_bench(args, config):
    """Run Ceph benchmark using multiple agents, or one of its sub-modes.

    Modes, in order of precedence:
      - ``--cleanup-only``: discover and remove benchmark resources, exit
      - ``--report-only``: build report from previously dumped results, exit
      - no ``--task-file``: single run using options from command line
      - ``--task-file``: run tasks listed in the file

    :param args: parsed command line arguments
    :param config: checker configuration object; bench related
        attributes are set on it before handing it to KubeCephBench
    :raises CheckerException: on non-existent dump path or invalid options
    :return: None
    """
    # if only cleanup needed do it and exit
    _cleanup_only = args_utils.get_arg(args, 'cleanup_only')
    _report_only = args_utils.get_arg(args, 'report_only')
    config.resource_prefix = "cfgagent"
    if _cleanup_only:
        # Do forced resource cleanup and exit
        config.bench_mode = "cleanup"
        config.bench_agent_count = -1
        ceph_bench = bench.KubeCephBench(config)
        logger_cli.info(
            "# Discovering benchmark resources using prefix of '{}'".format(
                config.resource_prefix
            )
        )
        ceph_bench.prepare_cleanup()
        ceph_bench.cleanup()
        return

    # dump results options
    # This is the only place where dump path is set: the default must
    # not be overwritten later by an empty value
    _dump_path = args_utils.get_arg(args, "dump_path")
    if _dump_path:
        if not os.path.exists(_dump_path):
            raise CheckerException(
                "ERROR: Dump path invalid: '{}'".format(_dump_path)
            )
        logger_cli.info("# Results will be dumped to '{}'".format(_dump_path))
        config.bench_results_dump_path = _dump_path
    else:
        _default_path = "/tmp"
        logger_cli.info(
            "# No result dump path set. Defaulting to {}. "
            "Consider setting it if running long task_file "
            "based test runs".format(_default_path)
        )
        config.bench_results_dump_path = _default_path

    # Report filename
    _filename = args_utils.get_arg(args, 'html')
    # gather Ceph info
    logger_cli.info("# Collecting Ceph cluster information")
    ceph_info = info.KubeCephInfo(config)

    # Task files or options
    _opts = get_fio_options()
    # Load name and announce it
    config.bench_name = args_utils.get_arg(args, "name")
    _opts["name"] = config.bench_name
    logger_cli.info(
        "# Using '{}' as ceph bench jobs name".format(_opts["name"])
    )

    if _report_only:
        # Do forced report creation and exit
        config.bench_mode = "report"
        config.bench_agent_count = -1
        ceph_bench = bench.KubeCephBench(config)
        ceph_bench.set_ceph_info_class(ceph_info)
        logger_cli.info(
            "# Preparing to generate report '{}'".format(_filename)
        )
        # Preload previous results for this name
        ceph_bench.preload_results()
        # Gather ceph data
        ceph_bench.wait_ceph_cooldown()
        # Generate report
        ceph_bench.create_report(_filename)
        return

    # Prepare the tasks and do synced testrun or a single one
    logger_cli.info("# Initializing ceph benchmark module")
    args_utils.check_supported_env(ENV_TYPE_KUBE, args, config)
    # agents count option
    config.bench_agent_count = args_utils.get_arg(args, "agents")
    logger_cli.info("-> using {} agents".format(config.bench_agent_count))
    # Cleaning option
    config.no_cleaning_after_benchmark = args_utils.get_arg(args, "no_cleanup")
    # storage class
    _storage_class = args_utils.get_arg(args, "storage_class")
    logger_cli.info("-> using storage class of '{}'".format(_storage_class))
    config.bench_storage_class = _storage_class

    _task_file = args_utils.get_arg(args, "task_file", nofail=True)
    if not _task_file:
        logger_cli.info("-> Running single benchmark run")
        config.bench_mode = "single"
        # Updating _opts from arguments
        _params = [
            "bs",
            "iodepth",
            "size",
            "readwrite",
            "ramp_time",
            "runtime",
            "ioengine"
        ]
        for _p in _params:
            _opts[_p] = _get_param_and_log(args, _p)
        # Mode specific options
        if _opts["readwrite"] in seq_modes:
            _p = "offset_increment"
            _opts[_p] = _get_param_and_log(args, _p)
        elif _opts["readwrite"] in mix_modes:
            _p = "rwmixread"
            _opts[_p] = _get_param_and_log(args, _p)
    else:
        logger_cli.info("-> running with tasks from '{}'".format(_task_file))
        config.bench_task_file = _task_file
        config.bench_mode = "tasks"
        # Add default size to options
        _opts["size"] = _get_param_and_log(args, "size")
    _log_fio_options(_opts)

    # init the Bench class
    ceph_bench = bench.KubeCephBench(config)
    ceph_bench.set_ceph_info_class(ceph_info)

    # Validate options
    logger_cli.info("-> Validating options")
    # size
    _validate_option_type(_opts["size"], ["G", "M"])
    _validate_option_type(_opts["ramp_time"], ["s", "m"])
    _validate_option_type(_opts["runtime"], ["s", "m"])
    _modes = seq_modes + mix_modes + rand_modes
    _validate_option(_opts["readwrite"], _modes)

    if _task_file:
        _s, _ = utils.split_option_type(_opts["size"])
        for idx, _task in enumerate(ceph_bench.tasks):
            # size
            # NOTE(review): only the first parts of the split values are
            # compared and the type part (M/G) is ignored; mismatch is
            # only logged, run is not aborted - confirm this is intended
            _ts, _ = utils.split_option_type(_task["size"])
            if _s < _ts:
                logger_cli.error(
                    "ERROR: Task #{} file size is to big:"
                    " {} (volume) < {} (testfile)".format(
                        idx,
                        _opts["size"],
                        _task["size"]
                    )
                )
            # other
            _validate_option(_task["readwrite"], _modes)
    # Print defaults
    _log_fio_options(_opts)

    # Preload previous results for this name
    ceph_bench.preload_results()
    # Do the testrun
    ceph_bench.prepare_agents(_opts)
    ceph_bench.wait_ceph_cooldown()

    # DEBUG of report in progress
    if not ceph_bench.run_benchmark(_opts):
        # No cleaning and/or report if benchmark was not finished
        logger_cli.info("# Abnormal benchmark run, no cleaning performed")
        return
    # Remove after DEBUG
    # ceph_bench.collect_results(_opts)
    # END DEBUG

    # Cleaning
    if not config.no_cleaning_after_benchmark:
        ceph_bench.cleanup()
    else:
        logger_cli.info(
            "# '--no-cleanup' option set. Cleaning not conducted."
        )

    # Create report
    ceph_bench.create_report(_filename)

    return