blob: 5c9357b5441a7d6664fe20acc337a67c3c9b1ca8 [file] [log] [blame]
Alex0989ecf2022-03-29 13:43:21 -05001# Author: Alex Savatieiev (osavatieiev@mirantis.com; a.savex@gmail.com)
2# Copyright 2019-2022 Mirantis, Inc.
Alex5cace3b2021-11-10 16:40:37 -06003from cfg_checker.agent.fio_runner import get_fio_options
Alex90ac1532021-12-09 11:13:14 -06004from cfg_checker.agent.fio_runner import seq_modes, mix_modes
Alexdcb792f2021-10-04 14:24:21 -05005from cfg_checker.common import logger_cli
6from cfg_checker.common.settings import ENV_TYPE_KUBE
7from cfg_checker.helpers import args_utils
8from cfg_checker.modules.ceph import info, bench
9
Alexeb934de2022-10-06 13:49:30 -050010
Alexdcb792f2021-10-04 14:24:21 -050011command_help = "Ceph Storage information and benchmarks"
12supported_envs = [ENV_TYPE_KUBE]
13
14
15# def _selectClass(_env, strClassHint="checker"):
16# _class = None
17# if _env == ENV_TYPE_SALT:
18# if strClassHint == "info":
19# _class = info.SaltCephInfo
20# elif strClassHint == "bench":
21# _class = bench.SaltCephInfo
22# elif _env == ENV_TYPE_KUBE:
23# if strClassHint == "info":
24# _class = info.KubeCephInfo
25# elif strClassHint == "bench":
26# _class = bench.KubeCephBench
27# if not _class:
28# raise CheckerException(
29# "Unknown hint for selecting Ceph handler Class: '{}'".format(
30# strClassHint
31# )
32# )
33# else:
34# return _class
35
def _get_param_and_log(arg, param_str):
    """Fetch a single option value from parsed arguments and announce it.

    :param arg: parsed arguments object, passed to args_utils.get_arg as is
    :param param_str: name of the option to fetch
    :return: value returned by args_utils.get_arg for this option
    """
    fetched = args_utils.get_arg(arg, param_str)
    # Log the option as 'name=value' at info level
    announce = " {}={}".format(param_str, fetched)
    logger_cli.info(announce)
    return fetched
40
Alexdcb792f2021-10-04 14:24:21 -050041
def init_parser(_parser):
    """Attach the 'info' and 'bench' sub-commands to the given parser.

    The name of the selected sub-command is stored in the 'type' attribute
    of the parsed namespace.

    :param _parser: argparse parser to extend with ceph sub-commands
    :return: the same parser object, with sub-commands attached
    """
    # ceph subparsers
    ceph_subparsers = _parser.add_subparsers(dest='type')

    # 'info' sub-command: cluster information gathering
    ceph_info_parser = ceph_subparsers.add_parser(
        'info',
        help="Gather Ceph Cluster information"
    )

    ceph_info_parser.add_argument(
        '--detailed',
        action="store_true", default=False,
        help="Print additional details. (Not implemented yet)"
    )

    ceph_info_parser.add_argument(
        '--client-name',
        metavar='client_name',
        help="Client name for archive naming"
    )

    ceph_info_parser.add_argument(
        '--project-name',
        metavar='projectname',
        help="Project name for archive naming"
    )

    ceph_info_parser.add_argument(
        '--html',
        metavar='ceph_html_filename',
        help="HTML filename to save report"
    )

    # 'bench' sub-command: benchmark run, cleanup and reporting options
    ceph_bench_parser = ceph_subparsers.add_parser(
        'bench',
        help="Run ceph benchmark"
    )

    ceph_bench_parser.add_argument(
        '--agents',
        type=int, metavar='agent_count', default=5,
        help="Number of agents to use in all test runs. Default: 5"
    )
    ceph_bench_parser.add_argument(
        '--html',
        metavar='ceph_html_filename',
        help="HTML filename to save report"
    )
    ceph_bench_parser.add_argument(
        '--storage-class',
        metavar='storage_class',
        help="Storage class to be used in benchmark"
    )
    ceph_bench_parser.add_argument(
        '--task-file',
        metavar='task_file',
        help="Task file for benchmark with parameters to use"
    )
    ceph_bench_parser.add_argument(
        '--no-cleanup',
        action="store_true", default=False,
        help="Do not cleanup services, agents, pvc, and pv"
    )
    ceph_bench_parser.add_argument(
        '--cleanup-only',
        action="store_true", default=False,
        help="Cleanup resources related to benchmark"
    )
    ceph_bench_parser.add_argument(
        '--report-only',
        action="store_true", default=False,
        help="Just create report using files in '--dump-path' folder"
    )
    ceph_bench_parser.add_argument(
        '--dump-path',
        metavar="dump_results",
        help="Dump result after each test run to use them later. "
             "Default: '/tmp'"
    )

    # fio options used for a single benchmark run
    ceph_bench_parser.add_argument(
        '--name',
        metavar="name", default="cephbench",
        help="Job name to use for running fio. "
             "Can be used to grep results. Default: 'cephbench'"
    )
    ceph_bench_parser.add_argument(
        '--bs',
        metavar="blocksize", default="16k",
        help="Block size for single run. Default: '16k'"
    )
    ceph_bench_parser.add_argument(
        '--iodepth',
        metavar="iodepth", default="16",
        help="IO Depth for single run. Default: '16'"
    )
    ceph_bench_parser.add_argument(
        '--size',
        metavar="size", default="10G",
        help="Persistent volume size (M, G). Default: '10G'"
    )
    ceph_bench_parser.add_argument(
        '--readwrite',
        metavar="readwrite", default="randrw",
        help="Test mode for single run (read, write, randrw, "
             "randread, randwrite). Default: 'randrw'"
    )
    ceph_bench_parser.add_argument(
        '--rwmixread',
        metavar="rwmixread", default="50",
        help="Percent of read in random mixed mode (randrw). Default: '50'"
    )
    ceph_bench_parser.add_argument(
        '--ramp-time',
        metavar="ramp_time", default="5s",
        help="Warmup time before test. Default: '5s'"
    )
    ceph_bench_parser.add_argument(
        '--runtime',
        metavar="runtime", default="60s",
        help="How long to run test. Default: '60s'"
    )
    ceph_bench_parser.add_argument(
        '--ioengine',
        metavar="ioengine", default="libaio",
        help="IO Engine used by fio. See 'fio eng-help' output for list. "
             "Default: 'libaio'"
    )
    # Adjacent literals are concatenated as is, so each piece has to carry
    # its own separator; otherwise the text renders as "usedDefault: ..."
    ceph_bench_parser.add_argument(
        '--offset-increment',
        metavar="offset_increment", default="500M",
        help="Offset to be used in 'read' and 'write' modes if multiple jobs "
             "used. "
             "Default: '500M'"
    )

    return _parser
178
179
def do_info(args, config):
    """Gather Ceph cluster information, create HTML report and an archive.

    :param args: parsed command line arguments; 'client_name',
        'project_name' and 'html' options are read
    :param config: config object handed over to the Ceph info class
    :return: None
    """
    args_utils.check_supported_env(ENV_TYPE_KUBE, args, config)

    # Both names take part in the archive filename, bail out if any is unset
    if not (args.client_name and args.project_name):
        logger_cli.error(
            "ERROR: Missing '--client-name' or '--project-name' options"
        )
        return

    collector = info.KubeCephInfo(config)
    archive_name = collector.get_info_archive_filename(
        args.client_name,
        args.project_name
    )
    logger_cli.info("# Archive will be generated to '{}'".format(archive_name))
    # filename for the html report
    report_name = args_utils.get_arg(args, 'html')

    logger_cli.info("# Collecting Ceph cluster information")
    collector.gather_info()
    collector.gather_osd_configs()

    # Debug: enable the calls below to work on report generation
    # without actual data collection on each run
    # collector.dump_info()
    # collector.load_info()

    # Prepare the data and render the html report
    collector.get_transposed_latency_table()
    collector.get_latest_health_readout()
    collector.create_html_report(report_name)

    # cli part: archive and summary
    collector.generate_archive(archive_name)
    collector.print_summary()
220
221
def do_bench(args, config):
    """Run Ceph benchmark using multiple agent pods.

    Depending on the options, one of the following is done:
    - '--cleanup-only': prepare and run benchmark resources cleanup, exit
    - '--report-only': preload previous results, create report, exit
    - otherwise: run a single benchmark (fio options taken from arguments)
      or a task file based run, do cleanup unless '--no-cleanup' is set
      and create the report

    :param args: parsed command line arguments
    :param config: config object; benchmark settings are stored on it
        as attributes (bench_mode, bench_agent_count and others)
    :return: None
    """
    # if only cleanup needed do it and exit
    _cleanup_only = args_utils.get_arg(args, 'cleanup_only')
    _report_only = args_utils.get_arg(args, 'report_only')
    config.resource_prefix = "cfgagent"
    if _cleanup_only:
        # Do forced resource cleanup and exit
        config.bench_mode = "cleanup"
        config.bench_agent_count = -1
        ceph_bench = bench.KubeCephBench(config)
        logger_cli.info(
            "# Discovering benchmark resources using prefix of '{}'".format(
                config.resource_prefix
            )
        )
        ceph_bench.prepare_cleanup()
        ceph_bench.cleanup()
        return

    # Results dump path is resolved exactly once, here. It must not be
    # re-assigned later: an unset option would replace the default
    # with an empty value.
    _dump_path = args_utils.get_arg(args, "dump_path")
    if _dump_path:
        logger_cli.info("# Results will be dumped to '{}'".format(_dump_path))
        config.bench_results_dump_path = _dump_path
    else:
        _default_dump_path = "/tmp"
        logger_cli.info(
            "# No result dump path set. Defaulting to '{}'. "
            "Consider setting it if running long task_file "
            "based test runs".format(_default_dump_path)
        )
        config.bench_results_dump_path = _default_dump_path

    # Report filename
    _filename = args_utils.get_arg(args, 'html')
    # Ceph info class, handed over to the bench class later
    logger_cli.info("# Collecting Ceph cluster information")
    ceph_info = info.KubeCephInfo(config)

    # Task files or options
    _opts = get_fio_options()
    # Load name and announce it
    config.bench_name = args_utils.get_arg(args, "name")
    _opts["name"] = config.bench_name
    logger_cli.info(
        "# Using '{}' as ceph bench jobs name".format(_opts["name"])
    )

    if _report_only:
        # Do forced report creation and exit
        config.bench_mode = "report"
        config.bench_agent_count = -1
        ceph_bench = bench.KubeCephBench(config)
        ceph_bench.set_ceph_info_class(ceph_info)
        # NOTE(review): resource prefix is printed as the report name here;
        # looks like report filename was intended - confirm
        logger_cli.info(
            "# Preparing to generate report '{}'".format(
                config.resource_prefix
            )
        )
        # Preload previous results for this name
        ceph_bench.preload_results()
        # Gather ceph data
        ceph_bench.wait_ceph_cooldown()
        # Generate report
        ceph_bench.create_report(_filename)
        return

    # Prepare the tasks and do synced testrun or a single one
    logger_cli.info("# Initializing ceph benchmark module")
    args_utils.check_supported_env(ENV_TYPE_KUBE, args, config)
    # agents count option
    config.bench_agent_count = args_utils.get_arg(args, "agents")
    logger_cli.info("-> using {} agents".format(config.bench_agent_count))
    # Cleaning option
    config.no_cleaning_after_benchmark = args_utils.get_arg(args, "no_cleanup")
    # storage class
    _storage_class = args_utils.get_arg(args, "storage_class")
    logger_cli.info("-> using storage class of '{}'".format(_storage_class))
    config.bench_storage_class = _storage_class

    _task_file = args_utils.get_arg(args, "task_file", nofail=True)
    if not _task_file:
        logger_cli.info("-> Running single benchmark run")
        config.bench_mode = "single"
        # Updating _opts from arguments
        _params = [
            "bs",
            "iodepth",
            "size",
            "readwrite",
            "ramp_time",
            "runtime",
            "ioengine"
        ]
        for _param in _params:
            _opts[_param] = _get_param_and_log(args, _param)
        # Mode specific option: offset for sequential modes,
        # read percent for mixed ones
        if _opts["readwrite"] in seq_modes:
            _opts["offset_increment"] = _get_param_and_log(
                args,
                "offset_increment"
            )
        elif _opts["readwrite"] in mix_modes:
            _opts["rwmixread"] = _get_param_and_log(args, "rwmixread")
    else:
        logger_cli.info("-> running with tasks from '{}'".format(_task_file))
        config.bench_task_file = _task_file
        config.bench_mode = "tasks"
        # Add default size to options
        _opts["size"] = _get_param_and_log(args, "size")
    logger_cli.debug("... default/selected options for fio:")
    for _k, _v in _opts.items():
        # TODO: Update options for single run
        logger_cli.debug(" {} = {}".format(_k, _v))

    # init the Bench class
    ceph_bench = bench.KubeCephBench(config)
    ceph_bench.set_ceph_info_class(ceph_info)
    # Preload previous results for this name
    ceph_bench.preload_results()
    # Do the testrun
    ceph_bench.prepare_agents(_opts)
    ceph_bench.wait_ceph_cooldown()

    if not ceph_bench.run_benchmark(_opts):
        # No cleaning and/or report if benchmark was not finished
        logger_cli.info("# Abnormal benchmark run, no cleaning performed")
        return

    # Cleaning
    if not config.no_cleaning_after_benchmark:
        ceph_bench.cleanup()
    else:
        # Message uses the option name as declared in the parser
        logger_cli.info(
            "# '--no-cleanup' option set. Cleaning not conducted."
        )

    # Create report
    ceph_bench.create_report(_filename)

    return