blob: 3f726f8484585855856fe7258472ee7a897ec436 [file] [log] [blame]
Alex5cace3b2021-11-10 16:40:37 -06001from cfg_checker.agent.fio_runner import get_fio_options
Alex90ac1532021-12-09 11:13:14 -06002from cfg_checker.agent.fio_runner import seq_modes, mix_modes
Alexdcb792f2021-10-04 14:24:21 -05003from cfg_checker.common import logger_cli
4from cfg_checker.common.settings import ENV_TYPE_KUBE
5from cfg_checker.helpers import args_utils
6from cfg_checker.modules.ceph import info, bench
7
# Human-readable description of this module.
# NOTE(review): presumably shown by the CLI as the command help - confirm
command_help = "Ceph Storage information and benchmarks"
# Environment types this module declares as supported (Kubernetes only).
# NOTE(review): presumably read by the CLI loader - verify against caller
supported_envs = [ENV_TYPE_KUBE]
10
11
12# def _selectClass(_env, strClassHint="checker"):
13# _class = None
14# if _env == ENV_TYPE_SALT:
15# if strClassHint == "info":
16# _class = info.SaltCephInfo
17# elif strClassHint == "bench":
18# _class = bench.SaltCephInfo
19# elif _env == ENV_TYPE_KUBE:
20# if strClassHint == "info":
21# _class = info.KubeCephInfo
22# elif strClassHint == "bench":
23# _class = bench.KubeCephBench
24# if not _class:
25# raise CheckerException(
26# "Unknown hint for selecting Ceph handler Class: '{}'".format(
27# strClassHint
28# )
29# )
30# else:
31# return _class
32
def _get_param_and_log(arg, param_str):
    """Fetch one option via ``args_utils.get_arg`` and log it.

    :param arg: parsed arguments object handed over to ``get_arg``
    :param param_str: name of the option to fetch
    :return: whatever ``args_utils.get_arg`` returned for the option
    """
    fetched = args_utils.get_arg(arg, param_str)
    # Announce the name/value pair that is going to be used
    message = " {}={}".format(param_str, fetched)
    logger_cli.info(message)
    return fetched
37
Alexdcb792f2021-10-04 14:24:21 -050038
def init_parser(_parser):
    """Register Ceph sub-commands and their options on ``_parser``.

    Three sub-commands are added, the chosen one is stored in the ``type``
    attribute of the parsed namespace:

    * ``info``   - gather cluster information (``--detailed``, ``--tgz``)
    * ``report`` - generate an HTML report (``--html``)
    * ``bench``  - run a benchmark (agents, cleanup/report switches and
      the fio related options)

    :param _parser: argparse parser (or sub-parser) to extend
    :return: the very same ``_parser`` object
    """
    # ceph subparsers
    ceph_subparsers = _parser.add_subparsers(dest='type')

    # 'info' sub-command
    ceph_info_parser = ceph_subparsers.add_parser(
        'info',
        help="Gather Ceph Cluster information"
    )

    ceph_info_parser.add_argument(
        '--detailed',
        action="store_true", default=False,
        help="Print additional details. (Not implemented yet)"
    )

    ceph_info_parser.add_argument(
        '--tgz',
        metavar='ceph_tgz_filename',
        help="TGZ archive filename to save gathered data"
    )

    # 'report' sub-command
    ceph_report_parser = ceph_subparsers.add_parser(
        'report',
        help="Generate Ceph Info report"
    )

    ceph_report_parser.add_argument(
        '--html',
        metavar='ceph_html_filename',
        help="HTML filename to save report"
    )

    # 'bench' sub-command
    ceph_bench_parser = ceph_subparsers.add_parser(
        'bench',
        help="Run ceph benchmark"
    )

    ceph_bench_parser.add_argument(
        '--agents',
        type=int, metavar='agent_count', default=5,
        help="Number of agents to use in all test runs. Default: 5"
    )
    ceph_bench_parser.add_argument(
        '--html',
        metavar='ceph_html_filename',
        help="HTML filename to save report"
    )
    ceph_bench_parser.add_argument(
        '--storage-class',
        metavar='storage_class',
        help="Storage class to be used in benchmark"
    )
    ceph_bench_parser.add_argument(
        '--task-file',
        metavar='task_file',
        help="Task file for benchmark with parameters to use"
    )
    ceph_bench_parser.add_argument(
        '--no-cleanup',
        action="store_true", default=False,
        help="Do not cleanup services, agents, pvc, and pv"
    )
    ceph_bench_parser.add_argument(
        '--cleanup-only',
        action="store_true", default=False,
        help="Cleanup resources related to benchmark"
    )
    ceph_bench_parser.add_argument(
        '--report-only',
        action="store_true", default=False,
        help="Just create report using files in '--dump-path' folder"
    )
    # No argparse default here on purpose: do_bench() detects the missing
    # value and falls back to '/tmp' on its own
    ceph_bench_parser.add_argument(
        '--dump-path',
        metavar="dump_results",
        help="Dump result after each test run to use them later. "
             "Default: '/tmp'"
    )
    # fio related options, used for a single (non task-file) run
    ceph_bench_parser.add_argument(
        '--name',
        metavar="name", default="cephbench",
        help="Job name to use for running fio. "
             "Can be used to grep results. Default: 'cephbench'"
    )
    ceph_bench_parser.add_argument(
        '--bs',
        metavar="blocksize", default="16k",
        help="Block size for single run. Default: '16k'"
    )
    ceph_bench_parser.add_argument(
        '--iodepth',
        metavar="iodepth", default="16",
        help="IO Depth for single run. Default: '16'"
    )
    ceph_bench_parser.add_argument(
        '--size',
        metavar="size", default="10G",
        help="Persistent volume size (M, G). Default: '10G'"
    )
    ceph_bench_parser.add_argument(
        '--readwrite',
        metavar="readwrite", default="randrw",
        help="Test mode for single run (read, write, randrw, "
             "randread, randwrite). Default: 'randrw'"
    )
    ceph_bench_parser.add_argument(
        '--rwmixread',
        metavar="rwmixread", default="50",
        help="Percent of read in random mixed mode (randrw). Default: '50'"
    )
    ceph_bench_parser.add_argument(
        '--ramp-time',
        metavar="ramp_time", default="5s",
        help="Warmup time before test. Default: '5s'"
    )
    ceph_bench_parser.add_argument(
        '--runtime',
        metavar="runtime", default="60s",
        help="How long to run test. Default: '60s'"
    )
    ceph_bench_parser.add_argument(
        '--ioengine',
        metavar="ioengine", default="libaio",
        help="IO Engine used by fio. See 'fio eng-help' output for list. "
             "Default: 'libaio'"
    )
    # Adjacent string literals are glued together verbatim, so every
    # fragment has to carry its own separator ("used. " and not "used")
    ceph_bench_parser.add_argument(
        '--offset-increment',
        metavar="offset_increment", default="500M",
        help="Offset to be used in 'read' and 'write' modes if multiple jobs "
             "used. "
             "Default: '500M'"
    )

    return _parser
174
175
def do_info(args, config):
    """Gather Ceph cluster information and save it as a TGZ archive.

    The archive name comes from ``args.tgz``; when that is empty,
    "ceph_info_archive.tgz" is used. A summary is printed once the
    archive has been generated.

    :param args: parsed command line arguments
    :param config: configuration object passed to ``info.KubeCephInfo``
    :return: None
    """
    args_utils.check_supported_env(ENV_TYPE_KUBE, args, config)
    # Fall back to the default archive name when '--tgz' was not given
    archive_name = args.tgz or "ceph_info_archive.tgz"

    collector = info.KubeCephInfo(config)

    logger_cli.info("# Collecting Ceph cluster information")
    collector.gather_info()
    collector.gather_osd_configs()

    # Debug aid: enable the calls below to work on report generation
    # without collecting the actual data each time
    # collector.dump_info()
    # collector.load_info()

    collector.generate_archive(archive_name)
    collector.print_summary()
200
201
def do_report(args, config):
    """Gather Ceph cluster information and render it as an HTML report.

    :param args: parsed command line arguments, 'html' holds the filename
    :param config: configuration object passed to ``info.KubeCephInfo``
    :return: None
    """
    args_utils.check_supported_env(ENV_TYPE_KUBE, args, config)
    report_filename = args_utils.get_arg(args, 'html')
    logger_cli.info("# Ceph cluster Configuration report")

    reporter = info.KubeCephInfo(config)
    # Debug aid: call reporter.load_info() here instead of gathering
    # to work on report generation without collecting the actual data

    # Collect the data first, then derive the tables used by the report
    reporter.gather_info()
    reporter.gather_osd_configs()
    reporter.get_transposed_latency_table()
    reporter.get_latest_health_readout()
    reporter.create_html_report(report_filename)
222
223
def do_bench(args, config):
    """Run the Ceph benchmark or one of its stand-alone stages.

    Modes, in order of precedence:

    * ``--cleanup-only``: discover benchmark resources by prefix,
      clean them up and exit;
    * ``--report-only``: preload previously dumped results and create
      the report without running anything;
    * otherwise: run either a single fio job built from command line
      options or the tasks listed in ``--task-file``, then clean up
      (unless ``--no-cleanup`` is set) and create the report.

    The function stores the selected settings on ``config``
    (``resource_prefix``, ``bench_mode``, ``bench_agent_count``,
    ``bench_results_dump_path``, ``bench_name``, ...) before handing it
    to ``bench.KubeCephBench``.

    :param args: parsed command line arguments
    :param config: configuration object, updated in place
    :return: None
    """
    _cleanup_only = args_utils.get_arg(args, 'cleanup_only')
    _report_only = args_utils.get_arg(args, 'report_only')
    config.resource_prefix = "cfgagent"
    if _cleanup_only:
        # Do forced resource cleanup and exit
        config.bench_mode = "cleanup"
        config.bench_agent_count = -1
        ceph_bench = bench.KubeCephBench(config)
        logger_cli.info(
            "# Discovering benchmark resources using prefix of '{}'".format(
                config.resource_prefix
            )
        )
        ceph_bench.prepare_cleanup()
        ceph_bench.cleanup()
        return

    # Dump path for results: set exactly once, so the '/tmp' default
    # can not be overwritten by an empty value later on
    _dump_path = args_utils.get_arg(args, "dump_path")
    if _dump_path:
        logger_cli.info("# Results will be dumped to '{}'".format(_dump_path))
    else:
        _dump_path = "/tmp"
        logger_cli.info(
            "# No result dump path set. Defaulting to {}. "
            "Consider setting it if running long task_file "
            "based test runs".format(_dump_path)
        )
    config.bench_results_dump_path = _dump_path

    # Report filename
    _filename = args_utils.get_arg(args, 'html')
    # gather Ceph info
    logger_cli.info("# Collecting Ceph cluster information")
    ceph_info = info.KubeCephInfo(config)

    # Default fio options, updated below from arguments for a single run
    _opts = get_fio_options()
    # Load name and announce it
    config.bench_name = args_utils.get_arg(args, "name")
    _opts["name"] = config.bench_name
    logger_cli.info(
        "# Using '{}' as ceph bench jobs name".format(_opts["name"])
    )

    if _report_only:
        # Do forced report creation and exit
        config.bench_mode = "report"
        config.bench_agent_count = -1
        ceph_bench = bench.KubeCephBench(config)
        ceph_bench.set_ceph_info_class(ceph_info)
        # NOTE(review): the message is formatted with the resource prefix,
        # the report filename may have been intended here - confirm
        logger_cli.info(
            "# Preparing to generate report '{}'".format(
                config.resource_prefix
            )
        )
        # Preload previous results for this name
        ceph_bench.preload_results()
        # Gather ceph data
        ceph_bench.wait_ceph_cooldown()
        # Generate report
        ceph_bench.create_report(_filename)
        return

    # Prepare the tasks and do synced testrun or a single one
    logger_cli.info("# Initializing ceph benchmark module")
    args_utils.check_supported_env(ENV_TYPE_KUBE, args, config)
    # agents count option
    config.bench_agent_count = args_utils.get_arg(args, "agents")
    logger_cli.info("-> using {} agents".format(config.bench_agent_count))
    # Cleaning option
    config.no_cleaning_after_benchmark = args_utils.get_arg(args, "no_cleanup")
    # storage class
    _storage_class = args_utils.get_arg(args, "storage_class")
    logger_cli.info("-> using storage class of '{}'".format(_storage_class))
    config.bench_storage_class = _storage_class

    _task_file = args_utils.get_arg(args, "task_file", nofail=True)
    if not _task_file:
        logger_cli.info("-> Running single benchmark run")
        config.bench_mode = "single"
        # Updating _opts from arguments
        _params = [
            "bs",
            "iodepth",
            "size",
            "readwrite",
            "ramp_time",
            "runtime",
            "ioengine"
        ]
        for _param in _params:
            _opts[_param] = _get_param_and_log(args, _param)
        # Mode specific options: offset for sequential modes,
        # read percentage for mixed ones
        if _opts["readwrite"] in seq_modes:
            _param = "offset_increment"
            _opts[_param] = _get_param_and_log(args, _param)
        elif _opts["readwrite"] in mix_modes:
            _param = "rwmixread"
            _opts[_param] = _get_param_and_log(args, _param)
    else:
        logger_cli.info("-> running with tasks from '{}'".format(_task_file))
        config.bench_task_file = _task_file
        config.bench_mode = "tasks"
    logger_cli.debug("... default/selected options for fio:")
    for _key, _value in _opts.items():
        # TODO: Update options for single run
        logger_cli.debug(" {} = {}".format(_key, _value))

    # init the Bench class
    ceph_bench = bench.KubeCephBench(config)
    ceph_bench.set_ceph_info_class(ceph_info)
    # Preload previous results for this name
    ceph_bench.preload_results()
    # Do the testrun
    ceph_bench.prepare_agents(_opts)
    ceph_bench.wait_ceph_cooldown()

    if not ceph_bench.run_benchmark(_opts):
        # No cleaning and/or report if benchmark was not finished
        logger_cli.info("# Abnormal benchmark run, no cleaning performed")
        return

    # Cleaning
    if not config.no_cleaning_after_benchmark:
        ceph_bench.cleanup()
    else:
        # Name the option exactly as it is registered in init_parser
        logger_cli.info(
            "# '--no-cleanup' option set. Cleaning not conducted."
        )

    # Create report
    ceph_bench.create_report(_filename)

    return