blob: 29b9a2980387f3c19d9f5a7970254d2620ff94a5 [file] [log] [blame]
Alex0989ecf2022-03-29 13:43:21 -05001# Author: Alex Savatieiev (osavatieiev@mirantis.com; a.savex@gmail.com)
2# Copyright 2019-2022 Mirantis, Inc.
Alex5cace3b2021-11-10 16:40:37 -06003from cfg_checker.agent.fio_runner import get_fio_options
Alex90ac1532021-12-09 11:13:14 -06004from cfg_checker.agent.fio_runner import seq_modes, mix_modes
Alexdcb792f2021-10-04 14:24:21 -05005from cfg_checker.common import logger_cli
6from cfg_checker.common.settings import ENV_TYPE_KUBE
7from cfg_checker.helpers import args_utils
8from cfg_checker.modules.ceph import info, bench
9
# One-line description of this module.
# NOTE(review): presumably picked up by the CLI loader for help output
# - confirm against the caller.
command_help = "Ceph Storage information and benchmarks"

# Environment types this module declares support for.
supported_envs = [
    ENV_TYPE_KUBE,
]
12
13
14# def _selectClass(_env, strClassHint="checker"):
15# _class = None
16# if _env == ENV_TYPE_SALT:
17# if strClassHint == "info":
18# _class = info.SaltCephInfo
19# elif strClassHint == "bench":
20# _class = bench.SaltCephInfo
21# elif _env == ENV_TYPE_KUBE:
22# if strClassHint == "info":
23# _class = info.KubeCephInfo
24# elif strClassHint == "bench":
25# _class = bench.KubeCephBench
26# if not _class:
27# raise CheckerException(
28# "Unknown hint for selecting Ceph handler Class: '{}'".format(
29# strClassHint
30# )
31# )
32# else:
33# return _class
34
def _get_param_and_log(arg, param_str):
    """Read one option from the parsed arguments, log it and return it.

    :param arg: parsed arguments object handed over to args_utils.get_arg
    :param param_str: name of the option to read
    :return: whatever args_utils.get_arg returned for this option
    """
    picked = args_utils.get_arg(arg, param_str)
    # echo the effective value so the run parameters end up in the log
    logger_cli.info(" {}={}".format(param_str, picked))
    return picked
39
Alexdcb792f2021-10-04 14:24:21 -050040
def init_parser(_parser):
    """Attach the Ceph sub-commands ('info', 'report', 'bench') to a parser.

    The selected sub-command name is stored in the 'type' attribute of the
    parsed namespace.

    :param _parser: argparse parser object to extend
    :return: the same parser object that was passed in
    """
    # ceph subparsers
    ceph_subparsers = _parser.add_subparsers(dest='type')

    # --- 'info': gather cluster data into an archive
    ceph_info_parser = ceph_subparsers.add_parser(
        'info',
        help="Gather Ceph Cluster information"
    )

    ceph_info_parser.add_argument(
        '--detailed',
        action="store_true", default=False,
        help="Print additional details. (Not implemented yet)"
    )

    ceph_info_parser.add_argument(
        '--tgz',
        metavar='ceph_tgz_filename',
        help="TGZ archive filename to save gathered data"
    )

    # --- 'report': gather cluster data and render an HTML report
    ceph_report_parser = ceph_subparsers.add_parser(
        'report',
        help="Generate Ceph Info report"
    )

    ceph_report_parser.add_argument(
        '--html',
        metavar='ceph_html_filename',
        help="HTML filename to save report"
    )

    # --- 'bench': run fio based benchmark
    ceph_bench_parser = ceph_subparsers.add_parser(
        'bench',
        help="Run ceph benchmark"
    )

    ceph_bench_parser.add_argument(
        '--agents',
        type=int, metavar='agent_count', default=5,
        help="Number of agents to use in all test runs. Default: 5"
    )
    ceph_bench_parser.add_argument(
        '--html',
        metavar='ceph_html_filename',
        help="HTML filename to save report"
    )
    ceph_bench_parser.add_argument(
        '--storage-class',
        metavar='storage_class',
        help="Storage class to be used in benchmark"
    )
    ceph_bench_parser.add_argument(
        '--task-file',
        metavar='task_file',
        help="Task file for benchmark with parameters to use"
    )
    ceph_bench_parser.add_argument(
        '--no-cleanup',
        action="store_true", default=False,
        help="Do not cleanup services, agents, pvc, and pv"
    )
    ceph_bench_parser.add_argument(
        '--cleanup-only',
        action="store_true", default=False,
        help="Cleanup resources related to benchmark"
    )
    ceph_bench_parser.add_argument(
        '--report-only',
        action="store_true", default=False,
        help="Just create report using files in '--dump-path' folder"
    )
    # No argparse default here on purpose: the '/tmp' fallback is applied
    # by do_bench() so it can tell "not set" from "set by user"
    ceph_bench_parser.add_argument(
        '--dump-path',
        metavar="dump_results",
        help="Dump result after each test run to use them later. "
             "Default: '/tmp'"
    )
    # Options below are used for a single run, i.e. when no task file is set
    ceph_bench_parser.add_argument(
        '--name',
        metavar="name", default="cephbench",
        help="Job name to use for running fio. "
             "Can be used to grep results. Default: 'cephbench'"
    )
    ceph_bench_parser.add_argument(
        '--bs',
        metavar="blocksize", default="16k",
        help="Block size for single run. Default: '16k'"
    )
    ceph_bench_parser.add_argument(
        '--iodepth',
        metavar="iodepth", default="16",
        help="IO Depth for single run. Default: '16'"
    )
    ceph_bench_parser.add_argument(
        '--size',
        metavar="size", default="10G",
        help="Persistent volume size (M, G). Default: '10G'"
    )
    ceph_bench_parser.add_argument(
        '--readwrite',
        metavar="readwrite", default="randrw",
        help="Test mode for single run (read, write, randrw, "
             "randread, randwrite). Default: 'randrw'"
    )
    ceph_bench_parser.add_argument(
        '--rwmixread',
        metavar="rwmixread", default="50",
        help="Percent of read in random mixed mode (randrw). Default: '50'"
    )
    ceph_bench_parser.add_argument(
        '--ramp-time',
        metavar="ramp_time", default="5s",
        help="Warmup time before test. Default: '5s'"
    )
    ceph_bench_parser.add_argument(
        '--runtime',
        metavar="runtime", default="60s",
        help="How long to run test. Default: '60s'"
    )
    ceph_bench_parser.add_argument(
        '--ioengine',
        metavar="ioengine", default="libaio",
        help="IO Engine used by fio. See 'fio eng-help' output for list. "
             "Default: 'libaio'"
    )
    # Adjacent string literals are concatenated as-is, so the sentence
    # separator before "Default" has to be spelled out explicitly
    ceph_bench_parser.add_argument(
        '--offset-increment',
        metavar="offset_increment", default="500M",
        help="Offset to be used in 'read' and 'write' modes if multiple jobs "
             "used. Default: '500M'"
    )

    return _parser
176
177
def do_info(args, config):
    """Gather Ceph cluster information and save it as a TGZ archive.

    :param args: parsed command line arguments; 'tgz' is read from it
    :param config: configuration object passed to info.KubeCephInfo
    :return: None
    """
    # Validate the environment against ENV_TYPE_KUBE first
    args_utils.check_supported_env(ENV_TYPE_KUBE, args, config)
    # Archive name: the one given by '--tgz' or the built-in fallback
    archive_name = args.tgz or "ceph_info_archive.tgz"

    # _class = _selectClass(_env)
    collector = info.KubeCephInfo(config)

    logger_cli.info("# Collecting Ceph cluster information")
    collector.gather_info()
    collector.gather_osd_configs()

    # Debug, enable if needed to debug report generation
    # without actual data collecting each time
    # collector.dump_info()
    # collector.load_info()
    # end debug

    collector.generate_archive(archive_name)
    collector.print_summary()
202
203
def do_report(args, config):
    """Gather Ceph cluster information and render it as an HTML report.

    :param args: parsed command line arguments; 'html' is read from it
    :param config: configuration object passed to info.KubeCephInfo
    :return: None
    """
    # Validate the environment against ENV_TYPE_KUBE first
    args_utils.check_supported_env(ENV_TYPE_KUBE, args, config)
    html_target = args_utils.get_arg(args, 'html')
    logger_cli.info("# Ceph cluster Configuration report")

    # _class = _selectClass(_env)
    collector = info.KubeCephInfo(config)
    # Debug, enable if needed to debug report generation
    # without actual data collecting each time
    # collector.load_info()
    # end debug

    # Collect the data, then prepare the derived tables for the report
    collector.gather_info()
    collector.gather_osd_configs()
    collector.get_transposed_latency_table()
    collector.get_latest_health_readout()
    collector.create_html_report(html_target)
224
225
def do_bench(args, config):
    """Run Ceph benchmark using multiple agent pods.

    Three modes are handled, in this order:
      - '--cleanup-only': discover and remove benchmark resources, then exit;
      - '--report-only': preload dumped results and create a report, then exit;
      - otherwise: a single run built from command line options, or a
        task-file based run when '--task-file' is given, followed by optional
        cleanup and report creation.

    :param args: parsed command line arguments of the 'bench' sub-command
    :param config: configuration object; bench related attributes
                   (resource_prefix, bench_mode, bench_agent_count, ...)
                   are set on it before it is given to bench.KubeCephBench
    :return: None
    """
    _cleanup_only = args_utils.get_arg(args, 'cleanup_only')
    _report_only = args_utils.get_arg(args, 'report_only')
    config.resource_prefix = "cfgagent"
    # if only cleanup needed do it and exit
    if _cleanup_only:
        # Do forced resource cleanup and exit
        config.bench_mode = "cleanup"
        config.bench_agent_count = -1
        ceph_bench = bench.KubeCephBench(config)
        logger_cli.info(
            "# Discovering benchmark resources using prefix of '{}'".format(
                config.resource_prefix
            )
        )
        ceph_bench.prepare_cleanup()
        ceph_bench.cleanup()
        return

    # dump results options
    _dump_path = args_utils.get_arg(args, "dump_path")
    if _dump_path:
        logger_cli.info("# Results will be dumped to '{}'".format(_dump_path))
    else:
        # Fall back to the default promised by the '--dump-path' help text
        _dump_path = "/tmp"
        logger_cli.info(
            "# No result dump path set. Defaulting to '{}'. "
            "Consider setting it if running long task_file "
            "based test runs".format(_dump_path)
        )
    # Set exactly once, so the default can not be overwritten later on
    config.bench_results_dump_path = _dump_path

    # Report filename
    _filename = args_utils.get_arg(args, 'html')
    # gather Ceph info
    logger_cli.info("# Collecting Ceph cluster information")
    ceph_info = info.KubeCephInfo(config)

    # Task files or options
    _opts = get_fio_options()
    # Load name and announce it
    config.bench_name = args_utils.get_arg(args, "name")
    _opts["name"] = config.bench_name
    logger_cli.info(
        "# Using '{}' as ceph bench jobs name".format(_opts["name"])
    )

    if _report_only:
        # Do forced report creation and exit
        config.bench_mode = "report"
        config.bench_agent_count = -1
        ceph_bench = bench.KubeCephBench(config)
        ceph_bench.set_ceph_info_class(ceph_info)
        # NOTE(review): the message prints the resource prefix, not the
        # report filename - confirm this is the intended value
        logger_cli.info(
            "# Preparing to generate report '{}'".format(
                config.resource_prefix
            )
        )
        # Preload previous results for this name
        ceph_bench.preload_results()
        # Gather ceph data
        ceph_bench.wait_ceph_cooldown()
        # Generate report
        ceph_bench.create_report(_filename)
        return

    # Prepare the tasks and do synced testrun or a single one
    logger_cli.info("# Initializing ceph benchmark module")
    args_utils.check_supported_env(ENV_TYPE_KUBE, args, config)
    # agents count option
    config.bench_agent_count = args_utils.get_arg(args, "agents")
    logger_cli.info("-> using {} agents".format(config.bench_agent_count))
    # Cleaning option
    config.no_cleaning_after_benchmark = args_utils.get_arg(args, "no_cleanup")
    # storage class
    _storage_class = args_utils.get_arg(args, "storage_class")
    logger_cli.info("-> using storage class of '{}'".format(_storage_class))
    config.bench_storage_class = _storage_class

    _task_file = args_utils.get_arg(args, "task_file", nofail=True)
    if not _task_file:
        logger_cli.info("-> Running single benchmark run")
        config.bench_mode = "single"
        # Updating _opts from arguments
        _params = (
            "bs",
            "iodepth",
            "size",
            "readwrite",
            "ramp_time",
            "runtime",
            "ioengine",
        )
        for _p in _params:
            _opts[_p] = _get_param_and_log(args, _p)
        # Mode specific option: only one of them is taken from arguments
        if _opts["readwrite"] in seq_modes:
            _opts["offset_increment"] = _get_param_and_log(
                args,
                "offset_increment"
            )
        elif _opts["readwrite"] in mix_modes:
            _opts["rwmixread"] = _get_param_and_log(args, "rwmixread")
    else:
        logger_cli.info("-> running with tasks from '{}'".format(_task_file))
        config.bench_task_file = _task_file
        config.bench_mode = "tasks"
    logger_cli.debug("... default/selected options for fio:")
    for _k, _v in _opts.items():
        # TODO: Update options for single run
        logger_cli.debug(" {} = {}".format(_k, _v))

    # init the Bench class
    ceph_bench = bench.KubeCephBench(config)
    ceph_bench.set_ceph_info_class(ceph_info)
    # Preload previous results for this name
    ceph_bench.preload_results()
    # Do the testrun
    ceph_bench.prepare_agents(_opts)
    ceph_bench.wait_ceph_cooldown()

    # DEBUG of report in progress
    if not ceph_bench.run_benchmark(_opts):
        # No cleaning and/or report if benchmark was not finished
        logger_cli.info("# Abnormal benchmark run, no cleaning performed")
        return
    # Remove after DEBUG
    # ceph_bench.collect_results(_opts)
    # END DEBUG

    # Cleaning
    if not config.no_cleaning_after_benchmark:
        ceph_bench.cleanup()
    else:
        # Name the option exactly as it is registered in init_parser()
        logger_cli.info(
            "# '--no-cleanup' option set. Cleaning not conducted."
        )

    # Create report
    ceph_bench.create_report(_filename)