blob: eee01ce3a008b0a6c9e5cee3288067988fc3ef4d [file] [log] [blame]
Alex5cace3b2021-11-10 16:40:37 -06001from cfg_checker.agent.fio_runner import get_fio_options
Alex90ac1532021-12-09 11:13:14 -06002from cfg_checker.agent.fio_runner import seq_modes, mix_modes
Alexdcb792f2021-10-04 14:24:21 -05003from cfg_checker.common import logger_cli
4from cfg_checker.common.settings import ENV_TYPE_KUBE
5from cfg_checker.helpers import args_utils
6from cfg_checker.modules.ceph import info, bench
7
# Short description of this command module
# (presumably shown by the top-level CLI help; the consumer is not in this file)
command_help = "Ceph Storage information and benchmarks"
# Environment types this module declares as supported: only ENV_TYPE_KUBE
supported_envs = [ENV_TYPE_KUBE]
10
11
12# def _selectClass(_env, strClassHint="checker"):
13# _class = None
14# if _env == ENV_TYPE_SALT:
15# if strClassHint == "info":
16# _class = info.SaltCephInfo
17# elif strClassHint == "bench":
18# _class = bench.SaltCephInfo
19# elif _env == ENV_TYPE_KUBE:
20# if strClassHint == "info":
21# _class = info.KubeCephInfo
22# elif strClassHint == "bench":
23# _class = bench.KubeCephBench
24# if not _class:
25# raise CheckerException(
26# "Unknown hint for selecting Ceph handler Class: '{}'".format(
27# strClassHint
28# )
29# )
30# else:
31# return _class
32
def _get_param_and_log(arg, param_str):
    """Look up a single argument value and echo it to the CLI log.

    :param arg: parsed arguments object, passed through to
        ``args_utils.get_arg``
    :param param_str: name of the argument to fetch
    :return: the value ``args_utils.get_arg`` produced for ``param_str``
    """
    _fetched = args_utils.get_arg(arg, param_str)
    # log as 'name=value' so the effective run parameters are visible
    _line = " {}={}".format(param_str, _fetched)
    logger_cli.info(_line)
    return _fetched
37
Alexdcb792f2021-10-04 14:24:21 -050038
def init_parser(_parser):
    """Register the Ceph sub-commands and their options on ``_parser``.

    Three sub-commands are added, selected via the ``type`` destination:
    ``info``, ``report`` and ``bench``.

    :param _parser: argparse parser (or sub-parser) to extend
    :return: the same ``_parser`` object, for chaining
    """
    # ceph subparsers
    ceph_subparsers = _parser.add_subparsers(dest='type')

    # --- info ---------------------------------------------------------
    ceph_info_parser = ceph_subparsers.add_parser(
        'info',
        help="Gather Ceph Cluster information"
    )
    ceph_info_parser.add_argument(
        '--detailed',
        action="store_true", default=False,
        help="Print additional details"
    )
    ceph_info_parser.add_argument(
        '--tgz',
        metavar='ceph_tgz_filename',
        help="Archive (tgz) filename to save collected Ceph info"
    )

    # --- report -------------------------------------------------------
    ceph_report_parser = ceph_subparsers.add_parser(
        'report',
        help="Generate Ceph cluster report"
    )
    ceph_report_parser.add_argument(
        '--html',
        metavar='ceph_html_filename',
        help="HTML filename to save report"
    )

    # --- bench --------------------------------------------------------
    ceph_bench_parser = ceph_subparsers.add_parser(
        'bench',
        help="Run ceph benchmark"
    )
    ceph_bench_parser.add_argument(
        '--task-list',
        metavar='ceph_tasks_filename',
        help="List file with data for Ceph bench testrun"
    )
    ceph_bench_parser.add_argument(
        '--agents',
        type=int, metavar='agent_count', default=5,
        help="Number of benchmark agents to use"
    )
    ceph_bench_parser.add_argument(
        '--html',
        metavar='ceph_html_filename',
        help="HTML filename to save report"
    )
    ceph_bench_parser.add_argument(
        '--storage-class',
        metavar='storage_class',
        help="Storage class to be used in benchmark"
    )
    ceph_bench_parser.add_argument(
        '--task-file',
        metavar='task_file',
        help="Task file for benchmark"
    )
    ceph_bench_parser.add_argument(
        '--no-cleanup',
        action="store_true", default=False,
        help="Do not cleanup services, agents, pvc, and pv"
    )
    ceph_bench_parser.add_argument(
        '--cleanup-only',
        action="store_true", default=False,
        help="Cleanup resources related to benchmark"
    )
    ceph_bench_parser.add_argument(
        '--report-only',
        action="store_true", default=False,
        help="Just create report using files in folder"
    )
    ceph_bench_parser.add_argument(
        '--dump-path',
        metavar="dump_results",
        help="Dump result after each test run to use them later"
    )

    # String-valued options that share the same shape:
    # (flag, metavar, default, help). Values are kept as strings on purpose,
    # they are handed over to the benchmark options as-is.
    _string_options = (
        ('--name', "name", "cephbench",
         "Name to use for ceph bench jobs"),
        ('--bs', "blocksize", "16k",
         "Block size for single run"),
        ('--iodepth', "iodepth", "16",
         "IO Depth for single run"),
        ('--size', "size", "10G",
         "Persistent volume size (M, G)"),
        ('--readwrite', "readwrite", "randrw",
         "Test mode for single run"),
        ('--rwmixread', "rwmixread", "50",
         "Percent of read in random mixed mode (randrw)"),
        ('--ramp-time', "ramp_time", "5s",
         "Warmup time before test"),
        ('--runtime', "runtime", "60s",
         "Time based test run longevity"),
        ('--ioengine', "ioengine", "libaio",
         "IO Engine used by fio. See eng-help output in fio for list"),
        ('--offset-increment', "offset_increment", "500M",
         "Offset increment used in sequential test modes"),
    )
    for _flag, _metavar, _default, _help in _string_options:
        ceph_bench_parser.add_argument(
            _flag,
            metavar=_metavar, default=_default,
            help=_help
        )

    return _parser
173
174
def do_info(args, config):
    """Gather Ceph cluster information and store it in an archive.

    :param args: parsed CLI arguments; ``args.tgz`` optionally names
        the archive file
    :param config: configuration object passed to ``info.KubeCephInfo``
    :return: None
    """
    # Validate the environment against ENV_TYPE_KUBE before doing anything
    args_utils.check_supported_env(ENV_TYPE_KUBE, args, config)
    # Use the default archive name when --tgz was not supplied
    _archive_name = args.tgz or "ceph_info_archive.tgz"

    _collector = info.KubeCephInfo(config)

    logger_cli.info("# Collecting Ceph cluster information")
    _collector.gather_info()

    # Debug hint: to work on report generation without collecting data
    # each time, use _collector.dump_info() / _collector.load_info() here

    _collector.generate_archive(_archive_name)
    _collector.print_summary()
198
199
def do_report(args, config):
    """Gather Ceph cluster information and render it as an HTML report.

    :param args: parsed CLI arguments; the ``html`` argument names the
        report file
    :param config: configuration object passed to ``info.KubeCephInfo``
    :return: None
    """
    # Validate the environment against ENV_TYPE_KUBE before doing anything
    args_utils.check_supported_env(ENV_TYPE_KUBE, args, config)
    _report_file = args_utils.get_arg(args, 'html')
    logger_cli.info("# Ceph cluster Configuration report")

    _collector = info.KubeCephInfo(config)
    # Debug hint: to work on report generation without collecting data
    # each time, call _collector.load_info() instead of gather_info()
    _collector.gather_info()
    # Prepare derived data, then write the report
    _collector.get_transposed_latency_table()
    _collector.get_latest_health_readout()
    _collector.create_html_report(_report_file)
219
220
def do_bench(args, config):
    """Run the Ceph benchmark using multiple pods, or one of its
    auxiliary modes.

    Modes, selected from the arguments:
      - ``cleanup_only``: discover and clean up benchmark resources, exit
      - ``report_only``: preload stored results and create a report, exit
      - otherwise: a single run built from CLI options, or a task-file
        based run when ``task_file`` is given

    :param args: parsed CLI arguments of the ``bench`` sub-command
    :param config: configuration object; benchmark settings
        (``bench_mode``, ``bench_agent_count``, ``bench_results_dump_path``
        and others) are stored on it before ``bench.KubeCephBench`` is
        created
    :return: None
    """
    # Ceph Benchmark using multiple pods
    # if only cleanup needed do it and exit
    _cleanup_only = args_utils.get_arg(args, 'cleanup_only')
    _report_only = args_utils.get_arg(args, 'report_only')
    config.resource_prefix = "cfgagent"
    if _cleanup_only:
        # Do forced resource cleanup and exit
        config.bench_mode = "cleanup"
        config.bench_agent_count = -1
        ceph_bench = bench.KubeCephBench(config)
        logger_cli.info(
            "# Discovering benchmark resources using prefix of '{}'".format(
                config.resource_prefix
            )
        )
        ceph_bench.prepare_cleanup()
        ceph_bench.cleanup()
        return

    # dump results options
    # The path is resolved exactly once here; it must not be reset later,
    # otherwise the default would be lost for regular benchmark runs
    _dump_path = args_utils.get_arg(args, "dump_path")
    if _dump_path:
        logger_cli.info("# Results will be dumped to '{}'".format(_dump_path))
    else:
        _dump_path = "/tmp"
        logger_cli.info(
            "# No result dump path set. Defaulting to '{}'. "
            "Consider setting it if running long task_file "
            "based test runs".format(_dump_path)
        )
    config.bench_results_dump_path = _dump_path

    # Report filename
    _filename = args_utils.get_arg(args, 'html')
    # gather Ceph info
    logger_cli.info("# Collecting Ceph cluster information")
    ceph_info = info.KubeCephInfo(config)

    # Task files or options
    _opts = get_fio_options()
    # Load name and announce it
    config.bench_name = args_utils.get_arg(args, "name")
    _opts["name"] = config.bench_name
    logger_cli.info(
        "# Using '{}' as ceph bench jobs name".format(_opts["name"])
    )

    if _report_only:
        # Do forced report creation and exit
        config.bench_mode = "report"
        config.bench_agent_count = -1
        ceph_bench = bench.KubeCephBench(config)
        ceph_bench.set_ceph_info_class(ceph_info)
        logger_cli.info(
            "# Preparing to generate report '{}'".format(
                config.resource_prefix
            )
        )
        # Preload previous results for this name
        ceph_bench.preload_results()
        # Gather ceph data
        ceph_bench.wait_ceph_cooldown()
        # Generate report
        ceph_bench.create_report(_filename)
        return

    # Prepare the tasks and do synced testrun or a single one
    logger_cli.info("# Initializing ceph benchmark module")
    args_utils.check_supported_env(ENV_TYPE_KUBE, args, config)
    # agents count option
    config.bench_agent_count = args_utils.get_arg(args, "agents")
    logger_cli.info("-> using {} agents".format(config.bench_agent_count))
    # Cleaning option
    config.no_cleaning_after_benchmark = args_utils.get_arg(args, "no_cleanup")
    # storage class
    _storage_class = args_utils.get_arg(args, "storage_class")
    logger_cli.info("-> using storage class of '{}'".format(_storage_class))
    config.bench_storage_class = _storage_class

    _task_file = args_utils.get_arg(args, "task_file", nofail=True)
    if not _task_file:
        logger_cli.info("-> Running single benchmark run")
        config.bench_mode = "single"
        # Updating _opts from arguments
        _params = [
            "bs",
            "iodepth",
            "size",
            "readwrite",
            "ramp_time",
            "runtime",
            "ioengine"
        ]
        for _p in _params:
            _opts[_p] = _get_param_and_log(args, _p)
        # One extra option depends on the selected test mode
        if _opts["readwrite"] in seq_modes:
            _p = "offset_increment"
            _opts[_p] = _get_param_and_log(args, _p)
        elif _opts["readwrite"] in mix_modes:
            _p = "rwmixread"
            _opts[_p] = _get_param_and_log(args, _p)
    else:
        logger_cli.info("-> running with tasks from '{}'".format(_task_file))
        config.bench_task_file = _task_file
        config.bench_mode = "tasks"
    logger_cli.debug("... default/selected options for fio:")
    for _k, _v in _opts.items():
        # TODO: Update options for single run
        logger_cli.debug(" {} = {}".format(_k, _v))

    # init the Bench class
    ceph_bench = bench.KubeCephBench(config)
    ceph_bench.set_ceph_info_class(ceph_info)
    # Preload previous results for this name
    ceph_bench.preload_results()
    # Do the testrun
    ceph_bench.prepare_agents(_opts)
    ceph_bench.wait_ceph_cooldown()

    # DEBUG of report in progress
    if not ceph_bench.run_benchmark(_opts):
        # No cleaning and/or report if benchmark was not finished
        logger_cli.info("# Abnormal benchmark run, no cleaning performed")
        return
    # Remove after DEBUG
    # ceph_bench.collect_results(_opts)
    # END DEBUG

    # Cleaning
    if not config.no_cleaning_after_benchmark:
        ceph_bench.cleanup()
    else:
        logger_cli.info(
            "# '--no-cleanup' option set. Cleaning not conducted."
        )

    # Create report
    ceph_bench.create_report(_filename)

    return