Blame - cfg_checker/modules/ceph/__init__.py - mcp/cfg-checker

blob: f9bf3ca943239c0670b6ef645ab6b23429ff2705 [file] [log] [blame]

Alex	5cace3b	2021-11-10 16:40:37 -0600	[diff] [blame]	1	from cfg_checker.agent.fio_runner import get_fio_options
Alex	dcb792f	2021-10-04 14:24:21 -0500	[diff] [blame]	2	from cfg_checker.common import logger_cli
				3	from cfg_checker.common.settings import ENV_TYPE_KUBE
				4	from cfg_checker.helpers import args_utils
				5	from cfg_checker.modules.ceph import info, bench
				6
				7	command_help = "Ceph Storage information and benchmarks"
				8	supported_envs = [ENV_TYPE_KUBE]
				9
				10
				11	# def _selectClass(_env, strClassHint="checker"):
				12	# _class = None
				13	# if _env == ENV_TYPE_SALT:
				14	# if strClassHint == "info":
				15	# _class = info.SaltCephInfo
				16	# elif strClassHint == "bench":
				17	# _class = bench.SaltCephInfo
				18	# elif _env == ENV_TYPE_KUBE:
				19	# if strClassHint == "info":
				20	# _class = info.KubeCephInfo
				21	# elif strClassHint == "bench":
				22	# _class = bench.KubeCephBench
				23	# if not _class:
				24	# raise CheckerException(
				25	# "Unknown hint for selecting Ceph handler Class: '{}'".format(
				26	# strClassHint
				27	# )
				28	# )
				29	# else:
				30	# return _class
				31
				32
				33	def init_parser(_parser):
				34	# network subparser
				35	ceph_subparsers = _parser.add_subparsers(dest='type')
				36
				37	ceph_info_parser = ceph_subparsers.add_parser(
				38	'info',
				39	help="Gather Ceph Cluster information"
				40	)
				41
				42	ceph_info_parser.add_argument(
				43	'--detailed',
				44	action="store_true", default=False,
				45	help="Print additional details"
				46	)
				47
				48	ceph_info_parser.add_argument(
				49	'--tgz',
				50	metavar='ceph_tgz_filename',
				51	help="HTML filename to save report"
				52	)
				53
				54	ceph_report_parser = ceph_subparsers.add_parser(
				55	'report',
				56	help="Generate network check report"
				57	)
				58
				59	ceph_report_parser.add_argument(
				60	'--html',
				61	metavar='ceph_html_filename',
				62	help="HTML filename to save report"
				63	)
				64
				65	ceph_bench_parser = ceph_subparsers.add_parser(
				66	'bench',
				67	help="Run ceph benchmark"
				68	)
				69
				70	ceph_bench_parser.add_argument(
				71	'--task-list',
				72	metavar='ceph_tasks_filename',
				73	help="List file with data for Ceph bench testrun"
				74	)
Alex	5cace3b	2021-11-10 16:40:37 -0600	[diff] [blame]	75	ceph_bench_parser.add_argument(
				76	'--agents',
				77	type=int, metavar='agent_count', default=5,
				78	help="List file with data for Ceph bench testrun"
				79	)
				80	ceph_bench_parser.add_argument(
				81	'--html',
				82	metavar='ceph_html_filename',
				83	help="HTML filename to save report"
				84	)
				85	ceph_bench_parser.add_argument(
				86	'--storage-class',
				87	metavar='storage_class',
				88	help="Storage class to be used in benchmark"
				89	)
				90	ceph_bench_parser.add_argument(
				91	'--task-file',
Alex	b212954	2021-11-23 15:49:42 -0600	[diff] [blame^]	92	metavar='task_file',
Alex	5cace3b	2021-11-10 16:40:37 -0600	[diff] [blame]	93	help="Task file for benchmark"
				94	)
Alex	2a7657c	2021-11-10 20:51:34 -0600	[diff] [blame]	95	ceph_bench_parser.add_argument(
				96	'--no-cleanup',
				97	action="store_true", default=False,
				98	help="Do not cleanup services, agents, pvc, and pv"
				99	)
Alex	b212954	2021-11-23 15:49:42 -0600	[diff] [blame^]	100	ceph_bench_parser.add_argument(
				101	'--cleanup-only',
				102	action="store_true", default=False,
				103	help="Cleanup resources related to benchmark"
				104	)
				105	ceph_bench_parser.add_argument(
				106	'--dump-path',
				107	metavar="dump_results", default="/tmp",
				108	help="Dump result after each test run to use them later"
				109	)
Alex	dcb792f	2021-10-04 14:24:21 -0500	[diff] [blame]	110
				111	return _parser
				112
				113
				114	def do_info(args, config):
				115	# Ceph info
				116	# Gather ceph info and create an archive with data
				117	args_utils.check_supported_env(ENV_TYPE_KUBE, args, config)
				118	# check tgz
				119	_tgzfile = "ceph_info_archive.tgz" if not args.tgz else args.tgz
				120
				121	# _class = _selectClass(_env)
				122	ceph_info = info.KubeCephInfo(config)
				123
				124	logger_cli.info("# Collecting Ceph cluster information")
Alex	dcb792f	2021-10-04 14:24:21 -0500	[diff] [blame]	125	ceph_info.gather_info()
				126
				127	# Debug, enable if needed to debug report generation
				128	# without actuall data collecting each time
				129	# ceph_info.dump_info()
				130	# ceph_info.load_info()
				131	# end debug
				132
Alex	dcb792f	2021-10-04 14:24:21 -0500	[diff] [blame]	133	ceph_info.generate_archive(_tgzfile)
Alex	df9cc3a	2021-10-12 14:37:28 -0500	[diff] [blame]	134	ceph_info.print_summary()
Alex	dcb792f	2021-10-04 14:24:21 -0500	[diff] [blame]	135
				136	return
				137
				138
				139	def do_report(args, config):
				140	# Ceph Report
				141	# Gather ceph info and create HTML report with all of the data
				142	args_utils.check_supported_env(ENV_TYPE_KUBE, args, config)
				143	_filename = args_utils.get_arg(args, 'html')
				144	logger_cli.info("# Ceph cluster Configuration report")
Alex	dcb792f	2021-10-04 14:24:21 -0500	[diff] [blame]	145
				146	# _class = _selectClass(_env)
				147	ceph_info = info.KubeCephInfo(config)
				148	# Debug, enable if needed to debug report generation
				149	# without actuall data collecting each time
				150	# ceph_info.load_info()
				151	# end debug
				152	ceph_info.gather_info()
				153	ceph_info.get_transposed_latency_table()
				154	ceph_info.get_latest_health_readout()
				155	ceph_info.create_html_report(_filename)
				156
				157	return
				158
				159
				160	def do_bench(args, config):
				161	# Ceph Benchmark using multiple pods
Alex	b212954	2021-11-23 15:49:42 -0600	[diff] [blame^]	162	# if only cleanup needed do it and exit
				163	_cleanup_only = args_utils.get_arg(args, 'cleanup_only')
				164	config.resource_prefix = "cfgagent"
				165	if _cleanup_only:
				166	# Do forced resource cleanup and exit
				167	config.bench_mode = "cleanup"
				168	config.bench_agent_count = -1
				169	ceph_bench = bench.KubeCephBench(config)
				170	logger_cli.info(
				171	"# Discovering benchmark resources using prefix of '{}'".format(
				172	config.resource_prefix
				173	)
				174	)
				175	ceph_bench.prepare_cleanup()
				176	ceph_bench.cleanup()
				177	return
				178
				179	# gather Ceph info
				180	logger_cli.info("# Collecting Ceph cluster information")
				181	ceph_info = info.KubeCephInfo(config)
				182
Alex	5cace3b	2021-11-10 16:40:37 -0600	[diff] [blame]	183	# Prepare the tasks and do synced testrun or a single one
Alex	b212954	2021-11-23 15:49:42 -0600	[diff] [blame^]	184	logger_cli.info("# Initializing ceph benchmark module")
Alex	dcb792f	2021-10-04 14:24:21 -0500	[diff] [blame]	185	args_utils.check_supported_env(ENV_TYPE_KUBE, args, config)
Alex	5cace3b	2021-11-10 16:40:37 -0600	[diff] [blame]	186	_filename = args_utils.get_arg(args, 'html')
				187	# agents count option
Alex	2a7657c	2021-11-10 20:51:34 -0600	[diff] [blame]	188	config.bench_agent_count = args_utils.get_arg(args, "agents")
				189	logger_cli.info("-> using {} agents".format(config.bench_agent_count))
				190	config.no_cleaning_after_benchmark = args_utils.get_arg(args, "no_cleanup")
Alex	5cace3b	2021-11-10 16:40:37 -0600	[diff] [blame]	191	# storage class
				192	_storage_class = args_utils.get_arg(args, "storage_class")
				193	logger_cli.info("-> using storage class of '{}'".format(_storage_class))
				194	config.bench_storage_class = _storage_class
Alex	b212954	2021-11-23 15:49:42 -0600	[diff] [blame^]	195	# dump results options
				196	_dump_path = args_utils.get_arg(args, "dump_path")
				197	if _dump_path:
				198	logger_cli.info("# Results will be dumped to '{}'".format(_dump_path))
				199	config.bench_results_dump_path = _dump_path
				200	else:
				201	logger_cli.info(
				202	"# No result dump path set. "
				203	"Consider setting it if running long task_file based test runs"
				204	)
				205	config.bench_results_dump_path = _dump_path
Alex	5cace3b	2021-11-10 16:40:37 -0600	[diff] [blame]	206	# Task files or options
				207	_task_file = args_utils.get_arg(args, "task_file", nofail=True)
				208	if not _task_file:
				209	logger_cli.info("-> running single run")
				210	config.bench_mode = "single"
				211	else:
				212	logger_cli.info("-> running with tasks from '{}'".format(_task_file))
				213	config.bench_task_file = _task_file
				214	config.bench_mode = "tasks"
				215	_opts = get_fio_options()
				216	logger_cli.debug("... default/selected options for fio:")
				217	for _k in _opts.keys():
				218	# TODO: Update options for single run
				219	logger_cli.debug(" {} = {}".format(_k, _opts[_k]))
Alex	dcb792f	2021-10-04 14:24:21 -0500	[diff] [blame]	220
Alex	3034ba5	2021-11-13 17:06:45 -0600	[diff] [blame]	221	# handle option inavailability from command line for single mode
				222
				223	# init the Bench class
Alex	dcb792f	2021-10-04 14:24:21 -0500	[diff] [blame]	224	ceph_bench = bench.KubeCephBench(config)
Alex	b212954	2021-11-23 15:49:42 -0600	[diff] [blame^]	225	ceph_bench.set_ceph_info_class(ceph_info)
Alex	dcb792f	2021-10-04 14:24:21 -0500	[diff] [blame]	226	# Do the testrun
Alex	5cace3b	2021-11-10 16:40:37 -0600	[diff] [blame]	227	ceph_bench.prepare_agents(_opts)
Alex	b212954	2021-11-23 15:49:42 -0600	[diff] [blame^]	228	ceph_bench.wait_ceph_cooldown()
				229
				230	# DEBUG of report in progress
Alex	5cace3b	2021-11-10 16:40:37 -0600	[diff] [blame]	231	if not ceph_bench.run_benchmark(_opts):
Alex	2a7657c	2021-11-10 20:51:34 -0600	[diff] [blame]	232	# No cleaning and/or report if benchmark was not finished
Alex	bfa947c	2021-11-11 18:14:28 -0600	[diff] [blame]	233	logger_cli.info("# Abnormal benchmark run, no cleaning performed")
Alex	5cace3b	2021-11-10 16:40:37 -0600	[diff] [blame]	234	return
Alex	b212954	2021-11-23 15:49:42 -0600	[diff] [blame^]	235	# Remove after DEBUG
				236	# ceph_bench.collect_results(_opts)
				237	# END DEBUG
				238
Alex	3034ba5	2021-11-13 17:06:45 -0600	[diff] [blame]	239	# Cleaning
Alex	2a7657c	2021-11-10 20:51:34 -0600	[diff] [blame]	240	if not config.no_cleaning_after_benchmark:
				241	ceph_bench.cleanup()
Alex	bfa947c	2021-11-11 18:14:28 -0600	[diff] [blame]	242	else:
				243	logger_cli.info(
				244	"# '--no-cleaning' option set. Cleaning not conducted."
				245	)
Alex	dcb792f	2021-10-04 14:24:21 -0500	[diff] [blame]	246
				247	# Create report
Alex	5cace3b	2021-11-10 16:40:37 -0600	[diff] [blame]	248	ceph_bench.create_report(_filename)
Alex	dcb792f	2021-10-04 14:24:21 -0500	[diff] [blame]	249
				250	return