Alex | 5cace3b | 2021-11-10 16:40:37 -0600 | [diff] [blame] | 1 | from cfg_checker.agent.fio_runner import get_fio_options |
Alex | 90ac153 | 2021-12-09 11:13:14 -0600 | [diff] [blame] | 2 | from cfg_checker.agent.fio_runner import seq_modes, mix_modes |
Alex | dcb792f | 2021-10-04 14:24:21 -0500 | [diff] [blame] | 3 | from cfg_checker.common import logger_cli |
| 4 | from cfg_checker.common.settings import ENV_TYPE_KUBE |
| 5 | from cfg_checker.helpers import args_utils |
| 6 | from cfg_checker.modules.ceph import info, bench |
| 7 | |
# One-line description of this command module.
# NOTE(review): presumably shown by the top-level CLI help - confirm in loader.
command_help = "Ceph Storage information and benchmarks"
# Environment types this module declares support for (Kubernetes only).
supported_envs = [ENV_TYPE_KUBE]
| 10 | |
| 11 | |
| 12 | # def _selectClass(_env, strClassHint="checker"): |
| 13 | # _class = None |
| 14 | # if _env == ENV_TYPE_SALT: |
| 15 | # if strClassHint == "info": |
| 16 | # _class = info.SaltCephInfo |
| 17 | # elif strClassHint == "bench": |
| 18 | # _class = bench.SaltCephInfo |
| 19 | # elif _env == ENV_TYPE_KUBE: |
| 20 | # if strClassHint == "info": |
| 21 | # _class = info.KubeCephInfo |
| 22 | # elif strClassHint == "bench": |
| 23 | # _class = bench.KubeCephBench |
| 24 | # if not _class: |
| 25 | # raise CheckerException( |
| 26 | # "Unknown hint for selecting Ceph handler Class: '{}'".format( |
| 27 | # strClassHint |
| 28 | # ) |
| 29 | # ) |
| 30 | # else: |
| 31 | # return _class |
| 32 | |
def _get_param_and_log(arg, param_str):
    """Fetch a single option from parsed arguments and log it.

    :param arg: parsed arguments object handed over to args_utils.get_arg
    :param param_str: name of the option to fetch
    :return: value returned by args_utils.get_arg for this option
    """
    value = args_utils.get_arg(arg, param_str)
    # Announce the selected value as 'name=value' on the CLI logger
    message = " {}={}".format(param_str, value)
    logger_cli.info(message)
    return value
| 37 | |
Alex | dcb792f | 2021-10-04 14:24:21 -0500 | [diff] [blame] | 38 | |
def init_parser(_parser):
    """Register Ceph sub-commands and their options on the given parser.

    Three sub-commands are added: 'info', 'report' and 'bench'. Name of the
    selected sub-command is stored in 'type' attribute of parsed namespace.

    :param _parser: argparse parser to extend with Ceph sub-commands
    :return: the very same _parser object
    """
    # ceph subparsers
    ceph_subparsers = _parser.add_subparsers(dest='type')

    # 'info': gather cluster data into an archive
    ceph_info_parser = ceph_subparsers.add_parser(
        'info',
        help="Gather Ceph Cluster information"
    )
    ceph_info_parser.add_argument(
        '--detailed',
        action="store_true", default=False,
        help="Print additional details"
    )
    ceph_info_parser.add_argument(
        '--tgz',
        metavar='ceph_tgz_filename',
        help="TGZ archive filename to save collected Ceph information"
    )

    # 'report': HTML report
    ceph_report_parser = ceph_subparsers.add_parser(
        'report',
        help="Generate Ceph cluster report"
    )
    ceph_report_parser.add_argument(
        '--html',
        metavar='ceph_html_filename',
        help="HTML filename to save report"
    )

    # 'bench': benchmark
    ceph_bench_parser = ceph_subparsers.add_parser(
        'bench',
        help="Run ceph benchmark"
    )
    ceph_bench_parser.add_argument(
        '--task-list',
        metavar='ceph_tasks_filename',
        help="List file with data for Ceph bench testrun"
    )
    ceph_bench_parser.add_argument(
        '--agents',
        type=int, metavar='agent_count', default=5,
        help="Number of benchmark agents to use"
    )
    ceph_bench_parser.add_argument(
        '--html',
        metavar='ceph_html_filename',
        help="HTML filename to save report"
    )
    ceph_bench_parser.add_argument(
        '--storage-class',
        metavar='storage_class',
        help="Storage class to be used in benchmark"
    )
    ceph_bench_parser.add_argument(
        '--task-file',
        metavar='task_file',
        help="Task file for benchmark"
    )

    # Mode switches, all off by default
    _flags = [
        ('--no-cleanup', "Do not cleanup services, agents, pvc, and pv"),
        ('--cleanup-only', "Cleanup resources related to benchmark"),
        ('--report-only', "Just create report using files in folder")
    ]
    for _flag, _help in _flags:
        ceph_bench_parser.add_argument(
            _flag,
            action="store_true", default=False,
            help=_help
        )

    ceph_bench_parser.add_argument(
        '--dump-path',
        metavar="dump_results",
        help="Dump result after each test run to use them later"
    )

    # Options of a single run: (option, metavar, default, help).
    # Defaults are kept as strings, values are handed over as-is.
    _single_run_options = [
        ('--name', "name", "cephbench",
         "Name to be used for ceph bench jobs"),
        ('--bs', "blocksize", "16k",
         "Block size for single run"),
        ('--iodepth', "iodepth", "16",
         "IO Depth for single run"),
        ('--size', "size", "10G",
         "Persistent volume size (M, G)"),
        ('--readwrite', "readwrite", "randrw",
         "Test mode for single run"),
        ('--rwmixread', "rwmixread", "50",
         "Percent of read in random mixed mode (randrw)"),
        ('--ramp-time', "ramp_time", "5s",
         "Warmup time before test"),
        ('--runtime', "runtime", "60s",
         "Time based test run longevity"),
        ('--ioengine', "ioengine", "libaio",
         "IO Engine used by fio. See eng-help output in fio for list"),
        ('--offset-increment', "offset_increment", "500M",
         "Offset increment to be used in sequential modes")
    ]
    for _flag, _metavar, _default, _help in _single_run_options:
        ceph_bench_parser.add_argument(
            _flag,
            metavar=_metavar, default=_default,
            help=_help
        )

    return _parser
| 173 | |
| 174 | |
def do_info(args, config):
    """Gather Ceph cluster information and save it as an archive.

    Archive name is taken from 'tgz' argument. When it is not set,
    "ceph_info_archive.tgz" is used. Summary is printed afterwards.

    :param args: parsed command line arguments
    :param config: configuration object passed to info.KubeCephInfo
    :return: None
    """
    args_utils.check_supported_env(ENV_TYPE_KUBE, args, config)
    # Use default archive name if none given
    archive_name = args.tgz or "ceph_info_archive.tgz"

    collector = info.KubeCephInfo(config)

    logger_cli.info("# Collecting Ceph cluster information")
    collector.gather_info()

    # Debug hint: to work on archive/report generation without actual
    # data collection each time, dump the data once and load it instead
    # collector.dump_info()
    # collector.load_info()

    collector.generate_archive(archive_name)
    collector.print_summary()
| 198 | |
| 199 | |
def do_report(args, config):
    """Gather Ceph cluster information and create HTML report out of it.

    :param args: parsed command line arguments, 'html' one is the filename
    :param config: configuration object passed to info.KubeCephInfo
    :return: None
    """
    args_utils.check_supported_env(ENV_TYPE_KUBE, args, config)
    report_filename = args_utils.get_arg(args, 'html')
    logger_cli.info("# Ceph cluster Configuration report")

    collector = info.KubeCephInfo(config)
    # Debug hint: to work on report generation without actual data
    # collection each time, load previously saved data instead
    # collector.load_info()

    # Collect the data, then prepare tables used by the report
    collector.gather_info()
    collector.get_transposed_latency_table()
    collector.get_latest_health_readout()
    collector.create_html_report(report_filename)
| 219 | |
| 220 | |
def do_bench(args, config):
    """Run Ceph benchmark using multiple agents, or one of its stages.

    Depending on the arguments, one of the following is done:
    - 'cleanup_only': benchmark resources are discovered and cleaned
    - 'report_only': previous results are preloaded and report is created
    - otherwise: benchmark is executed either as a single run built from
      command line options or using tasks from 'task_file', followed by
      cleanup (unless 'no_cleanup' is set) and report creation

    Selected options are saved as attributes of the config object, which
    is then used to create bench.KubeCephBench.

    :param args: parsed command line arguments
    :param config: configuration object, updated in place
    :return: None
    """
    # if only cleanup needed do it and exit
    _cleanup_only = args_utils.get_arg(args, 'cleanup_only')
    _report_only = args_utils.get_arg(args, 'report_only')
    config.resource_prefix = "cfgagent"
    if _cleanup_only:
        # Do forced resource cleanup and exit
        config.bench_mode = "cleanup"
        config.bench_agent_count = -1
        ceph_bench = bench.KubeCephBench(config)
        logger_cli.info(
            "# Discovering benchmark resources using prefix of '{}'".format(
                config.resource_prefix
            )
        )
        ceph_bench.prepare_cleanup()
        ceph_bench.cleanup()
        return

    # Dump results options. This is the only place where dump path is set,
    # so the default is in effect for both report-only and full runs
    _dump_path = args_utils.get_arg(args, "dump_path")
    if _dump_path:
        logger_cli.info("# Results will be dumped to '{}'".format(_dump_path))
    else:
        _dump_path = "/tmp"
        logger_cli.info(
            "# No result dump path set. Defaulting to '{}'. "
            "Consider setting it if running long task_file "
            "based test runs".format(_dump_path)
        )
    config.bench_results_dump_path = _dump_path

    # Report filename
    _filename = args_utils.get_arg(args, 'html')
    # gather Ceph info
    logger_cli.info("# Collecting Ceph cluster information")
    ceph_info = info.KubeCephInfo(config)

    # Task files or options
    _opts = get_fio_options()
    # Load name and announce it
    config.bench_name = args_utils.get_arg(args, "name")
    _opts["name"] = config.bench_name
    logger_cli.info(
        "# Using '{}' as ceph bench jobs name".format(_opts["name"])
    )

    if _report_only:
        # Do forced report creation and exit
        config.bench_mode = "report"
        config.bench_agent_count = -1
        ceph_bench = bench.KubeCephBench(config)
        ceph_bench.set_ceph_info_class(ceph_info)
        # NOTE(review): resource prefix is logged here, not the report
        # filename or bench name - confirm which one was intended
        logger_cli.info(
            "# Preparing to generate report '{}'".format(
                config.resource_prefix
            )
        )
        # Preload previous results for this name
        ceph_bench.preload_results()
        # Gather ceph data
        ceph_bench.wait_ceph_cooldown()
        # Generate report
        ceph_bench.create_report(_filename)
        return

    # Prepare the tasks and do synced testrun or a single one
    logger_cli.info("# Initializing ceph benchmark module")
    args_utils.check_supported_env(ENV_TYPE_KUBE, args, config)
    # agents count option
    config.bench_agent_count = args_utils.get_arg(args, "agents")
    logger_cli.info("-> using {} agents".format(config.bench_agent_count))
    # Cleaning option
    config.no_cleaning_after_benchmark = args_utils.get_arg(args, "no_cleanup")
    # storage class
    _storage_class = args_utils.get_arg(args, "storage_class")
    logger_cli.info("-> using storage class of '{}'".format(_storage_class))
    config.bench_storage_class = _storage_class

    _task_file = args_utils.get_arg(args, "task_file", nofail=True)
    if not _task_file:
        logger_cli.info("-> Running single benchmark run")
        config.bench_mode = "single"
        # Updating _opts from arguments
        _params = [
            "bs",
            "iodepth",
            "size",
            "readwrite",
            "ramp_time",
            "runtime",
            "ioengine"
        ]
        for _param in _params:
            _opts[_param] = _get_param_and_log(args, _param)
        # One more option is picked depending on the test mode selected
        if _opts["readwrite"] in seq_modes:
            _opts["offset_increment"] = _get_param_and_log(
                args,
                "offset_increment"
            )
        elif _opts["readwrite"] in mix_modes:
            _opts["rwmixread"] = _get_param_and_log(args, "rwmixread")
    else:
        logger_cli.info("-> running with tasks from '{}'".format(_task_file))
        config.bench_task_file = _task_file
        config.bench_mode = "tasks"
    logger_cli.debug("... default/selected options for fio:")
    for _key, _value in _opts.items():
        # TODO: Update options for single run
        logger_cli.debug(" {} = {}".format(_key, _value))

    # init the Bench class
    ceph_bench = bench.KubeCephBench(config)
    ceph_bench.set_ceph_info_class(ceph_info)
    # Preload previous results for this name
    ceph_bench.preload_results()
    # Do the testrun
    ceph_bench.prepare_agents(_opts)
    ceph_bench.wait_ceph_cooldown()

    if not ceph_bench.run_benchmark(_opts):
        # No cleaning and/or report if benchmark was not finished
        logger_cli.info("# Abnormal benchmark run, no cleaning performed")
        return
    # Debug hint: enable to collect results when debugging the report
    # ceph_bench.collect_results(_opts)

    # Cleaning
    if not config.no_cleaning_after_benchmark:
        ceph_bench.cleanup()
    else:
        logger_cli.info(
            "# '--no-cleanup' option set. Cleaning not conducted."
        )

    # Create report
    ceph_bench.create_report(_filename)

    return