Alex | 5cace3b | 2021-11-10 16:40:37 -0600 | [diff] [blame] | 1 | from cfg_checker.agent.fio_runner import get_fio_options |
Alex | 90ac153 | 2021-12-09 11:13:14 -0600 | [diff] [blame^] | 2 | from cfg_checker.agent.fio_runner import seq_modes, mix_modes |
Alex | dcb792f | 2021-10-04 14:24:21 -0500 | [diff] [blame] | 3 | from cfg_checker.common import logger_cli |
| 4 | from cfg_checker.common.settings import ENV_TYPE_KUBE |
| 5 | from cfg_checker.helpers import args_utils |
| 6 | from cfg_checker.modules.ceph import info, bench |
| 7 | |
# One-line description of this command module
# NOTE(review): presumably picked up by the CLI loader for help output - confirm
command_help = "Ceph Storage information and benchmarks"
# Environment types this module declares as supported (Kubernetes only)
supported_envs = [ENV_TYPE_KUBE]
| 10 | |
| 11 | |
| 12 | # def _selectClass(_env, strClassHint="checker"): |
| 13 | # _class = None |
| 14 | # if _env == ENV_TYPE_SALT: |
| 15 | # if strClassHint == "info": |
| 16 | # _class = info.SaltCephInfo |
| 17 | # elif strClassHint == "bench": |
| 18 | # _class = bench.SaltCephInfo |
| 19 | # elif _env == ENV_TYPE_KUBE: |
| 20 | # if strClassHint == "info": |
| 21 | # _class = info.KubeCephInfo |
| 22 | # elif strClassHint == "bench": |
| 23 | # _class = bench.KubeCephBench |
| 24 | # if not _class: |
| 25 | # raise CheckerException( |
| 26 | # "Unknown hint for selecting Ceph handler Class: '{}'".format( |
| 27 | # strClassHint |
| 28 | # ) |
| 29 | # ) |
| 30 | # else: |
| 31 | # return _class |
| 32 | |
def _get_param_and_log(arg, param_str):
    """Read option `param_str` from the parsed arguments and log its value.

    :param arg: parsed arguments object, handed over to args_utils.get_arg
    :param param_str: name of the option to read
    :return: whatever args_utils.get_arg returned for this option
    """
    _fetched = args_utils.get_arg(arg, param_str)
    _log_line = " {}={}".format(param_str, _fetched)
    logger_cli.info(_log_line)
    return _fetched
| 37 | |
Alex | dcb792f | 2021-10-04 14:24:21 -0500 | [diff] [blame] | 38 | |
def init_parser(_parser):
    """Register the Ceph sub-commands ('info', 'report', 'bench') on a parser.

    The selected sub-command name ends up in the parsed namespace as `type`.
    All fio related options of 'bench' are kept as strings and are given
    defaults, so a single benchmark run can be started without a task file.

    :param _parser: argparse parser to attach the sub-commands to
    :return: the same `_parser` object, for chaining
    """
    # ceph subparsers
    ceph_subparsers = _parser.add_subparsers(dest='type')

    # --- 'info': collect cluster data into an archive
    ceph_info_parser = ceph_subparsers.add_parser(
        'info',
        help="Gather Ceph Cluster information"
    )

    ceph_info_parser.add_argument(
        '--detailed',
        action="store_true", default=False,
        help="Print additional details"
    )

    ceph_info_parser.add_argument(
        '--tgz',
        metavar='ceph_tgz_filename',
        help="TGZ archive filename to save gathered Ceph info"
    )

    # --- 'report': HTML report on the cluster
    ceph_report_parser = ceph_subparsers.add_parser(
        'report',
        help="Generate Ceph cluster report"
    )

    ceph_report_parser.add_argument(
        '--html',
        metavar='ceph_html_filename',
        help="HTML filename to save report"
    )

    # --- 'bench': benchmark run
    ceph_bench_parser = ceph_subparsers.add_parser(
        'bench',
        help="Run ceph benchmark"
    )

    ceph_bench_parser.add_argument(
        '--task-list',
        metavar='ceph_tasks_filename',
        help="List file with data for Ceph bench testrun"
    )
    ceph_bench_parser.add_argument(
        '--agents',
        type=int, metavar='agent_count', default=5,
        help="Number of benchmark agents to use"
    )
    ceph_bench_parser.add_argument(
        '--html',
        metavar='ceph_html_filename',
        help="HTML filename to save report"
    )
    ceph_bench_parser.add_argument(
        '--storage-class',
        metavar='storage_class',
        help="Storage class to be used in benchmark"
    )
    ceph_bench_parser.add_argument(
        '--task-file',
        metavar='task_file',
        help="Task file for benchmark"
    )
    ceph_bench_parser.add_argument(
        '--no-cleanup',
        action="store_true", default=False,
        help="Do not cleanup services, agents, pvc, and pv"
    )
    ceph_bench_parser.add_argument(
        '--cleanup-only',
        action="store_true", default=False,
        help="Cleanup resources related to benchmark"
    )
    ceph_bench_parser.add_argument(
        '--dump-path',
        metavar="dump_results", default="/tmp",
        help="Dump result after each test run to use them later"
    )
    ceph_bench_parser.add_argument(
        '--name',
        metavar="name", default="cephbench",
        help="Name to use for the ceph benchmark jobs"
    )
    # Options below describe a single run (used when no task file is given)
    ceph_bench_parser.add_argument(
        '--bs',
        metavar="blocksize", default="16k",
        help="Block size for single run"
    )
    ceph_bench_parser.add_argument(
        '--iodepth',
        metavar="iodepth", default="16",
        help="IO Depth for single run"
    )
    ceph_bench_parser.add_argument(
        '--size',
        metavar="size", default="10G",
        help="Persistent volume size (M, G)"
    )
    ceph_bench_parser.add_argument(
        '--readwrite',
        metavar="readwrite", default="randrw",
        help="Test mode for single run"
    )
    ceph_bench_parser.add_argument(
        '--rwmixread',
        metavar="rwmixread", default="50",
        help="Percent of read in random mixed mode (randrw)"
    )
    ceph_bench_parser.add_argument(
        '--ramp-time',
        metavar="ramp_time", default="5s",
        help="Warmup time before test"
    )
    ceph_bench_parser.add_argument(
        '--runtime',
        metavar="runtime", default="60s",
        help="Time based test run longevity"
    )
    ceph_bench_parser.add_argument(
        '--ioengine',
        metavar="ioengine", default="libaio",
        help="IO Engine used by fio. See eng-help output in fio for list"
    )
    ceph_bench_parser.add_argument(
        '--offset-increment',
        metavar="offset_increment", default="500M",
        help="Offset increment used by fio in sequential modes"
    )

    return _parser
| 168 | |
| 169 | |
def do_info(args, config):
    """Gather Ceph cluster information and pack it into a tgz archive.

    :param args: parsed CLI arguments; `args.tgz` optionally names the archive
    :param config: checker configuration handed to info.KubeCephInfo
    :return: None
    """
    args_utils.check_supported_env(ENV_TYPE_KUBE, args, config)
    # use the default archive name when --tgz was not given
    _archive_name = args.tgz or "ceph_info_archive.tgz"

    _collector = info.KubeCephInfo(config)

    logger_cli.info("# Collecting Ceph cluster information")
    _collector.gather_info()

    # Debug aid: to work on archive/report generation without collecting
    # the data each time, temporarily enable
    #     _collector.dump_info()
    #     _collector.load_info()

    _collector.generate_archive(_archive_name)
    _collector.print_summary()
| 193 | |
| 194 | |
def do_report(args, config):
    """Gather Ceph cluster information and render it as an HTML report.

    :param args: parsed CLI arguments; the 'html' option names the report file
    :param config: checker configuration handed to info.KubeCephInfo
    :return: None
    """
    args_utils.check_supported_env(ENV_TYPE_KUBE, args, config)
    _html_path = args_utils.get_arg(args, 'html')
    logger_cli.info("# Ceph cluster Configuration report")

    _collector = info.KubeCephInfo(config)
    # Debug aid: to work on report generation without collecting the data
    # each time, temporarily call _collector.load_info() here and skip
    # gather_info() below

    # collect the data first, then prepare the derived tables/readouts
    _collector.gather_info()
    _collector.get_transposed_latency_table()
    _collector.get_latest_health_readout()

    _collector.create_html_report(_html_path)
| 214 | |
| 215 | |
def do_bench(args, config):
    """Run a Ceph benchmark using multiple agent pods, or only clean up.

    With '--cleanup-only' the function discovers benchmark resources by
    the resource prefix, removes them and returns. Otherwise it copies
    the command line options onto `config` (`bench_*` attributes), builds
    the fio options for either a single run or a task file based run,
    runs the benchmark, optionally cleans up and creates the report.

    :param args: parsed CLI arguments of the 'bench' sub-command
    :param config: checker configuration; updated in place with the
        benchmark settings before bench.KubeCephBench is created
    :return: None
    """
    # if only cleanup needed do it and exit
    _cleanup_only = args_utils.get_arg(args, 'cleanup_only')
    config.resource_prefix = "cfgagent"
    if _cleanup_only:
        # Do forced resource cleanup and exit
        config.bench_mode = "cleanup"
        config.bench_agent_count = -1
        ceph_bench = bench.KubeCephBench(config)
        logger_cli.info(
            "# Discovering benchmark resources using prefix of '{}'".format(
                config.resource_prefix
            )
        )
        ceph_bench.prepare_cleanup()
        ceph_bench.cleanup()
        return

    # Ceph info object, handed to the bench class later on
    logger_cli.info("# Collecting Ceph cluster information")
    ceph_info = info.KubeCephInfo(config)

    # Prepare the tasks and do synced testrun or a single one
    logger_cli.info("# Initializing ceph benchmark module")
    args_utils.check_supported_env(ENV_TYPE_KUBE, args, config)
    # Report filename
    _filename = args_utils.get_arg(args, 'html')
    # agents count option
    config.bench_agent_count = args_utils.get_arg(args, "agents")
    logger_cli.info("-> using {} agents".format(config.bench_agent_count))
    # Cleaning option
    config.no_cleaning_after_benchmark = args_utils.get_arg(args, "no_cleanup")
    # storage class
    _storage_class = args_utils.get_arg(args, "storage_class")
    logger_cli.info("-> using storage class of '{}'".format(_storage_class))
    config.bench_storage_class = _storage_class
    # dump results options
    _dump_path = args_utils.get_arg(args, "dump_path")
    if _dump_path:
        logger_cli.info("# Results will be dumped to '{}'".format(_dump_path))
    else:
        logger_cli.info(
            "# No result dump path set. "
            "Consider setting it if running long task_file based test runs"
        )
    # the value is stored either way, even when it is empty
    config.bench_results_dump_path = _dump_path

    # Task files or options
    _opts = get_fio_options()
    _task_file = args_utils.get_arg(args, "task_file", nofail=True)
    if not _task_file:
        logger_cli.info("-> Running single benchmark run")
        config.bench_mode = "single"
        # Updating _opts from arguments
        _params = (
            "bs",
            "iodepth",
            "size",
            "readwrite",
            "ramp_time",
            "runtime",
            "ioengine",
        )
        for _p in _params:
            _opts[_p] = _get_param_and_log(args, _p)
        # mode specific option: only one of them is read, based on the mode
        if _opts["readwrite"] in seq_modes:
            _opts["offset_increment"] = _get_param_and_log(
                args,
                "offset_increment"
            )
        elif _opts["readwrite"] in mix_modes:
            _opts["rwmixread"] = _get_param_and_log(args, "rwmixread")
    else:
        logger_cli.info("-> running with tasks from '{}'".format(_task_file))
        config.bench_task_file = _task_file
        config.bench_mode = "tasks"
    config.bench_name = args_utils.get_arg(args, "name")
    _opts["name"] = config.bench_name
    logger_cli.info(
        "# Using '{}' as ceph bench jobs name".format(_opts["name"])
    )
    logger_cli.debug("... default/selected options for fio:")
    for _k, _v in _opts.items():
        logger_cli.debug(" {} = {}".format(_k, _v))

    # init the Bench class
    ceph_bench = bench.KubeCephBench(config)
    ceph_bench.set_ceph_info_class(ceph_info)
    # Preload previous results for this name
    ceph_bench.preload_results()
    # Do the testrun
    ceph_bench.prepare_agents(_opts)
    ceph_bench.wait_ceph_cooldown()

    if not ceph_bench.run_benchmark(_opts):
        # No cleaning and/or report if benchmark was not finished
        logger_cli.info("# Abnormal benchmark run, no cleaning performed")
        return

    # Cleaning
    if not config.no_cleaning_after_benchmark:
        ceph_bench.cleanup()
    else:
        # name the option exactly as it is registered in init_parser
        logger_cli.info(
            "# '--no-cleanup' option set. Cleaning not conducted."
        )

    # Create report
    ceph_bench.create_report(_filename)