Ceph bench module hotfixes after client run
Related-PROD: PROD-37028
Change-Id: Ied20709e367877ca0be1c9bd531476070059de10
diff --git a/cfg_checker/modules/ceph/__init__.py b/cfg_checker/modules/ceph/__init__.py
index e2f0049..5c9357b 100644
--- a/cfg_checker/modules/ceph/__init__.py
+++ b/cfg_checker/modules/ceph/__init__.py
@@ -1,7 +1,5 @@
# Author: Alex Savatieiev (osavatieiev@mirantis.com; a.savex@gmail.com)
# Copyright 2019-2022 Mirantis, Inc.
-from datetime import datetime
-
from cfg_checker.agent.fio_runner import get_fio_options
from cfg_checker.agent.fio_runner import seq_modes, mix_modes
from cfg_checker.common import logger_cli
@@ -337,6 +335,8 @@
logger_cli.info("-> running with tasks from '{}'".format(_task_file))
config.bench_task_file = _task_file
config.bench_mode = "tasks"
+ # Add default size to options
+ _opts["size"] = _get_param_and_log(args, "size")
logger_cli.debug("... default/selected options for fio:")
for _k in _opts.keys():
# TODO: Update options for single run
diff --git a/cfg_checker/modules/ceph/bench.py b/cfg_checker/modules/ceph/bench.py
index fbb0a13..95e39bf 100644
--- a/cfg_checker/modules/ceph/bench.py
+++ b/cfg_checker/modules/ceph/bench.py
@@ -11,6 +11,7 @@
from cfg_checker.common.decorators import retry
from cfg_checker.common.file_utils import write_str_to_file
from cfg_checker.helpers.console_utils import Progress
+from cfg_checker.helpers.console_utils import cl_typewriter
from cfg_checker.reports import reporter
# from cfg_checker.common.exception import InvalidReturnException
# from cfg_checker.common.exception import ConfigException
@@ -127,7 +128,9 @@
"rwmixread": row[1],
"bs": row[2],
"iodepth": row[3],
- "size": row[4]
+ "size": row[4],
+ "ramp_time": row[5],
+ "runtime": row[6]
})
logger_cli.info("-> Loaded {} tasks".format(len(self.tasks)))
@@ -279,11 +282,12 @@
# All is good
return True
- def get_agents_status(self):
+ def get_agents_status(self, silent=True):
_status = {}
_results = self.master.exec_on_labeled_pods_and_ns(
"app=cfgagent",
- "curl -s http://localhost:8765/api/fio"
+ "curl -s http://localhost:8765/api/fio",
+ silent=silent
)
for _agent, _result in _results.items():
_j = _parse_json_output(_result)
@@ -353,23 +357,40 @@
_stats_delay = round((_runtime + _ramptime) / 65)
_start = self.next_scheduled_time
_end = datetime.now(timezone.utc) + timedelta(seconds=_timeout)
+ logger_cli.info(" ")
+ tw = cl_typewriter()
while True:
# Print status
- _sts = self.get_agents_status()
+ tw.cl_start(" ")
+ _sts = self.get_agents_status(silent=True)
+ # Use same line
diff = (_end - datetime.now(timezone.utc)).total_seconds()
- logger_cli.info("-> {:.2f}s left. Agent status:".format(diff))
- for _agent, _status in _sts.items():
- logger_cli.info(
- "\t{}: {} ({}%)".format(
- _agent,
- _status["status"],
- _status["progress"]
+ _startin = (_start - datetime.now(timezone.utc)).total_seconds()
+ if _startin > 0:
+ tw.cl_inline("-> starting in {:.2f}s ".format(_startin))
+ else:
+ tw.cl_inline("-> {:.2f}s; ".format(diff))
+ _progress = [_st["progress"] for _st in _sts.values()]
+ tw.cl_inline(
+ "{}% <-> {}%; ".format(
+ min(_progress),
+ max(_progress)
)
)
+
+ _a_sts = [_t["status"] for _t in _sts.values()]
+ tw.cl_inline(
+ ", ".join(
+ ["{} {}".format(_a_sts.count(_s), _s)
+ for _s in set(_a_sts)]
+ )
+ )
+
# Get Ceph status if _start time passed
_elapsed = (datetime.now(timezone.utc) - _start).total_seconds()
if _elapsed > _stats_delay:
- logger_cli.info("-> {:.2f}s elapsed".format(_elapsed))
+ # Use same line output
+ tw.cl_inline(" {:.2f}s elapsed".format(_elapsed))
_sec = "{:0.1f}".format(_elapsed)
self.results[options["scheduled_to"]]["ceph"][_sec] = \
self.ceph_info.get_cluster_status()
@@ -379,16 +400,19 @@
_fcnt = len(finished)
_tcnt = len(_sts)
if _fcnt < _tcnt:
- logger_cli.info("-> {}/{} finished".format(_fcnt, _tcnt))
+ tw.cl_inline(" {}/{}".format(_fcnt, _tcnt))
else:
+ tw.cl_flush(newline=True)
logger_cli.info("-> All agents finished run")
return True
# recalc how much is left
diff = (_end - datetime.now(timezone.utc)).total_seconds()
# In case end_datetime was in past to begin with
if diff < 0:
+ tw.cl_flush(newline=True)
logger_cli.info("-> Timed out waiting for agents to finish")
return False
+ tw.cl_flush()
def _do_testrun(self, options):
self.results[options["scheduled_to"]]["osd_df_before"] = \
@@ -573,8 +597,8 @@
if _time not in self.results:
# Some older results found
# do not process them
- logger_cli.info(
- "-> Skipped old results for '{}'".format(_time)
+ logger_cli.debug(
+ "...skipped old results for '{}'".format(_time)
)
continue
elif _agent not in self.results[_time]["agents"]:
@@ -690,6 +714,11 @@
return None
def calculate_totals(self):
+ def _savg(vlist):
+ if len(vlist) > 0:
+ return (sum(vlist) / len(vlist)) / 1000
+ else:
+ return 0
# Calculate totals for Read and Write
for _time, data in self.results.items():
if "totals" not in data:
@@ -731,16 +760,12 @@
)
_totals["read_bw_bytes"] = _r_bw
- _totals["read_avg_lat_us"] = \
- (sum(_r_avglat) / len(_r_avglat)) / 1000
- _totals["read_95p_clat_us"] = \
- (sum(_r_95clat) / len(_r_95clat)) / 1000
+ _totals["read_avg_lat_us"] = _savg(_r_avglat)
+ _totals["read_95p_clat_us"] = _savg(_r_95clat)
_totals["read_iops"] = _r_iops
_totals["write_bw_bytes"] = _w_bw
- _totals["write_avg_lat_us"] = \
- (sum(_w_avglat) / len(_w_avglat)) / 1000
- _totals["write_95p_clat_us"] = \
- (sum(_w_95clat) / len(_w_95clat)) / 1000
+ _totals["write_avg_lat_us"] = _savg(_w_avglat)
+ _totals["write_95p_clat_us"] = _savg(_w_95clat)
_totals["write_iops"] = _w_iops
def calculate_ceph_stats(self):
@@ -830,7 +855,7 @@
data["osd_summary"]["active"] = {
"status": "",
"device_class": "",
- "pgs": "",
+ "pgs": 0,
"kb_used": 0,
"kb_used_data": 0,
"kb_used_omap": 0,
diff --git a/cfg_checker/modules/ceph/info.py b/cfg_checker/modules/ceph/info.py
index 1f3b8cb..1fb9205 100644
--- a/cfg_checker/modules/ceph/info.py
+++ b/cfg_checker/modules/ceph/info.py
@@ -8,7 +8,7 @@
from time import sleep
from datetime import datetime
-from cfg_checker.common import logger_cli
+from cfg_checker.common import logger_cli, logger
from cfg_checker.common.exception import KubeException
from cfg_checker.helpers.console_utils import Progress
@@ -204,15 +204,15 @@
_fname = d["filename"]
if isinstance(d["data"], dict) or isinstance(d["data"], list):
_buf = json.dumps(d["data"], indent=2)
- # _filename = key + ".json" if _fname is not None else _fname
+ # _filename = key+".json" if _fname is not None else _fname
_filename = _ensure_fname(".json")
elif isinstance(d["data"], str):
_buf = d["data"]
- # _filename = key + ".txt"
+ # _filename = key+".txt"
_filename = _ensure_fname(".txt")
else:
_buf = str(d["data"])
- # _filename = key + ".txt"
+ # _filename = key+".txt"
_filename = _ensure_fname(".txt")
logger_cli.debug("... writing '{}'".format(_filename))
_tgz.add_file(_filename, buf=_buf, replace=True)
@@ -279,9 +279,9 @@
cmd_str
)
if expect_output and not _r:
- logger_cli.debug("... got empty output for '{}'".format(cmd_str))
+ logger.debug("... got empty output for '{}'".format(cmd_str))
elif not expect_output and _r:
- logger_cli.warning(
+ logger.warning(
"WARNING: Unexpected output for '{}':\n"
"===== Start\n{}\n===== End".format(cmd_str, _r)
)