cfg-checker ceph bench module alpha version
- Ceph benchmark report (beta)
- Updated result time selection: results are now reported based on test run start time
- New methods for listing Kubernetes resources (pods, services, PVCs, PVs)
- Cleanup-only mode
- Unified results processing
- Additional Ceph info gathering
- Experimental bar chart graph example
Fixes:
- Kube API client recreated each time for stability (HTTP/WebSocket specifics)
- Argument naming fixes
Change-Id: Id541f789a00ab4ee827603c5b6f7f07899aaa7c5
diff --git a/cfg_checker/agent/fio_runner.py b/cfg_checker/agent/fio_runner.py
index c8488af..3cc6ca2 100644
--- a/cfg_checker/agent/fio_runner.py
+++ b/cfg_checker/agent/fio_runner.py
@@ -257,6 +257,7 @@
self._fio_options_common["ioengine"] = "posixaio"
# Thread finish marker
self.finished = False
+ self.testrun_starttime = None
self.scheduled_datetime = None
def update_options(self, _dict):
@@ -296,8 +297,8 @@
_q = queue.Queue()
self.fiorun = ShellThread(_cmd, _q)
# Check if schedule is set
+ _now = datetime.now(timezone.utc)
if self.scheduled_datetime:
- _now = datetime.now(timezone.utc)
logger.debug(
"waiting for '{}', now is '{}', total of {} sec left".format(
self.scheduled_datetime.strftime(_datetime_fmt),
@@ -306,6 +307,8 @@
)
)
wait_until(self.scheduled_datetime)
+ else:
+ self.testrun_starttime = _now.strftime(_datetime_fmt)
self.fiorun.run_shell()
_raw = []
_start = -1
@@ -360,11 +363,13 @@
break
sleep(0.1)
# Save status to results dictionary
- self.results[get_time(timestamp=self.testrun["timestamp"])] = {
+ self.results[self.testrun_starttime] = {
"result": self.testrun,
"timeline": self.timeline
}
self.finished = True
+ self.scheduled_datetime = None
+ self.testrun_starttime = None
return
def healthcheck(self):
@@ -511,9 +516,9 @@
raise CheckerException("Parameter missing: 'scheduled_to'")
else:
# set time and get rid of it from options
- _time = options.pop("scheduled_to")
+ self.fio.testrun_starttime = options.pop("scheduled_to")
self.fio.scheduled_datetime = datetime.strptime(
- _time,
+ self.fio.testrun_starttime,
_datetime_fmt
)
# Fill options
@@ -612,7 +617,7 @@
_opts["readwrite"] = "read"
_opts["ramp_time"] = "1s"
_opts["runtime"] = "5s"
- _opts["scheduled_to"] = "11/13/2021, 23:03:30+0000"
+ _opts["scheduled_to"] = "11/23/2021, 21:48:20+0000"
_shell.do_scheduledrun(_opts)
_shell()
_times = _shell.get_resultlist()
@@ -627,8 +632,8 @@
_opts["readwrite"] = "read"
_opts["ramp_time"] = "1s"
_opts["runtime"] = "10s"
- _opts["scheduled_to"] = "11/13/2021, 23:04:20+0000"
- _shell.do_scheduledrun(_opts)
+ # _opts["scheduled_to"] = "11/23/2021, 21:40:30+0000"
+ _shell.do_singlerun(_opts)
_shell()
_times = _shell.get_resultlist()
print("# results:\n{}".format("\n".join(_times)))
diff --git a/cfg_checker/common/file_utils.py b/cfg_checker/common/file_utils.py
index 6fbb675..faf7cf0 100644
--- a/cfg_checker/common/file_utils.py
+++ b/cfg_checker/common/file_utils.py
@@ -102,7 +102,7 @@
os.mkdir(_folder)
return "... folder '{}' created".format(_folder)
else:
- return "... folder is at '{}'".format(_folder)
+ return "... folder exists at '{}'".format(_folder)
def ensure_folder_removed(_folder):
diff --git a/cfg_checker/common/kube_utils.py b/cfg_checker/common/kube_utils.py
index 22eee30..3e15095 100644
--- a/cfg_checker/common/kube_utils.py
+++ b/cfg_checker/common/kube_utils.py
@@ -206,7 +206,6 @@
class KubeRemote(KubeApi):
def __init__(self, config):
super(KubeRemote, self).__init__(config)
- self._coreV1 = None
self._appsV1 = None
self._podV1 = None
self._custom = None
@@ -219,12 +218,10 @@
@property
def CoreV1(self):
- if not self._coreV1:
- if self.is_local:
- self._coreV1 = kclient.CoreV1Api(kclient.ApiClient())
- else:
- self._coreV1 = kclient.CoreV1Api(kclient.ApiClient(self.kConf))
- return self._coreV1
+ if self.is_local:
+ return kclient.CoreV1Api(kclient.ApiClient())
+ else:
+ return kclient.CoreV1Api(kclient.ApiClient(self.kConf))
@property
def AppsV1(self):
@@ -377,6 +374,7 @@
return _pods
+ @retry(ApiException, initial_wait=5)
def exec_on_target_pod(
self,
cmd,
@@ -425,6 +423,7 @@
cmd = cmd if isinstance(cmd, list) else cmd.split()
if arguments:
cmd += [arguments]
+ # Make sure that CoreV1 is fresh before calling it
_pod_stream = stream(
self.CoreV1.connect_get_namespaced_pod_exec,
_pname,
@@ -453,8 +452,6 @@
)
if not _output:
_output = _error
- # Force recreate of api objects
- self._coreV1 = None
# Send output
return _output
@@ -596,7 +593,7 @@
return []
- @retry(ApiException)
+ @retry(ApiException, initial_wait=5)
def get_pods_for_daemonset(self, ds):
# get all pod names for daemonset
logger_cli.debug(
@@ -612,6 +609,7 @@
)
return _pods
+ @retry(ApiException, initial_wait=5)
def put_string_buffer_to_pod_as_textfile(
self,
pod_name,
@@ -653,15 +651,12 @@
logger_cli.debug("... STDERR: %s" % response.read_stderr())
if commands:
c = commands.pop(0)
- logger_cli.debug("... running command... {}\n".format(c))
+ logger_cli.debug("... running command... {}".format(c))
response.write_stdin(str(c, encoding='utf-8'))
else:
break
response.close()
- # Force recreate of Api objects
- self._coreV1 = None
-
return
def get_custom_resource(self, group, version, plural):
@@ -824,6 +819,12 @@
name
)
+ def list_pods(self, ns, label_str=None):
+ return self.CoreV1.list_namespaced_pod(
+ ns,
+ label_selector=label_str
+ )
+
def get_svc_by_name_and_ns(self, name, ns):
return self.safe_get_item_by_name(
self.CoreV1.list_namespaced_service(
@@ -833,6 +834,12 @@
name
)
+ def list_svc(self, ns, label_str=None):
+ return self.CoreV1.list_namespaced_service(
+ ns,
+ label_selector=label_str
+ )
+
def get_pvc_by_name_and_ns(self, name, ns):
return self.safe_get_item_by_name(
self.CoreV1.list_namespaced_persistent_volume_claim(
@@ -842,6 +849,12 @@
name
)
+ def list_pvc(self, ns, label_str=None):
+ return self.CoreV1.list_namespaced_persistent_volume_claim(
+ ns,
+ label_selector=label_str
+ )
+
def get_pv_by_name(self, name):
return self.safe_get_item_by_name(
self.CoreV1.list_persistent_volume(
@@ -850,6 +863,11 @@
name
)
+ def list_pv(self, label_str=None):
+ return self.CoreV1.list_persistent_volume(
+ label_selector=label_str
+ )
+
def wait_for_phase(self, ttype, name, ns, phase_list, timeout=120):
logger_cli.debug(
"... waiting '{}'s until {} is '{}'".format(
diff --git a/cfg_checker/modules/ceph/__init__.py b/cfg_checker/modules/ceph/__init__.py
index 0f1de01..f9bf3ca 100644
--- a/cfg_checker/modules/ceph/__init__.py
+++ b/cfg_checker/modules/ceph/__init__.py
@@ -89,7 +89,7 @@
)
ceph_bench_parser.add_argument(
'--task-file',
- metavar='task-file',
+ metavar='task_file',
help="Task file for benchmark"
)
ceph_bench_parser.add_argument(
@@ -97,6 +97,16 @@
action="store_true", default=False,
help="Do not cleanup services, agents, pvc, and pv"
)
+ ceph_bench_parser.add_argument(
+ '--cleanup-only',
+ action="store_true", default=False,
+ help="Cleanup resources related to benchmark"
+ )
+ ceph_bench_parser.add_argument(
+ '--dump-path',
+ metavar="dump_results", default="/tmp",
+ help="Dump result after each test run to use them later"
+ )
return _parser
@@ -149,8 +159,29 @@
def do_bench(args, config):
# Ceph Benchmark using multiple pods
+ # if only cleanup needed do it and exit
+ _cleanup_only = args_utils.get_arg(args, 'cleanup_only')
+ config.resource_prefix = "cfgagent"
+ if _cleanup_only:
+ # Do forced resource cleanup and exit
+ config.bench_mode = "cleanup"
+ config.bench_agent_count = -1
+ ceph_bench = bench.KubeCephBench(config)
+ logger_cli.info(
+ "# Discovering benchmark resources using prefix of '{}'".format(
+ config.resource_prefix
+ )
+ )
+ ceph_bench.prepare_cleanup()
+ ceph_bench.cleanup()
+ return
+
+ # gather Ceph info
+ logger_cli.info("# Collecting Ceph cluster information")
+ ceph_info = info.KubeCephInfo(config)
+
# Prepare the tasks and do synced testrun or a single one
- logger_cli.info("# Initializing benchmark run")
+ logger_cli.info("# Initializing ceph benchmark module")
args_utils.check_supported_env(ENV_TYPE_KUBE, args, config)
_filename = args_utils.get_arg(args, 'html')
# agents count option
@@ -161,6 +192,17 @@
_storage_class = args_utils.get_arg(args, "storage_class")
logger_cli.info("-> using storage class of '{}'".format(_storage_class))
config.bench_storage_class = _storage_class
+ # dump results options
+ _dump_path = args_utils.get_arg(args, "dump_path")
+ if _dump_path:
+ logger_cli.info("# Results will be dumped to '{}'".format(_dump_path))
+ config.bench_results_dump_path = _dump_path
+ else:
+ logger_cli.info(
+ "# No result dump path set. "
+ "Consider setting it if running long task_file based test runs"
+ )
+ config.bench_results_dump_path = _dump_path
# Task files or options
_task_file = args_utils.get_arg(args, "task_file", nofail=True)
if not _task_file:
@@ -180,12 +222,20 @@
# init the Bench class
ceph_bench = bench.KubeCephBench(config)
+ ceph_bench.set_ceph_info_class(ceph_info)
# Do the testrun
ceph_bench.prepare_agents(_opts)
+ ceph_bench.wait_ceph_cooldown()
+
+ # DEBUG of report in progress
if not ceph_bench.run_benchmark(_opts):
# No cleaning and/or report if benchmark was not finished
logger_cli.info("# Abnormal benchmark run, no cleaning performed")
return
+ # Remove after DEBUG
+ # ceph_bench.collect_results(_opts)
+ # END DEBUG
+
# Cleaning
if not config.no_cleaning_after_benchmark:
ceph_bench.cleanup()
diff --git a/cfg_checker/modules/ceph/bench.py b/cfg_checker/modules/ceph/bench.py
index 7640440..d804f4a 100644
--- a/cfg_checker/modules/ceph/bench.py
+++ b/cfg_checker/modules/ceph/bench.py
@@ -7,7 +7,9 @@
from cfg_checker.common import logger_cli
from cfg_checker.common.decorators import retry
+from cfg_checker.common.file_utils import write_str_to_file
from cfg_checker.helpers.console_utils import Progress
+from cfg_checker.reports import reporter
# from cfg_checker.common.exception import InvalidReturnException
# from cfg_checker.common.exception import ConfigException
# from cfg_checker.common.exception import KubeException
@@ -16,6 +18,27 @@
from cfg_checker.agent.fio_runner import _get_seconds, _datetime_fmt
+def _reformat_timestr(_str, _chars=["/", ",", " ", ":", "+"], _tchar=""):
+ _new = ""
+ for _c in _str:
+ _new += _c if _c not in _chars else _tchar
+ return _new
+
+
+def _parse_json_output(buffer):
+ try:
+ return json.loads(buffer)
+ except TypeError as e:
+ logger_cli.error(
+ "ERROR: Status not decoded: {}\n{}".format(e, buffer)
+ )
+ except json.decoder.JSONDecodeError as e:
+ logger_cli.error(
+ "ERROR: Status not decoded: {}\n{}".format(e, buffer)
+ )
+ return {}
+
+
class CephBench(object):
_agent_template = "cfgagent-template.yaml"
@@ -41,17 +64,30 @@
self.agent_count = config.bench_agent_count
self.master = KubeNodes(config)
super(KubeCephBench, self).__init__(config)
- self.storage_class = config.bench_storage_class
- self.agent_pods = []
- self.services = []
- self.scheduled_delay = 30
+
self.mode = config.bench_mode
+ self.resource_prefix = config.resource_prefix
if config.bench_mode == "tasks":
self.taskfile = config.bench_task_file
self.load_tasks(self.taskfile)
+ elif config.bench_mode == "cleanup":
+ self.cleanup_list = []
+ return
+
+ self.storage_class = config.bench_storage_class
+ self.results_dump_path = config.bench_results_dump_path
+ self.agent_pods = []
+ self.services = []
+ # By default,
+ # 30 seconds should be enough to send tasks to 3-5 agents
+ self.scheduled_delay = 30
self.cleanup_list = []
self.results = {}
+ self.agent_results = {}
+
+ def set_ceph_info_class(self, ceph_info):
+ self.ceph_info = ceph_info
def load_tasks(self, taskfile):
# Load csv file
@@ -68,10 +104,33 @@
"iodepth": row[3],
"size": row[4]
})
+ logger_cli.info("-> Loaded {} tasks".format(len(self.tasks)))
def add_for_deletion(self, obj, typ):
- _d = [typ, obj.metadata.namespace, obj.metadata.name]
- self.cleanup_list.append(_d)
+ self.cleanup_list.append(
+ [
+ typ,
+ obj.metadata.namespace,
+ obj.metadata.name
+ ]
+ )
+ return
+
+ def prepare_cleanup(self):
+ # Assume number of resources not given
+ # list all svc, pod, pvc, pv and identify 'cfgagent-xx' ones
+ _types = ["pv", "pvc", "pod", "svc"]
+ _prefix = self.resource_prefix
+ for _typ in _types:
+ _list = self.master.list_resource_names_by_type_and_ns(_typ)
+ for ns, name in _list:
+ if name.startswith(_prefix):
+ if ns:
+ _msg = "{} {}/{}".format(_typ, ns, name)
+ else:
+ _msg = "{} {}".format(_typ, name)
+ logger_cli.info("-> Found {}".format(_msg))
+ self.cleanup_list.append([_typ, ns, name])
return
def prepare_agents(self, options):
@@ -98,19 +157,23 @@
# Save service
self.services.append(_svc)
# prepopulate results
- self.results[_agent.metadata.name] = {}
- self.results[_agent.metadata.name]["list"] = {}
- self.results[_agent.metadata.name]["url"] = \
+ self.agent_results[_agent.metadata.name] = {}
+ self.agent_results[_agent.metadata.name]["url"] = \
"http://{}:{}/api/".format(
_svc.spec.cluster_ip,
8765
)
- self.results[_agent.metadata.name]["storage_class"] = \
+ self.agent_results[_agent.metadata.name]["storage_class"] = \
self.storage_class
- self.results[_agent.metadata.name]["volume_size"] = \
+ self.agent_results[_agent.metadata.name]["volume_size"] = \
options['size']
logger_cli.info("-> Done creating agents")
+ # TODO: Update after implementing pooled task sending
+ self.scheduled_delay = self.agent_count * 6
+ logger_cli.info(
+ "-> Schedule delay set to {} sec".format(self.scheduled_delay)
+ )
return
def _poke_agent(self, url, body, action="GET"):
@@ -141,18 +204,7 @@
self.master._namespace,
" ".join(_cmd)
)
- try:
- return json.loads(_ret)
- except TypeError as e:
- logger_cli.error(
- "ERROR: Status not decoded: {}\n{}".format(e, _ret)
- )
- except json.decoder.JSONDecodeError as e:
- logger_cli.error(
- "ERROR: Status not decoded: {}\n{}".format(e, _ret)
- )
-
- return None
+ return _parse_json_output(_ret)
def _ensure_agents_ready(self):
# make sure agents idle
@@ -190,18 +242,24 @@
def get_agents_status(self):
_status = {}
- for _agent, _d in self.results.items():
- _status[_agent] = self._poke_agent(_d["url"] + "fio", {})
+ _results = self.master.exec_on_labeled_pods_and_ns(
+ "app=cfgagent",
+ "curl -s http://localhost:8765/api/fio"
+ )
+ for _agent, _result in _results.items():
+ _j = _parse_json_output(_result)
+ _status[_agent] = _j
return _status
+ @retry(Exception, initial_wait=5)
def get_agents_resultlist(self):
_t = {"module": "fio", "action": "get_resultlist"}
_status = {}
- for _agent, _d in self.results.items():
+ for _agent, _d in self.agent_results.items():
_status[_agent] = self._poke_agent(_d["url"], _t, action="POST")
return _status
- @retry(Exception)
+ @retry(Exception, initial_wait=5)
def get_result_from_agent(self, agent, time):
_t = {
"module": "fio",
@@ -210,7 +268,11 @@
"time": time
}
}
- return self._poke_agent(self.results[agent]["url"], _t, action="POST")
+ return self._poke_agent(
+ self.agent_results[agent]["url"],
+ _t,
+ action="POST"
+ )
def _get_next_scheduled_time(self):
_now = datetime.now(timezone.utc)
@@ -228,7 +290,7 @@
"action": "do_scheduledrun",
"options": options
}
- for _agent, _d in self.results.items():
+ for _agent, _d in self.agent_results.items():
logger_cli.info(
"-> sending task to '{}:{}'".format(_agent, _d["url"])
)
@@ -261,7 +323,7 @@
_status["progress"]
)
)
- finished = [True for _s in _sts.values()
+ finished = [True for _s in _sts.values()
if _s["status"] == 'finished']
_fcnt = len(finished)
_tcnt = len(_sts)
@@ -276,8 +338,9 @@
if diff < 0:
logger_cli.info("-> Timed out waiting for agents to finish")
return False
- logger_cli.info("-> Sleeping for {:.2f}s".format(diff/3))
- sleep(diff/3)
+ else:
+ logger_cli.info("-> Sleeping for {:.2f}s".format(2))
+ sleep(2)
if diff <= 0.1:
logger_cli.info("-> Timed out waiting for agents to finish")
return False
@@ -292,12 +355,17 @@
else:
logger_cli.info("-> Finished testrun")
# Get results for each agent
- self.collect_results()
+ self.collect_results(options)
return True
- def _wait_ceph_cooldown(self):
+ def wait_ceph_cooldown(self):
# TODO: Query Ceph once every 20 sec to make sure its load dropped
+ # get ceph idle status
+ self.ceph_idle_status = self.ceph_info.get_cluster_status()
+ self.health_detail = self.ceph_info.get_health_detail()
+ self.ceph_df = self.ceph_info.get_ceph_df()
+ self.ceph_pg_dump = self.ceph_info.get_ceph_pg_dump()
return
def run_benchmark(self, options):
@@ -309,7 +377,9 @@
# Make sure that Ceph is at low load
# TODO: Ceph status check
- self._wait_ceph_cooldown()
+ # self._wait_ceph_cooldown()
+
+ _get_df = self.ceph_info.get_ceph_osd_df
# Do benchmark according to mode
if self.mode == "tasks":
@@ -321,6 +391,8 @@
# take next task
_total_tasks = len(self.tasks)
for idx in range(_total_tasks):
+ # init time to schedule
+ _osd_df_before = _get_df()
_task = self.tasks[idx]
logger_cli.info(
"-> Starting next task ({}/{})".format(idx+1, _total_tasks)
@@ -333,18 +405,36 @@
)
# update options
options.update(_task)
- # init time to schedule
- options["scheduled_to"] = self._get_next_scheduled_time()
+ _sch_time = self._get_next_scheduled_time()
+ options["scheduled_to"] = _sch_time
+ # init results table
+ self.results[_sch_time] = {
+ "input_options": options,
+ "agents": {},
+ "osd_df_before": _osd_df_before
+ }
if not self._do_testrun(options):
return False
+ else:
+ self.results[_sch_time]["osd_df_after"] = _get_df()
- self._wait_ceph_cooldown()
+ self.wait_ceph_cooldown()
elif self.mode == "single":
logger_cli.info("# Running single benchmark")
+ _osd_df_before = _get_df()
# init time to schedule
- options["scheduled_to"] = self._get_next_scheduled_time()
+ _sch_time = self._get_next_scheduled_time()
+ options["scheduled_to"] = _sch_time
+ # init results table
+ self.results[_sch_time] = {
+ "input_options": options,
+ "agents": {},
+ "osd_df_before": _osd_df_before
+ }
if not self._do_testrun(options):
return False
+ else:
+ self.results[_sch_time]["osd_df_after"] = _get_df()
else:
logger_cli.error("ERROR: Unknown mode '{}'".format(self.mode))
return False
@@ -354,6 +444,7 @@
return True
def cleanup(self):
+ logger_cli.info("# Cleaning up")
self.cleanup_list.reverse()
for _res in self.cleanup_list:
@@ -385,39 +476,179 @@
return
- def collect_results(self):
+ def collect_results(self, options):
+ _sch_time = options["scheduled_to"]
logger_cli.info("# Collecting results")
# query agents for results
_agents = self.get_agents_resultlist()
+ # Syntax shortcut
+ _ar = self.results[_sch_time]["agents"]
for _agent, _l in _agents.items():
- _list = _l["resultlist"]
- _new = [r for r in _list if r not in self.results[_agent]["list"]]
- logger_cli.debug(
- "... agent '{}' has {} new results".format(_agent, len(_new))
- )
- # get all new results
- for _time in _new:
- logger_cli.info(
- "-> loading results for '{}' from '{}'".format(
- _time,
- _agent
+ # Create a syntax shortcut
+ if _agent not in _ar:
+ _ar[_agent] = {}
+ _arl = _ar[_agent]
+ # Check if we already have this locally
+ for _time in _l["resultlist"]:
+ _filename = self._get_dump_filename(_sch_time, _agent, options)
+ if os.path.exists(_filename):
+ # There is a file already for this task
+ # Check if we need to load it
+ if _sch_time in _arl:
+ logger_cli.info(
+ "-> Skipped already processed result '{}'".format(
+ _filename
+ )
+ )
+ else:
+ # Load previously dumped result from disk
+ logger_cli.info(
+ "-> Loading already present result '{}'".format(
+ _filename
+ )
+ )
+ _arl[_sch_time] = self.load_dumped_result(_filename)
+ else:
+ # Load result add it locally and dump it
+ logger_cli.info(
+ "-> Getting results for '{}' from '{}'".format(
+ _sch_time,
+ _agent
+ )
)
- )
- self.results[_agent]["list"].update(
- self.get_result_from_agent(_agent, _time)
- )
+ _r = self.get_result_from_agent(_agent, _time)
+ # Important to switch from result status time
+ # to scheduled time
+ _arl[_sch_time] = _r[_time]
+ # Dump collected result
+ self.dump_result(_filename, _arl[_sch_time])
return
- def dump_results(self, path):
- # Function dumps all availabkle results as jsons to the given path
+ def _get_dump_filename(self, _time, agent, options):
+ _dirname = _reformat_timestr(_time)
+ _filename = "-".join([
+ _dirname,
+ agent,
+ options["readwrite"],
+ options["bs"],
+ str(options["iodepth"]),
+ ]) + ".json"
+ return os.path.join(
+ self.results_dump_path,
+ _dirname,
+ _filename
+ )
+
+ def dump_result(self, filename, data):
+ # Function dumps all available results as jsons to the given path
# overwriting if needed
-
- # TODO: Conduct the dumping
-
+ _folder, _file = os.path.split(filename)
+ # Do dump
+ if not os.path.exists(_folder):
+ os.mkdir(_folder)
+ logger_cli.info("-> Created folder '{}'".format(_folder))
+ # Dump agent data for this test run
+ write_str_to_file(filename, json.dumps(data, indent=2))
+ logger_cli.info("-> Dumped '{}'".format(filename))
return
+ def load_dumped_result(self, filename):
+ try:
+ with open(filename, "rt+") as f:
+ return json.loads(f.read())
+ except FileNotFoundError as e:
+ logger_cli.error(
+ "ERROR: {}".format(e)
+ )
+ except TypeError as e:
+ logger_cli.error(
+ "ERROR: Invalid file ({}): {}".format(filename, e)
+ )
+ except json.decoder.JSONDecodeError as e:
+ logger_cli.error(
+ "ERROR: Failed to decode json ({}): {}".format(filename, e)
+ )
+ return None
+
+ def _lookup_storage_class_id_by_name(self, storage_class_name):
+ # Assume that self had proper data
+ for _pool in self.ceph_df["pools"]:
+ if storage_class_name == _pool["name"]:
+ return _pool["id"]
+ return None
+
+ def calculate_totals(self):
+ # Calculate totals for Read and Write
+ for _time, data in self.results.items():
+ if "totals" not in data:
+ data["totals"] = {}
+ else:
+ continue
+ _totals = data["totals"]
+ _r_bw = 0
+ _r_avglat = []
+ _r_iops = 0
+ _w_bw = 0
+ _w_avglat = []
+ _w_iops = 0
+ for _a, _d in data["agents"].items():
+ # Hardcoded number of jobs param :(
+ _j = _d[_time]["jobs"][0]
+ _r_bw += _j["read"]["bw_bytes"]
+ _r_avglat += [_j["read"]["lat_ns"]["mean"]]
+ _r_iops += _j["read"]["iops"]
+ _w_bw += _j["write"]["bw_bytes"]
+ _w_avglat += [_j["write"]["lat_ns"]["mean"]]
+ _w_iops += _j["write"]["iops"]
+ # Save storage class name
+ if "storage_class" not in _totals:
+ _totals["storage_class"] = \
+ self.agent_results[_a]["storage_class"]
+ # Lookup storage class id and num_pg
+ _totals["storage_class_stats"] = \
+ reporter.get_pool_stats_by_id(
+ self._lookup_storage_class_id_by_name(
+ self.agent_results[_a]["storage_class"]
+ ),
+ self.ceph_pg_dump
+ )
+
+ _totals["read_bw_bytes"] = _r_bw
+ _totals["read_avg_lat_us"] = \
+ (sum(_r_avglat) / len(_r_avglat)) / 1000
+ _totals["read_iops"] = _r_iops
+ _totals["write_bw_bytes"] = _w_bw
+ _totals["write_avg_lat_us"] = \
+ (sum(_w_avglat) / len(_w_avglat)) / 1000
+ _totals["write_iops"] = _w_iops
+
# Create report
def create_report(self, filename):
+ """
+ Create static html showing ceph info report
+
+ :return: none
+ """
+ logger_cli.info("### Generating report to '{}'".format(filename))
+ _report = reporter.ReportToFile(
+ reporter.HTMLCephBench(self),
+ filename
+ )
+ self.calculate_totals()
+ _report(
+ {
+ "results": self.results,
+ "idle_status": self.ceph_idle_status,
+ "health_detail": self.health_detail,
+ "ceph_df": self.ceph_df,
+ "ceph_pg_dump": self.ceph_pg_dump,
+ "info": self.ceph_info.ceph_info,
+ "cluster": self.ceph_info.cluster_info,
+ "ceph_version": self.ceph_info.ceph_version,
+ "nodes": self.agent_pods
+ }
+ )
+ logger_cli.info("-> Done")
return
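To make the dump layout concrete, this is what _reformat_timestr() and the _get_dump_filename() naming scheme above produce for a sample schedule time (the agent name and fio option values are illustrative only):

    import os

    def _reformat_timestr(_str, _chars=["/", ",", " ", ":", "+"], _tchar=""):
        _new = ""
        for _c in _str:
            _new += _c if _c not in _chars else _tchar
        return _new

    _time = "11/23/2021, 21:48:20+0000"
    _dirname = _reformat_timestr(_time)  # '112320212148200000'
    _filename = "-".join([_dirname, "cfgagent-0", "randrw", "4k", "16"]) + ".json"
    print(os.path.join("/tmp", _dirname, _filename))
    # /tmp/112320212148200000/112320212148200000-cfgagent-0-randrw-4k-16.json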
diff --git a/cfg_checker/modules/ceph/info.py b/cfg_checker/modules/ceph/info.py
index 9b55c3f..56e250e 100644
--- a/cfg_checker/modules/ceph/info.py
+++ b/cfg_checker/modules/ceph/info.py
@@ -355,6 +355,21 @@
))
return self.cluster_info['version']
+ def get_cluster_status(self):
+ return self._safe_get_cmd_output_as_json("ceph -s -f json")
+
+ def get_health_detail(self):
+ return self._safe_get_cmd_output_as_json("ceph -f json health detail")
+
+ def get_ceph_df(self):
+ return self._safe_get_cmd_output_as_json("ceph df -f json")
+
+ def get_ceph_pg_dump(self):
+ return self._safe_get_cmd_output_as_json("ceph pg dump -f json")
+
+ def get_ceph_osd_df(self):
+ return self._safe_get_cmd_output_as_json("ceph osd df -f json")
+
def gather_info(self):
logger_cli.info("# Gathering Ceph cluster info")
# Collect info
@@ -394,14 +409,14 @@
self._add_ceph_info_item(
"cluster_status",
"Cluster status",
- _cj("ceph -s -f json")
+ self.get_cluster_status()
)
logger_cli.info("-> Collecting health detail")
self._add_ceph_info_item(
"health_detail",
"Health details",
- _cj("ceph -f json health detail")
+ self.get_health_detail()
)
logger_cli.info("-> Collecting monmap")
@@ -415,14 +430,14 @@
self._add_ceph_info_item(
"ceph_df",
"Ceph DF",
- _cj("ceph df -f json")
+ self.get_ceph_df()
)
logger_cli.info("-> Collecting ceph osd df")
self._add_ceph_info_item(
"ceph_osd_df",
"Ceph OSD DF",
- _cj("ceph osd df -f json")
+ self.get_ceph_osd_df()
)
logger_cli.info("-> Collecting ceph osd dump")
@@ -463,7 +478,7 @@
self._add_ceph_info_item(
"ceph_pg_dump",
"Ceph PG dump",
- _cj("ceph pg dump -f json")
+ self.get_ceph_pg_dump()
)
logger_cli.info("-> Collecting ceph running configuration")
diff --git a/cfg_checker/modules/network/mapper.py b/cfg_checker/modules/network/mapper.py
index dea7d4e..c853724 100644
--- a/cfg_checker/modules/network/mapper.py
+++ b/cfg_checker/modules/network/mapper.py
@@ -789,7 +789,7 @@
self.daemonset = _d
return self.daemonset
- def get_script_output(self, script, args=None):
+ def get_script_output(self, script, _args=None):
"""
Get runtime network by creating DaemonSet with Host network parameter
"""
@@ -801,7 +801,7 @@
_result = self.master.execute_cmd_on_daemon_set(
_daemonset,
script,
- args=args,
+ _args=_args,
is_script=True
)
@@ -823,7 +823,7 @@
_networks = None
if source == self.RUNTIME:
logger_cli.info("# Mapping node runtime network data")
- _r = self.get_script_output("ifs_data.py", args="json")
+ _r = self.get_script_output("ifs_data.py", _args="json")
_networks = self._map_runtime_networks(_r)
else:
raise ConfigException(
diff --git a/cfg_checker/nodes.py b/cfg_checker/nodes.py
index 49284ca..a673842 100644
--- a/cfg_checker/nodes.py
+++ b/cfg_checker/nodes.py
@@ -1085,12 +1085,44 @@
self,
ds,
cmd,
- args=None,
+ _args=None,
is_script=False
):
"""
Query daemonset for pods and execute script on all of them
"""
+ _results = self.exec_cmd_on_pods(
+ self.kube.get_pods_for_daemonset(ds),
+ cmd,
+ _args=_args,
+ is_script=is_script
+ )
+ # Update results
+ _ds_results = {}
+ for _n, _, _v in _results:
+ _ds_results[_n] = _v
+ return _ds_results
+
+ def exec_on_labeled_pods_and_ns(self, label_str, cmd, _args=None, ns=None):
+ if not ns:
+ ns = self._namespace
+ _results = self.exec_cmd_on_pods(
+ self.kube.list_pods(ns, label_str=label_str),
+ cmd,
+ _args=_args
+ )
+ _pod_results = {}
+ for _, _p, _v in _results:
+ _pod_results[_p] = _v
+ return _pod_results
+
+ def exec_cmd_on_pods(
+ self,
+ pod_list,
+ cmd,
+ _args=None,
+ is_script=False
+ ):
def _kube_exec_on_pod(plist):
return [
plist[1], # node
@@ -1105,16 +1137,15 @@
)
]
- _pods = self.kube.get_pods_for_daemonset(ds)
# Create map for threads: [[node_name, ns, pod_name, cmd]...]
logger_cli.debug(
"... runnning script on {} pods using {} threads at a time".format(
- len(_pods.items),
+ len(pod_list.items),
self.env_config.threads
)
)
_plist = []
- _arguments = args if args else ""
+ _arguments = _args if _args else ""
if is_script:
_cmd = [
"python3",
@@ -1133,7 +1164,7 @@
_arguments = cmd
else:
_cmd = cmd
- for item in _pods.items:
+ for item in pod_list.items:
_plist.append(
[
self,
@@ -1147,7 +1178,7 @@
# map func and cmd
pool = Pool(self.env_config.threads)
- _results = {}
+ _results = []
self.not_responded = []
# create result list
_progress = Progress(len(_plist))
@@ -1157,7 +1188,7 @@
if not ii[1][1]:
self.not_responded.append(ii[1][0])
else:
- _results[ii[1][0]] = ii[1][2]
+ _results.append(ii[1])
_progress.write_progress(ii[0])
_progress.end()
@@ -1375,3 +1406,17 @@
return _t.status.phase
else:
return None
+
+ def list_resource_names_by_type_and_ns(self, typ, ns="qa-space"):
+ if typ == "pod":
+ _items = self.kube.list_pods(ns)
+ elif typ == "svc":
+ _items = self.kube.list_svc(ns)
+ elif typ == "pvc":
+ _items = self.kube.list_pvc(ns)
+ elif typ == "pv":
+ _items = self.kube.list_pv()
+ else:
+ logger_cli.error("ERROR: '{}' is not supported yet".format(typ))
+ return None
+ return [[i.metadata.namespace, i.metadata.name] for i in _items.items]
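A usage sketch for the new helpers, assuming a configured cfg_checker.nodes.KubeNodes instance; this mirrors how prepare_cleanup() and get_agents_status() in bench.py consume them:

    def find_bench_leftovers(master, prefix="cfgagent"):
        # master: a configured cfg_checker.nodes.KubeNodes instance
        leftovers = []
        for _typ in ["pv", "pvc", "pod", "svc"]:
            for ns, name in master.list_resource_names_by_type_and_ns(_typ):
                if name.startswith(prefix):
                    leftovers.append([_typ, ns, name])
        return leftovers

    def poll_agents(master):
        # One exec per pod, selected by the new `app=cfgagent` label
        return master.exec_on_labeled_pods_and_ns(
            "app=cfgagent",
            "curl -s http://localhost:8765/api/fio"
        )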
diff --git a/cfg_checker/reports/reporter.py b/cfg_checker/reports/reporter.py
index dc9a2cf..150ce65 100644
--- a/cfg_checker/reports/reporter.py
+++ b/cfg_checker/reports/reporter.py
@@ -6,6 +6,7 @@
from cfg_checker.common import const
from cfg_checker.common import logger_cli
from cfg_checker.common.file_utils import read_file_as_lines
+from cfg_checker.modules.ceph.bench import _reformat_timestr
import jinja2
@@ -164,6 +165,10 @@
return _steps
+def time_strip(timestring):
+ return _reformat_timestr(timestring, _tchar="")
+
+
def get_osdmap(cs):
_osdmap = cs
while True:
@@ -185,7 +190,7 @@
}
-def get_pool_stats(id, pgdump):
+def get_pool_stats_by_id(id, pgdump):
_stats = {}
for pool in pgdump["pg_map"]["pool_stats"]:
if id == pool["poolid"]:
@@ -248,8 +253,9 @@
self.jinja2_env.filters['to_mb'] = to_mb
self.jinja2_env.filters['get_bucket_item_name'] = get_bucket_item_name
self.jinja2_env.filters['get_rule_steps'] = get_rule_steps
- self.jinja2_env.filters['get_pool_stats'] = get_pool_stats
+ self.jinja2_env.filters['get_pool_stats'] = get_pool_stats_by_id
self.jinja2_env.filters['get_osdmap'] = get_osdmap
+ self.jinja2_env.filters['tstrip'] = time_strip
# render!
logger_cli.info("-> Using template: {}".format(self.tmpl))
@@ -283,6 +289,10 @@
tmpl = "ceph_info_html.j2"
+class HTMLCephBench(_TMPLBase):
+ tmpl = "ceph_bench_html.j2"
+
+
# Package versions report
class HTMLModelCompare(_TMPLBase):
tmpl = "model_tree_cmp_tmpl.j2"
diff --git a/etc/taskfile_example b/etc/taskfile_example
new file mode 100644
index 0000000..1c1debb
--- /dev/null
+++ b/etc/taskfile_example
@@ -0,0 +1,18 @@
+randrw,10,4k,16,5G
+randrw,50,4k,16,5G
+randrw,70,4k,16,5G
+randrw,10,8k,16,5G
+randrw,50,8k,16,5G
+randrw,70,8k,16,5G
+randrw,10,16k,16,5G
+randrw,50,16k,16,5G
+randrw,70,16k,16,5G
+randrw,10,32k,16,5G
+randrw,50,32k,16,5G
+randrw,70,32k,16,5G
+randrw,10,64k,16,5G
+randrw,50,64k,16,5G
+randrw,70,64k,16,5G
+randrw,10,128k,16,5G
+randrw,50,128k,16,5G
+randrw,70,128k,16,5G
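Each row above feeds load_tasks() in bench.py. A hedged reader for the format; only the iodepth and size columns appear explicitly in the load_tasks hunk, so the first three names are inferred from the fio options used elsewhere in this change:

    import csv

    def load_tasks(taskfile):
        tasks = []
        with open(taskfile) as f:
            for row in csv.reader(f):
                tasks.append({
                    "readwrite": row[0],   # fio mode, e.g. randrw
                    "rwmixread": row[1],   # read share of the mix, percent
                    "bs": row[2],          # block size
                    "iodepth": row[3],     # matches load_tasks above
                    "size": row[4],        # matches load_tasks above
                })
        return tasks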
diff --git a/setup.py b/setup.py
index fd73a82..8fe0ffe 100644
--- a/setup.py
+++ b/setup.py
@@ -38,7 +38,7 @@
setup(
name="mcp-checker",
- version="0.64",
+ version="0.65",
author="Alex Savatieiev",
author_email="osavatieiev@mirantis.com",
classifiers=[
diff --git a/templates/bar_chart.j2 b/templates/bar_chart.j2
new file mode 100644
index 0000000..52a042e
--- /dev/null
+++ b/templates/bar_chart.j2
@@ -0,0 +1,76 @@
+<style>
+ .bc-wrap {
+ display: table;
+ position: relative;
+ margin: 7px 0;
+ height: 60px;
+ }
+ .bc-container {
+ display: table-cell;
+ width: 100%;
+ height: 100%;
+ padding-left: 15px;
+ }
+ .bc {
+ display: table;
+ height: 100%;
+ width: 100%;
+ border-bottom: 2px solid black;
+ }
+ .bccol {
+ position: relative;
+ vertical-align: bottom;
+ display: table-cell;
+ height: 100%;
+ }
+ .bcbar {
+ position: relative;
+ height: 0;
+ transition: height 0.5s 2s;
+ width: 25px;
+ margin: auto;
+ background-color: #358;
+ }
+ .bcfooter {
+ position: absolute;
+ text-align: center;
+ width: 100%;
+ top: 53px;
+ font-size: 10px;
+ }
+ .bctimecol {
+ position: absolute;
+ top: 0;
+ height: 100%;
+ width: 100%;
+ }
+ .bctime {
+ height: 15px;
+ vertical-align: middle;
+ position: relative;
+ }
+ .bctime:after {
+ border-bottom: 1px dotted black;
+ content: "";
+ position: absolute;
+ width: 100%;
+ left: 0;
+ top: 0em;
+ }
+ .bctimetext {
+ position: absolute;
+ top: -8px;
+ z-index: 1;
+ background: white;
+ padding-right: 5px;
+ color: #4d4d4d;
+ font-size: 8px;
+ font-family: 'Avenir Medium';
+ }
+ .red-bar {
+ background-color: darkred;
+ }
+ .green-bar {
+ background-color: green;
+ }
+</style>
diff --git a/templates/ceph_bench_html.j2 b/templates/ceph_bench_html.j2
new file mode 100644
index 0000000..e847cb4
--- /dev/null
+++ b/templates/ceph_bench_html.j2
@@ -0,0 +1,679 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+ <meta charset="UTF-8">
+ <title>Ceph cluster info</title>
+ {% include 'common_styles.j2' %}
+ {% include 'common_scripts.j2' %}
+ {% include 'bar_chart.j2' %}
+ <style>
+ table.cluster_nodes {
+ width: 100%;
+ margin-left: 1%;
+ margin-right: 1%;
+ }
+ .barcontent {
+ margin: auto;
+ width: 1350px;
+ padding: 10px;
+ }
+ .bar-centered {
+ float: none;
+ transform: translate(25%);
+ }
+
+ /* Node rows*/
+ .node {
+ font-family: "LaoSangamMN", Monaco, monospace;
+ font-size: 0.8em;
+ display: flex;
+ background-color: white;
+ align-items: center;
+ }
+ .collapsable {
+ font-family: "LaoSangamMN", Monaco, monospace;
+ font-size: 0.8em;
+ display: none;
+ background-color: white;
+ visibility: hidden;
+ }
+ .collapsable.in {
+ visibility: visible;
+ display: inline-block;
+ }
+
+ .row_button {
+ background-color: #468;
+ color: #fff;
+ cursor: pointer;
+ padding: 5px;
+ width: 100%;
+ border: none;
+ text-align: left;
+ outline: none;
+ font-size: 13px;
+ }
+ .row_button:after {
+ content: '\02795'; /* Unicode character for "plus" sign (+) */
+ font-size: 13px;
+ color: white;
+ float: left;
+ margin-left: 5px;
+ }
+
+ .row_active:after {
+ content: "\2796"; /* Unicode character for "minus" sign (-) */
+ color: white
+ }
+
+ .row_active, .row_button:hover {
+ background-color: #68a;
+ color: white
+ }
+
+ .cell_button {
+ color: darkgreen;
+ cursor: pointer;
+ padding: 5px;
+ width: 100%;
+ border: none;
+ text-align: center;
+ outline: none;
+ }
+ .cell_button:hover {
+ background-color: gray;
+ }
+
+ .row_content {
+ padding: 0 18px;
+ background-color: white;
+ max-height: 0;
+ overflow: hidden;
+ transition: max-height 0.2s ease-out;
+ border-width: 1px;
+ border-color: #68a;
+ border-style: solid;
+ }
+
+ div.services > .collapsable.in {
+ display: table-row;
+ }
+ tr:nth-child(even) {
+ background-color: #eee;
+ }
+ tr:nth-child(odd) {
+ background-color: #fff;
+ }
+
+ tr.node > td, tr.collapsable > td {
+ display: block;
+ float: left;
+ padding: 1px;
+ margin: 2px;
+ }
+ td > .osd_group {
+ display: grid;
+ grid-template-columns: 40px 25px 25px 70px;
+ padding-left: 0px;
+ padding-right: 0px;
+ margin: 1px;
+ }
+ td > .props_group {
+ display: grid;
+ grid-template-columns: 60px 60px 80px 35px 45px 95px 50px 60px 45px;
+ padding-left: 0px;
+ padding-right: 0px;
+ margin: 1px;
+ }
+ td > .pg_group {
+ display: grid;
+ grid-template-columns: 50px 40px 60px 65px 60px 65px 65px;
+ padding-left: 0px;
+ padding-right: 0px;
+ margin: 1px;
+ }
+ td > .bench_run_group {
+ display: grid;
+ grid-template-columns: 80px 80px 80px 80px 75px 75px;
+ padding-left: 0px;
+ padding-right: 0px;
+ margin: 1px;
+ }
+ td > .bench_group {
+ display: grid;
+ grid-template-columns: 80px 80px 75px 75px;
+ padding-left: 0px;
+ padding-right: 0px;
+ margin: 1px;
+ }
+ td > .meta_group {
+ display: inline-block;
+ grid-template-columns: repeat(4, auto);
+ padding-left: 0px;
+ padding-right: 0px;
+ margin: 1px;
+ }
+ .item {
+ display: inline-grid;
+ border-width: 1px;
+ border-style: solid;
+ margin: 1px 1px 1px 1px;
+ padding: 0px 1px 0px 1px;
+ }
+
+ .spacer { border-radius: 2px; width: 20px;}
+ .status { border-radius: 10px; width: 120px; text-align: center;}
+ .health_ok { background-color: #393; color: white;}
+ .health_error { background-color: #933; color: white;}
+ .health_warn { background-color: #eb3; color: #333;}
+ .checks_code { border-radius: 2px; width: 20%; background-color: transparent; color: darkred;}
+
+ .head { height: 18px; background-color: transparent; border-color: transparent; border: 0px;}
+ .centered { text-align: center;}
+ .right { text-align: right;}
+ .col_shortmessage { min-width: 300px; }
+ .col_longmessage { width: auto; }
+ .col_properties { width: auto;}
+
+ .srv_name { width: 300px }
+ .srv_path { width: 250px }
+ .srv_timestamp { width: 250px }
+ .srv_addr { width: 450px }
+
+ .id { width: 30px }
+ .bucket_name { width: 365px }
+ .bucket_type { width: 50px }
+ .bucket_params { width: 200px }
+ .bucket_items { width: 630px }
+
+ .df_name { width: 300px }
+ .df_total { width: 150px }
+ .df_avail { width: 150px }
+ .df_used { width: 150px }
+ .df_used_raw { width: 150px }
+ .df_used_raw_rate { width: 150px }
+
+ .rdf_name { width: 200px; }
+ .rdf_obj { width: 75px; }
+ .rdf_total { width: 100px; }
+ .rdf_used { width: 100px; }
+ .rdf_bench { width: 100px; }
+
+ .dev_name { width: 300px; }
+ .dev_param { width: 100px; }
+
+ .mon_name { width: 100px }
+ .mon_url { width: 500px }
+
+ .meters {
+ display: inline-block;
+ margin: 1px;
+ }
+ .meters > .meter {
+ display: block;
+ float: left;
+ border-width: 1px;
+ border-style: solid;
+ margin: 0px 1px 0px 1px;
+ padding: 0px 1px 0px 1px;
+
+ }
+ .meters > .warn {
+ border-color: #d3a200;
+ background-color: rgb(255, 216, 133);
+ }
+ .meters > .fail {
+ border-color: #bb0000;
+ background-color: rgb(250, 135, 135);
+ }
+ .osd { border-color: #a0c0a0; background-color: rgb(252, 248, 248); text-align: center;}
+ .prop { border-color: #74c28b; background-color: rgb(252, 248, 248); text-align: center;}
+ .pg { border-color: #c0c0a0; background-color: rgb(255, 255, 251); text-align: right; }
+ .bench { border-color: #a0c0c0; background-color: rgb(255, 250, 250); text-align: right; }
+ .lat_commit { border-color: #a0c0c0; background-color: rgb(255, 250, 250); text-align: right; width: 45px}
+ .lat_apply { border-color: #a0c0c0; background-color: rgb(255, 250, 250); text-align: left; width: 35px}
+ .meta_name { border-color: #c4b890; background-color: #e7dbb6; text-align: left; width: 150px;}
+ .meta_value { border-color: #c6c3ba;background-color: #d4d4d4; text-align: left; width: 480px;}
+
+ .map_grid {
+ display: grid;
+ grid-template-columns: auto auto auto auto auto auto auto auto auto auto;
+ grid-column-gap: 20px;
+ padding-left: 0px;
+ padding-right: 0px;
+ margin: 1px;
+ margin-left: 20px;
+
+ }
+ .map_item {
+ display: inline-grid;
+ border-width: 0px;
+ border-style: solid;
+ margin: 1px 1px 1px 1px;
+ padding: 0px 1px 0px 1px;
+ }
+
+ .map_grid > .ok {
+ color: #80a080;
+ }
+ .map_grid > .warn {
+ color: #d3a200;
+ }
+ .map_grid > .fail {
+ color: #bb0000;
+ }
+
+ .modules {
+ font-family: "LaoSangamMN", Monaco, monospace;
+ font-size: 0.8em;
+ background-color: white;
+ }
+ .module_node {
+ margin-bottom: 2px;
+ display: flex;
+ }
+ .module_name, .node_name {
+ text-align: center;
+ border-width: 0px;
+ border-style: solid;
+ margin: 1px 1px 1px 1px;
+ padding: 0px 1px 0px 1px;
+ min-width: 250px;
+ border-radius: 10px;
+ }
+ .node_name {
+ background-color: #ddd;
+ }
+ .module_grid {
+ display: grid;
+ grid-template-columns: repeat(8, 100px);
+ grid-template-rows: repeat(6, auto);
+ grid-auto-flow: column;
+ grid-column-gap: 10px;
+ padding-left: 0px;
+ padding-right: 0px;
+ margin: 1px;
+ margin-left: 20px;
+ }
+ .module {
+ display: inline-grid;
+ text-align: center;
+ border-width: 0px;
+ border-style: solid;
+ margin: 1px 1px 1px 1px;
+ padding: 0px 1px 0px 1px;
+ min-width: 100px;
+ border-radius: 10px;
+ }
+
+ .module_grid > .on, .service_node > .ok {
+ background-color: #8c8;
+ }
+ .module_grid > .off, .service_node > .off{
+ background-color: #9aa;
+ }
+ .module_grid > .fail, .service_node > .fail {
+ background-color: #a33;
+ }
+ .module_grid > .always, .service_node > .fail {
+ background-color: #282;
+ }
+
+ .tooltiptext {
+ transform: translate(100px);
+ }
+
+ .console {
+ background-color: black;
+ font-family: "Lucida Console", Monaco, monospace;
+ font-size: 0.5em;
+ width: auto;
+ color: #fff;
+ border-radius: 6px;
+ padding: 5px 5px;
+ }
+
+ </style>
+</head>
+<body onload="init()">
+
+<div class="header">
+ <div class="label">Ceph version:</div>
+ <div class="text">{{ ceph_version }}</div>
+ <div class="label">Image:</div>
+ <div class="text">{{ cluster.image }}</div>
+ <div class="label date">generated on: {{ gen_date }}</div>
+</div>
+
+<div class="bar">
+ <div class="bar-centered">
+ <button class="bar-item" onclick="openBar(event, 'bench')">Benchmark Results</button>
+ <button class="bar-item" onclick="openBar(event, 'status')">Status</button>
+ <!-- <button class="bar-item" onclick="openBar(event, 'latency')">Latency</button> -->
+ </div>
+</div>
+
+<!-- Benchmarks -->
+{% macro bench_page(results, id_label) %}
+<div id="{{ id_label }}" class="barcontent">
+ <h5>{{ caller() }}</h5>
+ <hr>
+ <table class="ceph_status">
+ <tr class="node">
+ <td class="status">Time started</td>
+ <td class="status">Data point</td>
+ <td class="col_properties">
+ <div class="props_group">
+ <div class="item prop">Warmup</div>
+ <div class="item prop">Run Time</div>
+ <div class="item prop">Storage class</div>
+ <div class="item pg">PGs</div>
+ <div class="item prop">Engine</div>
+ <div class="item prop">Mode</div>
+ <div class="item prop">BS</div>
+ <div class="item prop">IOdepth</div>
+ <div class="item prop">Size</div>
+ </div>
+ </td>
+ <td class="col_bench">
+ <div class="bench_run_group">
+ <div class="item bench">Read, MB/s</div>
+ <div class="item bench">Avg lat, usec</div>
+ <div class="item bench">Read, op/s</div>
+ <div class="item bench">Write, MB/s</div>
+ <div class="item bench">Avg lat, usec</div>
+ <div class="item bench">Write, op/s</div>
+ </div>
+ </td>
+ </tr>
+ {% for time,dt in results.items() %}
+ {% set t = dt["totals"] %}
+ {% set o = dt["input_options"] %}
+ {% set tstripped = time | tstrip %}
+ <tr class="node" onclick="toggleClassByID('timing_{{ tstripped }}_data')" id="timing_{{ tstripped }}_button">
+ <td class="status">{{ time }}</td>
+ <td class="status">All agents</td>
+ <td class="col_properties">
+ <div class="props_group">
+ <div class="item prop">{{ o["ramp_time"] }}</div>
+ <div class="item prop">{{ o["runtime"] }}</div>
+ <div class="item prop">{{ t["storage_class"] }}</div>
+ <div class="item pg">{{ t["storage_class_stats"]["num_pg"] }}</div>
+ <div class="item prop">{{ o["ioengine"] }}</div>
+ <div class="item prop">{{ o["readwrite"] }} ({{ o["rwmixread"] }}/{{ 100-o["rwmixread"] }})</div>
+ <div class="item prop">{{ o["bs"] }}</div>
+ <div class="item prop">{{ o["iodepth"] }}</div>
+ <div class="item prop">{{ o["size"] }}</div>
+ </div>
+ </td>
+ <td class="col_bench">
+ <div class="bench_run_group">
+ <div class="item bench">{{ t["read_bw_bytes"] | to_mb }}</div>
+ <div class="item bench">{{ "%0.2f" | format(t["read_avg_lat_us"]|float) }}</div>
+ <div class="item bench">{{ "%0.2f" | format(t["read_iops"]|float) }}</div>
+ <div class="item bench">{{ t["write_bw_bytes"] | to_mb }}</div>
+ <div class="item bench">{{ "%0.2f" | format(t["write_avg_lat_us"]|float) }}</div>
+ <div class="item bench">{{ "%0.2f" | format(t["write_iops"]|float) }}</div>
+ </div>
+ </td>
+ </tr>
+ <tr class="collapsable" id="timing_{{ tstripped }}_data"><td colspan=3>
+ <div class="bc-wrap">
+ <div class="bctimecol">
+ <div class="bctime"><span class="bctimetext">110</span></div>
+ <div class="bctime"><span class="bctimetext">75</span></div>
+ <div class="bctime"><span class="bctimetext">50</span></div>
+ <div class="bctime"><span class="bctimetext">15</span></div>
+ </div>
+
+ <div class="bc-container">
+ <div class="bc">
+ <div class="bccol"><div class="bcbar" style="height: 75%;"></div><div class="bcfooter">2s</div></div>
+ <div class="bccol"><div class="bcbar" style="height: 25%;"></div><div class="bcfooter">4s</div></div>
+ <div class="bccol"><div class="bcbar" style="height: 55%;"></div><div class="bcfooter">6s</div></div>
+ <div class="bccol"><div class="bcbar" style="height: 65%;"></div><div class="bcfooter">8s</div></div>
+ <div class="bccol"><div class="bcbar" style="height: 15%;"></div><div class="bcfooter">10s</div></div>
+ <div class="bccol"><div class="bcbar" style="height: 16%;"></div><div class="bcfooter">12s</div></div>
+ <div class="bccol"><div class="bcbar" style="height: 17%;"></div><div class="bcfooter">14s</div></div>
+ <div class="bccol"><div class="bcbar" style="height: 18%;"></div><div class="bcfooter">16s</div></div>
+ <div class="bccol"><div class="bcbar" style="height: 19%;"></div><div class="bcfooter">18s</div></div>
+ <div class="bccol"><div class="bcbar" style="height: 20%;"></div><div class="bcfooter">20s</div></div>
+ <div class="bccol"><div class="bcbar" style="height: 21%;"></div><div class="bcfooter">22s</div></div>
+ </div>
+ </div>
+ </div>
+ <table style="table-layout: auto;"><tbody>
+ {% for agent,ag_result in dt["agents"].items() %}
+ {% set j = ag_result[time]["jobs"][0] %}
+ <tr>
+ <td class="status">{{ time }}</td>
+ <td class="status">{{ agent }}</td>
+ <td class="col_properties">
+ <div class="props_group">
+ <div class="item prop">{{ j["job options"]["ramp_time"] }}</div>
+ <div class="item prop">{{ j["job options"]["runtime"] }}</div>
+ <div class="item prop">{{ t["storage_class"] }}</div>
+ <div class="item pg">{{ t["storage_class_stats"]["num_pg"] }}</div>
+ <div class="item prop">{{ o["ioengine"] }}</div>
+ <div class="item prop">{{ o["readwrite"] }} ({{ o["rwmixread"] }}/{{ 100-o["rwmixread"] }})</div>
+ <div class="item prop">{{ j["job options"]["bs"] }}</div>
+ <div class="item prop">{{ o["iodepth"] }}</div>
+ <div class="item prop">{{ j["job options"]["size"] }}</div>
+ </div>
+ </td>
+ <td class="col_bench">
+ <div class="bench_run_group">
+ <div class="item bench">{{ j["read"]["bw_bytes"] | to_mb }}</div>
+ <div class="item bench">{{ "%0.2f" | format(j["read"]["lat_ns"]["mean"]|float / 1000) }}</div>
+ <div class="item bench">{{ "%0.2f" | format(j["read"]["iops"]|float) }}</div>
+ <div class="item bench">{{ j["write"]["bw_bytes"] | to_mb }}</div>
+ <div class="item bench">{{ "%0.2f" | format(j["write"]["lat_ns"]["mean"]|float / 1000) }}</div>
+ <div class="item bench">{{ "%0.2f" | format(j["write"]["iops"]|float) }}</div>
+ </div>
+ </td>
+ </tr>
+ {% endfor %}
+ </tbody></table>
+ </td></tr>
+ {% endfor %}
+ </table>
+</div>
+{% endmacro %}
+
+<!-- Status page -->
+{% macro status_page(info, id_label) %}
+<div id="{{ id_label }}" class="barcontent">
+ <h5>{{ caller() }}</h5>
+ <hr>
+ <table class="ceph_status">
+ <tr class="node">
+ <td class="status">Cluster status</td>
+ <td class="col_shortmessage">Status summary</td>
+ <td class="col_osd">
+ <div class="osd_group">
+ <div class="item osd">OSDs</div>
+ <div class="item osd">Up</div>
+ <div class="item osd">In</div>
+ <div class="item osd">Remap PGs</div>
+ </div>
+ </td>
+ <td class="col_pgs">
+ <div class="pg_group">
+ <div class="item pg">PGs</div>
+ <div class="item pg">Pools</div>
+ <div class="item pg">Objects</div>
+ <div class="item pg">Data, GB</div>
+ <div class="item pg">Used, GB</div>
+ <div class="item pg">Avail, GB</div>
+ <div class="item pg">Total, GB</div>
+ </div>
+ </td>
+ <td class="col_bench">
+ <div class="bench_group">
+ <div class="item bench">Read, MB/sec</div>
+ <div class="item bench">Write, MB/sec</div>
+ <div class="item bench">Read, op/sec</div>
+ <div class="item bench">Write, op/sec</div>
+ </div>
+ </td>
+ </tr>
+ {% set cs = idle_status %}
+ {% set osdmap = cs | get_osdmap %}
+ <tr class="node" onclick="toggleClassByID('health_data')" id="health_data_button">
+ <td class="status {{ health_detail["status"] | lower }}">{{ health_detail["status"] }}</td>
+ <td class="col_shortmessage">
+ {% for code,dt in health_detail["checks"].items() %}
+ {{ dt["summary"]["message"] }}<br>
+ {% endfor %}
+ </td>
+ <!-- background: linear-gradient(to right, gray 0% 20%, transparent 20% 100%); -->
+ <td class="col_osd">
+ <div class="osd_group">
+ <div class="item osd">{{ osdmap["num_osds"] }}</div>
+ <div class="item osd">{{ osdmap["num_up_osds"] }}</div>
+ <div class="item osd">{{ osdmap["num_in_osds"] }}</div>
+ <div class="item osd">{{ osdmap["num_remapped_pgs"] }}</div>
+ </div>
+ </td>
+ {% set pgmap = cs["pgmap"] %}
+ <td class="col_pgs">
+ <div class="pg_group">
+ <div class="item pg">{{ pgmap["num_pgs"] }}</div>
+ <div class="item pg">{{ pgmap["num_pools"] }}</div>
+ <div class="item pg">{{ pgmap["num_objects"] }}</div>
+ <div class="item pg">{{ pgmap["data_bytes"] | to_gb }}</div>
+ <div class="item pg">{{ pgmap["bytes_used"] | to_gb }}</div>
+ <div class="item pg">{{ pgmap["bytes_avail"] | to_gb }}</div>
+ <div class="item pg">{{ pgmap["bytes_total"] | to_gb }}</div>
+ </div>
+ </td>
+ <td class="col_bench">
+ <div class="bench_group">
+ {% if "read_bytes_sec" in pgmap %}
+ <div class="item bench">{{ pgmap["read_bytes_sec"] | to_mb }}</div>
+ {% else %}
+ <div class="item bench">0</div>
+ {% endif %}
+ {% if "write_bytes_sec" in pgmap %}
+ <div class="item bench">{{ pgmap["write_bytes_sec"] | to_mb }}</div>
+ {% else %}
+ <div class="item bench">0</div>
+ {% endif %}
+ {% if "read_op_per_sec" in pgmap %}
+ <div class="item bench">{{ pgmap["read_op_per_sec"] }}</div>
+ {% else %}
+ <div class="item bench">0</div>
+ {% endif %}
+ {% if "write_op_per_sec" in pgmap %}
+ <div class="item bench">{{ pgmap["write_op_per_sec"] }}</div>
+ {% else %}
+ <div class="item bench">0</div>
+ {% endif %}
+ </div>
+ </td>
+ </tr>
+ <tr class="collapsable in" id="health_data"><td colspan=3>
+ <table><tbody>
+ {% for code,dt in health_detail["checks"].items() %}
+ <tr>
+ <td class="spacer"></td>
+ <td class="status {{ dt["severity"] | lower }}">{{ dt["severity"] }}</td>
+ <td class="checks_code">{{ code }}</td>
+ <td class="col_longmessage">
+ <table><tbody>
+ {% for detail in dt["detail"] %}
+ <tr><td>{{ detail["message"] }}</td></tr>
+ {% endfor %}
+ </tbody></table>
+ </td>
+ </tr>
+ {% endfor %}
+ </tbody></table>
+ </td></tr>
+ </table>
+ <hr>
+ <!-- Services -->
+ {% set sm = idle_status["servicemap"] %}
+ <h5>Services: {{ sm["services"] | count }} running. Last modification: {{ sm["modified"] }}</h5>
+ <table class="ceph_status">
+ <tr class="node">
+ <td class="srv_name">Name</td>
+ <td class="srv_path">Subpath</td>
+ <td class="srv_timestamp">Start time</td>
+ <td class="srv_addr">Address</td>
+ </tr>
+ {% for name, d1 in sm["services"].items() %}
+ {% if "daemons" in d1 %}
+ {% set d2 = d1["daemons"] %}
+ {% for key, d3 in d2.items() %}
+ {% if key.startswith("rgw.store") %}
+ <tr class="node" onclick="toggleClassByID('{{ name }}_service_data')" id="{{ name }}_service_data_button">
+ <td class="srv_name">{{ name }} ({{ d3["gid"] }})</td>
+ <td class="srv_path">daemons:{{ key }}</td>
+ <td class="srv_timestamp">{{ d3["start_stamp"] }}</td>
+ <td class="srv_addr">{{ d3["addr"] }}</td>
+ </tr>
+ <tr class="collapsable in" id="{{ name}}_service_data"><td colspan=4>
+ <table><tbody>
+ <tr><td class="metadata">
+ {% for mname, mvalue in d3["metadata"].items() %}
+ <div class="meta_group">
+ <div class="item meta_name">{{ mname }}</div>
+ <div class="item meta_value">{{ mvalue }}</div>
+ </div>
+ {% endfor %}
+ </td></tr>
+ </tbody></table>
+ </td></tr>
+ {% endif %}
+ {% endfor %}
+ {% endif %}
+ {% endfor %}
+ </table>
+ <hr>
+ <!-- Modules -->
+ {% set mgrmap = idle_status["mgrmap"] %}
+ {% set mods = mgrmap["modules"] %}
+ {% set avail = mgrmap["available_modules"] %}
+ {% if "always_on_modules" in mgrmap %}
+ {% set always_on = mgrmap["always_on_modules"].values() | list %}
+ {% set always_on = always_on[0] %}
+ {% else %}
+ {% set always_on = [] %}
+ {% endif %}
+ <h5>Modules: {{ mods | count}} active. {{ always_on | count }} always on. {{ avail | count }} available.</h5>
+ <div class="modules">
+ <div class="module_grid">
+ {% for mod in avail %}
+ {% if mod["name"] in always_on %}
+ <div class="module always">{{ mod["name"] }}</div>
+ {% elif mod["name"] in mods %}
+ <div class="module on">{{ mod["name"] }}</div>
+ {% elif not mod["can_run"] %}
+ <div class="module fail tooltip">
+ <div class="module fail">{{ mod["name"] }}</div>
+ <pre class="tooltiptext">{{ mod["error_string"] | linebreaks }}</pre>
+ </div>
+ {% else %}
+ <div class="module">{{ mod["name"] }}</div>
+ {% endif %}
+ {% endfor %}
+ </div>
+ </div>
+ <hr>
+</div>
+{% endmacro %}
+
+<!-- ================================= -->
+<!-- Cluster nodes page -->
+{% call bench_page(results, "bench") %}
+ Benchmark results
+{% endcall %}
+
+{% call status_page(info, "status") %}
+ Cluster status
+{% endcall %}
+
+</body>
+</html>
\ No newline at end of file
diff --git a/templates/cfgagent-template.yaml b/templates/cfgagent-template.yaml
index 3152c5f..86c58e6 100644
--- a/templates/cfgagent-template.yaml
+++ b/templates/cfgagent-template.yaml
@@ -5,13 +5,14 @@
namespace: qa-space
labels:
name: cfgagent-xx
+ app: cfgagent
spec:
containers:
- command:
- checker-agent
imagePullPolicy: IfNotPresent
name: cfgagent-pod
- image: savex13/cfg-checker-agent:0.64
+ image: savex13/cfg-checker-agent:0.65
volumeMounts:
- mountPath: /cephvol
name: cfgagent-pv-placeholder