Ceph Info command
Updates
- ceph module with 'info', 'report' and 'bench' commands
- mcp-checker ceph info command is collecting Ceph config
and creates an archive
- ceph report command creates HTML document with
info collected from Ceph cluster
- Basic SMART data output in info and full output in report
- skeleton of the ceph bench command to run synced tests
Fixes
- kube helper commands use proper naming
Change-Id: Ia5aaa343f7d1c38a67d34e60215801bbb0fea097
Related-PROD: PROD-36605
diff --git a/cfg_checker/common/kube_utils.py b/cfg_checker/common/kube_utils.py
index 042db5d..e1aafbb 100644
--- a/cfg_checker/common/kube_utils.py
+++ b/cfg_checker/common/kube_utils.py
@@ -207,6 +207,13 @@
self._coreV1 = None
self._appsV1 = None
self._podV1 = None
+ self._custom = None
+
+ @property
+ def CustomObjects(self):
+ if not self._custom:
+ self._custom = kclient.CustomObjectsApi(self.kApi)
+ return self._custom
@property
def CoreV1(self):
@@ -298,6 +305,45 @@
return _nodes
+ def get_pod_names_by_partial_name(self, partial_name, ns):
+ logger_cli.debug('... searching for pods with {}'.format(partial_name))
+ _pods = self.CoreV1.list_namespaced_pod(ns)
+ _names = self._get_listed_attrs(_pods.items, "metadata.name")
+ _pnames = [n for n in _names if partial_name in n]
+ if len(_pnames) > 1:
+ logger_cli.debug(
+ "... more than one pod found for '{}': {}\n".format(
+ partial_name,
+ ", ".join(_pnames)
+ )
+ )
+ elif len(_pnames) < 1:
+ logger_cli.warning(
+ "WARNING: No pods found for '{}'".format(partial_name)
+ )
+
+ return _pnames
+
+ def get_pods_by_partial_name(self, partial_name, ns):
+ logger_cli.debug('... searching for pods with {}'.format(partial_name))
+ _all_pods = self.CoreV1.list_namespaced_pod(ns)
+ # _names = self._get_listed_attrs(_pods.items, "metadata.name")
+ _pods = [_pod for _pod in _all_pods.items
+ if partial_name in _pod.metadata.name]
+ if len(_pods) > 1:
+ logger_cli.debug(
+ "... more than one pod found for '{}': {}\n".format(
+ partial_name,
+ ", ".join([_p.metadata.name for _p in _pods])
+ )
+ )
+ elif len(_pods) < 1:
+ logger_cli.warning(
+ "WARNING: No pods found for '{}'".format(partial_name)
+ )
+
+ return _pods
+
def exec_on_target_pod(
self,
cmd,
@@ -307,6 +353,7 @@
_request_timeout=120,
**kwargs
):
+ _pname = ""
if not strict:
logger_cli.debug(
"... searching for pods with the name '{}'".format(pod_name)
@@ -314,7 +361,6 @@
_pods = {}
_pods = self.CoreV1.list_namespaced_pod(namespace)
_names = self._get_listed_attrs(_pods.items, "metadata.name")
- _pname = ""
_pnames = [n for n in _names if n.startswith(pod_name)]
if len(_pnames) > 1:
logger_cli.debug(
@@ -325,7 +371,7 @@
)
)
_pname = _pnames[0]
- elif len(_pname) < 1:
+ elif len(_pnames) < 1:
raise KubeException("No pods found for '{}'".format(pod_name))
else:
_pname = pod_name
@@ -373,7 +419,9 @@
if _ns is None:
logger_cli.debug("... creating namespace '{}'".format(ns))
- _r = self.CoreV1.create_namespace(ns)
+ _new_ns = kclient.V1Namespace()
+ _new_ns.metadata = kclient.V1ObjectMeta(name=ns)
+ _r = self.CoreV1.create_namespace(_new_ns)
# TODO: check return on fail
if not _r:
return False
@@ -494,7 +542,7 @@
)
# map func and cmd
-
+ logger_cli.error("ERROR: 'exec_on_all_pods' is not implemented yet")
# create result list
return []
@@ -566,3 +614,17 @@
self._coreV1 = None
return
+
+ def get_custom_resource(self, group, version, plural):
+ # Get it
+ # Example:
+ # kubernetes.client.CustomObjectsApi().list_cluster_custom_object(
+ # group="networking.istio.io",
+ # version="v1alpha3",
+ # plural="serviceentries"
+ # )
+ return self.CustomObjects.list_cluster_custom_object(
+ group=group,
+ version=version,
+ plural=plural
+ )
diff --git a/cfg_checker/modules/ceph/__init__.py b/cfg_checker/modules/ceph/__init__.py
new file mode 100644
index 0000000..ad4a207
--- /dev/null
+++ b/cfg_checker/modules/ceph/__init__.py
@@ -0,0 +1,150 @@
+from cfg_checker.common import logger_cli
+from cfg_checker.common.settings import ENV_TYPE_KUBE
+from cfg_checker.helpers import args_utils
+from cfg_checker.modules.ceph import info, bench
+
+command_help = "Ceph Storage information and benchmarks"
+supported_envs = [ENV_TYPE_KUBE]
+
+
+# def _selectClass(_env, strClassHint="checker"):
+# _class = None
+# if _env == ENV_TYPE_SALT:
+# if strClassHint == "info":
+# _class = info.SaltCephInfo
+# elif strClassHint == "bench":
+# _class = bench.SaltCephInfo
+# elif _env == ENV_TYPE_KUBE:
+# if strClassHint == "info":
+# _class = info.KubeCephInfo
+# elif strClassHint == "bench":
+# _class = bench.KubeCephBench
+# if not _class:
+# raise CheckerException(
+# "Unknown hint for selecting Ceph handler Class: '{}'".format(
+# strClassHint
+# )
+# )
+# else:
+# return _class
+
+
+def init_parser(_parser):
+ # network subparser
+ ceph_subparsers = _parser.add_subparsers(dest='type')
+
+ ceph_info_parser = ceph_subparsers.add_parser(
+ 'info',
+ help="Gather Ceph Cluster information"
+ )
+
+ ceph_info_parser.add_argument(
+ '--detailed',
+ action="store_true", default=False,
+ help="Print additional details"
+ )
+
+ ceph_info_parser.add_argument(
+ '--tgz',
+ metavar='ceph_tgz_filename',
+ help="Archive filename to save the collected Ceph info"
+ )
+
+ ceph_report_parser = ceph_subparsers.add_parser(
+ 'report',
+ help="Generate Ceph cluster report"
+ )
+
+ ceph_report_parser.add_argument(
+ '--html',
+ metavar='ceph_html_filename',
+ help="HTML filename to save report"
+ )
+
+ ceph_bench_parser = ceph_subparsers.add_parser(
+ 'bench',
+ help="Run ceph benchmark"
+ )
+
+ ceph_bench_parser.add_argument(
+ '--task-list',
+ metavar='ceph_tasks_filename',
+ help="List file with data for Ceph bench testrun"
+ )
+
+ return _parser
+
+
+def do_info(args, config):
+ # Ceph info
+ # Gather ceph info and create an archive with data
+ args_utils.check_supported_env(ENV_TYPE_KUBE, args, config)
+ # check tgz
+ _tgzfile = "ceph_info_archive.tgz" if not args.tgz else args.tgz
+
+ # _class = _selectClass(_env)
+ ceph_info = info.KubeCephInfo(config)
+
+ logger_cli.info("# Collecting Ceph cluster information")
+ logger_cli.warning(
+ "\nWARNING: 'ceph info' has 'Work in progress' status!\n"
+ )
+
+ ceph_info.gather_info()
+
+ # Debug, enable if needed to debug report generation
+ # without actual data collection each time
+ # ceph_info.dump_info()
+ # ceph_info.load_info()
+ # end debug
+
+ ceph_info.print_summary()
+ ceph_info.generate_archive(_tgzfile)
+
+ return
+
+
+def do_report(args, config):
+ # Ceph Report
+ # Gather ceph info and create HTML report with all of the data
+ args_utils.check_supported_env(ENV_TYPE_KUBE, args, config)
+ _filename = args_utils.get_arg(args, 'html')
+ logger_cli.info("# Ceph cluster Configuration report")
+ logger_cli.warning(
+ "\nWARNING: 'ceph report' has 'Work in progress' status!\n"
+ )
+
+ # _class = _selectClass(_env)
+ ceph_info = info.KubeCephInfo(config)
+ # Debug, enable if needed to debug report generation
+ # without actual data collection each time
+ # ceph_info.load_info()
+ # end debug
+ ceph_info.gather_info()
+ ceph_info.get_transposed_latency_table()
+ ceph_info.get_latest_health_readout()
+ ceph_info.create_html_report(_filename)
+
+ return
+
+
+def do_bench(args, config):
+ # Ceph Benchmark using multiple pods
+ # Prepare the tasks and do synced testrun
+ # TODO: html option to create a fancy report
+ args_utils.check_supported_env(ENV_TYPE_KUBE, args, config)
+
+ ceph_bench = bench.KubeCephBench(config)
+
+ logger_cli.error("ERROR: To be implemented...")
+
+ # Load tasks
+
+ # Do the testrun
+ ceph_bench.prepare_pods()
+ ceph_bench.run_benchmark()
+
+ # Create report
+ ceph_bench.create_report()
+
+ return
diff --git a/cfg_checker/modules/ceph/bench.py b/cfg_checker/modules/ceph/bench.py
new file mode 100644
index 0000000..28c7929
--- /dev/null
+++ b/cfg_checker/modules/ceph/bench.py
@@ -0,0 +1,48 @@
+from cfg_checker.common import logger_cli
+# from cfg_checker.common.exception import InvalidReturnException
+# from cfg_checker.common.exception import ConfigException
+# from cfg_checker.common.exception import KubeException
+
+from cfg_checker.nodes import KubeNodes
+
+
+class CephBench(object):
+ def __init__(
+ self,
+ config
+ ):
+ self.env_config = config
+ return
+
+ def prepare_pods(self):
+
+ return
+
+ def run_benchmark(self):
+
+ return
+
+ # Create report
+ def create_report(self):
+
+ return
+
+
+class SaltCephBench(CephBench):
+ def __init__(
+ self,
+ config
+ ):
+ logger_cli.error("ERROR: Not implemented for Salt environment!")
+
+ # self.master = SaltNodes(config)
+ super(SaltCephBench, self).__init__(
+ config
+ )
+ return
+
+
+class KubeCephBench(CephBench):
+ def __init__(self, config):
+ self.master = KubeNodes(config)
+ super(KubeCephBench, self).__init__(config)
diff --git a/cfg_checker/modules/ceph/info.py b/cfg_checker/modules/ceph/info.py
new file mode 100644
index 0000000..092c1c7
--- /dev/null
+++ b/cfg_checker/modules/ceph/info.py
@@ -0,0 +1,514 @@
+import json
+from time import sleep
+
+
+from cfg_checker.common import logger_cli
+from cfg_checker.common.exception import KubeException
+
+from cfg_checker.helpers.console_utils import Progress
+from cfg_checker.helpers.tgz import TGZFile
+from cfg_checker.nodes import KubeNodes
+from cfg_checker.reports import reporter
+
+
+class CephInfo(object):
+ def __init__(
+ self,
+ config
+ ):
+ self.env_config = config
+ return
+
+ def get_transposed_latency_table(self):
+ _table = {
+ "<dev>": []
+ }
+ for _pfd in self.ceph_info['osd_latency_data']['data']['data']:
+ _table["<dev>"].append({
+ "formatted": " cL/aL ",
+ "commit_latency_ms": "Commit, ms",
+ "apply_latency_ms": "Apply, ms",
+ "commit_latency_ns": "Commit, ns",
+ "apply_latency_ns": "Apply, ns"
+ })
+ for _f in _pfd['osdstats']['osd_perf_infos']:
+ _n = "osd_{}".format(_f['id'])
+ if _n not in _table:
+ _table[_n] = []
+ _table[_n].append({
+ "formatted": "{:>3}/{:<3}".format(
+ _f['perf_stats']['commit_latency_ms'],
+ _f['perf_stats']['apply_latency_ms'],
+ ),
+ "commit_latency_ms": _f['perf_stats']['commit_latency_ms'],
+ "apply_latency_ms": _f['perf_stats']['apply_latency_ms'],
+ "commit_latency_ns": _f['perf_stats']['commit_latency_ns'],
+ "apply_latency_ns": _f['perf_stats']['apply_latency_ns']
+ })
+ self.ceph_info['osd_latency_data']['table'] = _table
+ return _table
+
+ def get_latest_health_readout(self):
+ _h = self.ceph_info['ceph_health']['data']
+ self.ceph_info['ceph_health']['latest'] = {}
+ for _n, _d in _h.items():
+ if not _d:
+ self.ceph_info['ceph_health']['latest'][_n] = {}
+ continue
+ else:
+ # TODO: Consider filtering out or prepare data for the table
+ _date = sorted(_d.keys(), reverse=True)[0]
+ self.ceph_info['ceph_health']['date'] = _date
+ self.ceph_info['ceph_health']['latest'][_n] = _d[_date]
+
+ return self.ceph_info['ceph_health']['latest']
+
+ def print_summary(self):
+ logger_cli.info("\n# Ceph Cluster summary")
+ # Health status
+ _h = self.ceph_info['health_detail']['data']
+ logger_cli.info("Cluster status: {}".format(_h['status']))
+ for _chk, _d in _h['checks'].items():
+ logger_cli.info(
+ "+ {}: {}\n\tSummary: {}".format(
+ _chk,
+ _d['severity'],
+ _d['summary']['message']
+ )
+ )
+ logger_cli.info("\tDetails:")
+ for _item in _d['detail']:
+ logger_cli.info("\t '{}".format(_item['message']))
+
+ # OSD health metrics
+ logger_cli.info("\n# Device health metrics:")
+ _fmt = " {:45} {:^14} {:^9} {:^6} {:^6}"
+ logger_cli.info(
+ _fmt.format(
+ "Device Name",
+ "Info",
+ "Speed",
+ "SMART",
+ "Tempr."
+ )
+ )
+ _latest = self.get_latest_health_readout()
+ for _n, _d in _latest.items():
+ if not _d:
+ logger_cli.info("{:45} {:<10}".format(_n, "<empty>"))
+ continue
+
+ _status = _d['ata_smart_data']['self_test']['status']['passed']
+
+ _status = 'passed' if _status else 'failed'
+ logger_cli.info(
+ _fmt.format(
+ _n,
+ _d['device']['info_name'],
+ _d['interface_speed']['current']['string'],
+ _status,
+ _d['temperature']['current']
+ )
+ )
+
+ # Latency table
+ logger_cli.info(
+ "\n# OSD Latency data ({} iterations, {} sec delay), "
+ "table items 'osd_dev: N:cL/aL'\n"
+ " 'Commit Latency' -> 'cL', 'Apply Latency' -> 'aL'\n".format(
+ self.ceph_info['osd_latency_data']['data']['total'],
+ self.ceph_info['osd_latency_data']['data']['delay']
+ )
+ )
+ _strs = self.get_transposed_latency_table()
+ for _osd, _list in _strs.items():
+ _row = [c["formatted"] for c in _list]
+ logger_cli.info(
+ " {:8}: {}".format(
+ _osd,
+ " ".join(_row)
+ )
+ )
+ logger_cli.info("\n")
+
+ # critical config values
+ # TODO: print/calculate config values
+
+ return
+
+ def dump_info(self):
+ with open('cephdump.json', 'wt') as _f:
+ _f.write(json.dumps(self.ceph_info, indent=2))
+
+ def load_info(self):
+ with open('cephdump.json', 'rt') as _f:
+ self.ceph_info = json.load(_f)
+
+ def generate_archive(self, tgzfilename):
+ if not self.ceph_info:
+ logger_cli.warning(
+ "WARNING: Ceph Info Data not detected. "
+ "Consider check for errors in log."
+ )
+ else:
+ # Create Archive
+ logger_cli.info("-> Generating archive '{}'".format(tgzfilename))
+ _tgz = TGZFile(
+ tgzfilename,
+ label="MCP Checker: Generated Ceph Information"
+ )
+ # Iterate every key and write data to tar file
+ for key, d in self.ceph_info.items():
+ _filename = None
+ # Cast buf to a proper type
+ _buf = None
+ if isinstance(d["data"], dict) or isinstance(d["data"], list):
+ _buf = json.dumps(d["data"], indent=2)
+ _filename = key + ".json"
+ elif isinstance(d["data"], str):
+ _buf = d["data"]
+ _filename = key + ".txt"
+ else:
+ _buf = str(d["data"])
+ _filename = key + ".txt"
+ logger_cli.debug("... writing '{}'".format(_filename))
+ _tgz.add_file(_filename, buf=_buf, replace=True)
+
+ return
+
+ def create_html_report(self, filename):
+ """
+ Create static html showing ceph info report
+
+ :return: none
+ """
+ logger_cli.info("### Generating report to '{}'".format(filename))
+ _report = reporter.ReportToFile(
+ reporter.HTMLCephInfo(self),
+ filename
+ )
+ _report(
+ {
+ "info": self.ceph_info,
+ "cluster": self.cluster_info,
+ "nodes": self.nodes,
+ "ceph_version": self.ceph_version,
+ }
+ )
+ logger_cli.info("-> Done")
+
+ return
+
+
+class SaltCephInfo(CephInfo):
+ def __init__(
+ self,
+ config
+ ):
+ logger_cli.warning("\nWARNING: Not implemented for Salt environment!\n")
+
+ # self.master = SaltNodes(config)
+ super(SaltCephInfo, self).__init__(config)
+ return
+
+
+class KubeCephInfo(CephInfo):
+ ceph_ns = "rook-ceph"
+ ceph_app_label = "rook-ceph-tools"
+ ceph_group = "ceph.rook.io"
+ ceph_apiversion = "v1"
+ ceph_plural = "cephclusters"
+ ceph_version = "unknown"
+
+ def __init__(self, config):
+ self.master = KubeNodes(config)
+ super(KubeCephInfo, self).__init__(config)
+ # Init ceph tools pod
+ self.pod_name = self._get_tools_pod_name()
+ self.ceph_info = {}
+ self.cluster_info = {}
+ self.ceph_version = self.get_ceph_cluster_config()
+
+ def _safe_tools_cmd(self, cmd, expect_output=True):
+ _r = self.master.exec_cmd_on_target_pod(
+ self.pod_name,
+ self.ceph_ns,
+ cmd
+ )
+ if expect_output and not _r:
+ logger_cli.debug("... got empty output for '{}'".format(cmd))
+ elif not expect_output and _r:
+ logger_cli.warning(
+ "WARNING: Unexpected output for '{}':\n"
+ "===== Start\n{}\n===== End".format(cmd, _r)
+ )
+ return _r
+
+ def _safe_get_cmd_output_as_json(self, cmd):
+ _buf = self._safe_tools_cmd(cmd)
+ try:
+ return json.loads(_buf)
+ except ValueError:
+ logger_cli.error(
+ "\nERROR: failed to parse json: '{}'".format(_buf)
+ )
+ return _buf
+
+ def _get_tools_pod_name(self):
+ # get ceph pod
+ _names = self.master.kube.get_pod_names_by_partial_name(
+ self.ceph_app_label,
+ self.ceph_ns
+ )
+ if not _names:
+ raise KubeException(
+ "Failed to find pod using '{}'".format(self.ceph_app_label)
+ )
+ elif len(_names) > 1:
+ logger_cli.warning(
+ "WARNING: Environment has more than one pod "
+ "with '{}' app: {}".format(
+ self.ceph_app_label,
+ ", ".join(_names)
+ )
+ )
+ else:
+ logger_cli.debug("... found '{}'".format(_names[0]))
+ return _names[0]
+
+ def _add_ceph_info_item(self, key, title, data):
+ if key in self.ceph_info:
+ self.ceph_info[key]["title"] = title
+ self.ceph_info[key]["data"] = data
+ else:
+ self.ceph_info[key] = {
+ "title": title,
+ "data": data
+ }
+
+ def _parse_dev_classes(self, deviceClasses):
+ _devClasses = []
+ for _i in deviceClasses:
+ _devClasses += list(_i.values())
+ return set(_devClasses)
+
+ def get_ceph_cluster_config(self):
+ # get cephclusters resource
+ logger_cli.info("# Loading '{}' object of type '{}/{}'".format(
+ self.ceph_plural,
+ self.ceph_group,
+ self.ceph_apiversion
+ ))
+ _r = self.master.kube.get_custom_resource(
+ self.ceph_group,
+ self.ceph_apiversion,
+ self.ceph_plural,
+ )
+ # find cluster
+ _cluster = None
+ if len(_r['items']) < 1:
+ logger_cli.warning(
+ "WARNING: Failed to find '{}' ({}/{})".format(
+ self.ceph_plural,
+ self.ceph_group,
+ self.ceph_apiversion
+ )
+ )
+ return 'unknown'
+ elif len(_r['items']) > 1:
+ logger_cli.warning(
+ "WARNING: Multiple clusters found '{}' ({}/{})".format(
+ self.ceph_plural,
+ self.ceph_group,
+ self.ceph_apiversion
+ )
+ )
+ _cluster = _r['items'][0]
+ _s = _cluster['status']
+ self.cluster_info.update({
+ 'image': _s['version']['image'],
+ 'version': _s['version']['version'],
+ 'device_classes': self._parse_dev_classes(
+ _s['storage']['deviceClasses']
+ ),
+ 'phase': _s['phase'],
+ 'state': _s['state'],
+ 'health': _s['ceph']['health'],
+ 'previousHealth': _s['ceph']['previousHealth'],
+ 'lastChanged': _s['ceph']['lastChanged'],
+ 'lastChecked': _s['ceph']['lastChecked'],
+ 'mon_count': _cluster['spec']['mon']['count']
+ })
+ self.nodes = _cluster['spec']['storage']['nodes']
+ logger_cli.info("-> Found Ceph cluster: {} ({})".format(
+ self.cluster_info['version'],
+ self.cluster_info['image']
+ ))
+ return self.cluster_info['version']
+
+ def gather_info(self):
+ logger_cli.info("# Gathering Ceph cluster info")
+ # Collect info
+ _c = self._safe_tools_cmd
+ _cj = self._safe_get_cmd_output_as_json
+ # Crush Map
+ logger_cli.info("-> Collecting CRUSH map")
+ _cmap_tmp_path = "/tmp/crushmap.bin"
+ _r = _c(
+ "ceph osd getcrushmap -o " + _cmap_tmp_path,
+ expect_output=False
+ )
+ # TODO: Handle errors in _r
+ logger_cli.debug("... 'getcrushmap' return value is: '{}'".format(_r))
+
+ # Get Crush map as json and text
+ self._add_ceph_info_item(
+ "crushmap_json",
+ "Crush Map (json)",
+ _cj("crushtool -i " + _cmap_tmp_path + " --dump")
+ )
+ # _crushmap = _cj("crushtool -i " + _cmap_tmp_path + " --dump")
+ self._add_ceph_info_item(
+ "crushmap_text",
+ "Crush Map (text)",
+ _c("crushtool -d " + _cmap_tmp_path)
+ )
+
+ logger_cli.info("-> Collecting ceph osd crush dump")
+ self._add_ceph_info_item(
+ "osd_crushdump",
+ "Crush dump (osd)",
+ _cj("ceph osd crush dump")
+ )
+
+ logger_cli.info("-> Collecting cluster status")
+ self._add_ceph_info_item(
+ "cluster_status",
+ "Cluster status",
+ _cj("ceph -s -f json")
+ )
+
+ logger_cli.info("-> Collecting health detail")
+ self._add_ceph_info_item(
+ "health_detail",
+ "Health details",
+ _cj("ceph -f json health detail")
+ )
+
+ logger_cli.info("-> Collecting monmap")
+ self._add_ceph_info_item(
+ "monmap",
+ "Ceph Mon map",
+ _cj("ceph mon dump -f json")
+ )
+
+ logger_cli.info("-> Collecting ceph df")
+ self._add_ceph_info_item(
+ "ceph_df",
+ "Ceph DF",
+ _cj("ceph df -f json")
+ )
+
+ logger_cli.info("-> Collecting ceph osd df")
+ self._add_ceph_info_item(
+ "ceph_osd_df",
+ "Ceph OSD DF",
+ _cj("ceph osd df -f json")
+ )
+
+ logger_cli.info("-> Collecting ceph osd dump")
+ self._add_ceph_info_item(
+ "ceph_osd_dump",
+ "Ceph OSD dump",
+ _cj("ceph osd dump -f json")
+ )
+
+ logger_cli.info("-> Collecting rados df")
+ self._add_ceph_info_item(
+ "rados_df",
+ "Rados DF",
+ _cj("rados df -f json")
+ )
+
+ logger_cli.info("-> Collecting ceph report")
+ self._add_ceph_info_item(
+ "ceph_report",
+ "Ceph Report",
+ _cj("ceph report")
+ )
+
+ logger_cli.info("-> Collecting auth data anonymized")
+ _auth_data = _cj("ceph auth list -f json")
+ # Anonymize data
+ # _cj("ceph auth list -f json | sed 's/AQ[^=]*==/KEY/g'")
+ for item in _auth_data["auth_dump"]:
+ if "key" in item:
+ item['key'] = "key-data-redacted"
+ self._add_ceph_info_item(
+ "ceph_auth_ls",
+ "Ceph Auth Data (anonymized)",
+ _auth_data
+ )
+
+ logger_cli.info("-> Collecting ceph pg dump")
+ self._add_ceph_info_item(
+ "ceph_pg_dump",
+ "Ceph PG dump",
+ _cj("ceph pg dump -f json")
+ )
+
+ logger_cli.info("-> Collecting ceph running configuration")
+ self._add_ceph_info_item(
+ "ceph_config_dump",
+ "Ceph Configuration Dump",
+ _cj("ceph config dump -f json")
+ )
+
+ logger_cli.info("-> Collecting health metrics")
+ _health_metrics = {}
+ _devices = _c("ceph device ls")
+ for device in _devices.splitlines():
+ _t = device.split()
+ _osd = _t[2]
+ _dev = _t[0]
+ if _dev == "DEVICE":
+ continue
+ _metric = _cj("ceph device get-health-metrics {}".format(_dev))
+ _health_metrics["{}_{}".format(_osd, _dev)] = _metric
+ self._add_ceph_info_item(
+ "ceph_health",
+ "Ceph Health Metrics",
+ _health_metrics
+ )
+
+ # Latency values
+ # config const for set
+ _latency_count = 10
+ _latency_delay = 4
+ logger_cli.info(
+ "-> Collecting ceph osd latency data "
+ "({} total, {} sec delay)".format(
+ _latency_count,
+ _latency_delay
+ )
+ )
+ _osd_lat = {
+ "total": _latency_count,
+ "delay": _latency_delay,
+ "data": []
+ }
+ _progress = Progress(_latency_count)
+ _index = 1
+ while _index <= _latency_count:
+ _progress.write_progress(_index)
+ _osd_lat["data"].append(_cj("ceph osd perf -f json"))
+ sleep(_latency_delay)
+ _index += 1
+ _progress.end()
+ self._add_ceph_info_item(
+ "osd_latency_data",
+ "OSD Latency metrics",
+ _osd_lat
+ )
+
+ return
diff --git a/cfg_checker/modules/network/__init__.py b/cfg_checker/modules/network/__init__.py
index 4c95ef3..a99fa9e 100644
--- a/cfg_checker/modules/network/__init__.py
+++ b/cfg_checker/modules/network/__init__.py
@@ -154,7 +154,7 @@
config
)
# Start command
- logger_cli.info("# Network report (check, node map")
+ logger_cli.info("# Network report (check, node map)")
_filename = args_utils.get_arg(args, 'html')
_skip, _skip_file = args_utils.get_skip_args(args)
diff --git a/cfg_checker/modules/network/pinger.py b/cfg_checker/modules/network/pinger.py
index 04a5f68..60c80cc 100644
--- a/cfg_checker/modules/network/pinger.py
+++ b/cfg_checker/modules/network/pinger.py
@@ -401,7 +401,7 @@
"targets.json"
)
# execute ping.py
- _result = self.mapper.master.exec_on_target_pod(
+ _result = self.mapper.master.exec_script_on_target_pod(
_pname,
"ping.py",
args=[_path]
diff --git a/cfg_checker/nodes.py b/cfg_checker/nodes.py
index d87d829..ef2219c 100644
--- a/cfg_checker/nodes.py
+++ b/cfg_checker/nodes.py
@@ -1043,7 +1043,7 @@
logger_cli.error("Timed out waiting for Daemonset to be ready")
return False
- def exec_on_target_pod(self, pod_name, script_filename, args=None):
+ def exec_script_on_target_pod(self, pod_name, script_filename, args=None):
"""
Run script from configmap on target pod assuming it is present
"""
@@ -1064,6 +1064,18 @@
)
return _result
+ def exec_cmd_on_target_pod(self, pod_name, ns, command_str):
+ """
+ Run script from configmap on target pod assuming it is present
+ """
+ _result = self.kube.exec_on_target_pod(
+ command_str,
+ pod_name,
+ ns,
+ strict=True
+ )
+ return _result
+
def execute_script_on_daemon_set(self, ds, script_filename, args=None):
"""
Query daemonset for pods and execute script on all of them
diff --git a/cfg_checker/reports/reporter.py b/cfg_checker/reports/reporter.py
index 7ddbc4f..1f54ff3 100644
--- a/cfg_checker/reports/reporter.py
+++ b/cfg_checker/reports/reporter.py
@@ -32,6 +32,11 @@
return text.replace("\n", "<br />")
+def tabstops(text):
+ # replace tab characters with the html tab entity
+ return text.replace("\t", "	")
+
+
def get_sorted_keys(td):
# detect if we can sort by desc
# Yes, this is slow, but bullet-proof from empty desc
@@ -116,6 +121,78 @@
return _text
+def to_gb(bytes_str):
+ _bytes = int(bytes_str)
+ _gb = _bytes / 1024 / 1024 / 1024
+ return "{}".format(round(_gb, 2))
+
+
+def to_mb(bytes_str):
+ _bytes = int(bytes_str)
+ _mb = _bytes / 1024 / 1024
+ return "{}".format(round(_mb, 2))
+
+
+def get_bucket_item_name(id, cmap):
+ for buck in cmap["buckets"]:
+ if id == buck["id"]:
+ return buck["name"]
+ for dev in cmap["devices"]:
+ if id == dev["id"]:
+ return dev["name"]
+ return id
+
+
+def get_rule_steps(steps):
+ _steps = []
+ for step in steps:
+ _ops = step.pop("op").split('_')
+ if "take" in _ops:
+ _steps.append(
+ "step {} {}".format(
+ " ".join(_ops),
+ step["item_name"]
+ )
+ )
+ else:
+ _steps.append(
+ "step {} {}".format(
+ " ".join(_ops),
+ " ".join(["{} {}".format(k, v) for k, v in step.items()])
+ )
+ )
+ return _steps
+
+
+def get_osdmap(cs):
+ _osdmap = cs
+ while True:
+ _keys = list(_osdmap.keys())
+ for _k in _keys:
+ if _k == "osdmap":
+ _osdmap = _osdmap[_k]
+ break
+ elif _k == 'epoch':
+ return _osdmap
+ return {
+ "epoch": 0,
+ "num_osds": 0,
+ "num_up_osds": 0,
+ "osd_up_since": 0,
+ "num_in_osds": 0,
+ "osd_in_since": 0,
+ "num_remapped_pgs": 0
+ }
+
+
+def get_pool_stats(id, pgdump):
+ _stats = {}
+ for pool in pgdump["pg_map"]["pool_stats"]:
+ if id == pool["poolid"]:
+ _stats = pool
+ return _stats
+
+
@six.add_metaclass(abc.ABCMeta)
class _Base(object):
def __init__(self, master=None):
@@ -167,6 +244,12 @@
self.jinja2_env.filters['pkg_action_class'] = make_pkg_action_class
self.jinja2_env.filters['node_status_class'] = make_node_status
self.jinja2_env.filters['pkg_repo_info'] = make_repo_info
+ self.jinja2_env.filters['to_gb'] = to_gb
+ self.jinja2_env.filters['to_mb'] = to_mb
+ self.jinja2_env.filters['get_bucket_item_name'] = get_bucket_item_name
+ self.jinja2_env.filters['get_rule_steps'] = get_rule_steps
+ self.jinja2_env.filters['get_pool_stats'] = get_pool_stats
+ self.jinja2_env.filters['get_osdmap'] = get_osdmap
# render!
logger_cli.info("-> Using template: {}".format(self.tmpl))
@@ -195,6 +278,11 @@
tmpl = "pkg_versions_html.j2"
+# HTML Ceph information report
+class HTMLCephInfo(_TMPLBase):
+ tmpl = "ceph_info_html.j2"
+
+
# Package versions report
class HTMLModelCompare(_TMPLBase):
tmpl = "model_tree_cmp_tmpl.j2"
diff --git a/scripts/ceph_collect.sh b/scripts/ceph_collect.sh
new file mode 100644
index 0000000..fe2ef6c
--- /dev/null
+++ b/scripts/ceph_collect.sh
@@ -0,0 +1,54 @@
+#!/bin/bash
+echo "Collecting Ceph cluster data."
+
+if [ "$#" -lt 2 ]; then echo "Usage: ./ceph_collect.sh <CUSTOMER> <CLUSTERNAME>"; exit; fi
+export CUSTOMER=$1
+export CLUSTERNAME=$2
+
+if ! which ceph >/dev/null; then echo "ERROR: This script must be run on a ceph monitor or admin node"; exit; fi
+
+DATE=`date "+%Y-%m-%d"`
+DIRNAME="CephCollectData.$CUSTOMER.$CLUSTERNAME.$DATE"
+ARCHNAME=$DIRNAME".tar.gz"
+mkdir $DIRNAME
+cd $DIRNAME
+
+echo "Collecting CRUSH map"
+ceph osd getcrushmap -o crush.bin
+crushtool -d crush.bin -o crushmap.txt
+crushtool -i crush.bin --dump > crushmap.json
+rm crush.bin
+
+echo "Collecting ceph osd crush dump"
+ceph osd crush dump >crushdump.json
+
+echo "Collecting cluster status"
+ceph -s -f json -o ceph_s.json
+echo "Collecting health detail"
+ceph -f json health detail -o ceph_health_detail.json
+echo "Collecting monmap"
+ceph mon dump -f json -o monmap.json
+echo "Collecting ceph df"
+ceph df -f json -o ceph_df.json
+echo "Collecting ceph osd df"
+ceph osd df -f json -o ceph_osd_df.json
+echo "Collecting ceph osd dump"
+ceph osd dump -f json -o ceph_osd_dump.json
+echo "Collecting rados df"
+rados df -f json >rados_df.json
+echo "Collecting ceph report"
+ceph report -o ceph_report.json
+echo "Collecting auth data anonymized"
+ceph auth list -f json |sed 's/AQ[^=]*==/KEY/g' > ceph_auth_ls.json
+echo "Collecting ceph pg dump"
+ceph pg dump -f json -o ceph_pg_dump.json
+echo "Collecting health metrics"
+mkdir ceph-health
+IFS=$'\n'; for device in `ceph device ls|grep -v DEVICE`; do osd=$(echo $device|awk '{print $3}'); dev=$(echo $device|awk '{print $1}'); ceph device get-health-metrics $dev >ceph-health/$osd-$dev.json ; done
+echo "Collecting ceph osd perf"
+for i in {0..9}; do echo $i; ceph osd perf -f json -o ceph_osd_perf_$i.json; sleep 4; done
+echo "Collecting ceph running configuration"
+ceph config dump -f json >ceph_config_dump.json
+
+tar czf "../"$ARCHNAME *
+
diff --git a/templates/ceph_info_html.j2 b/templates/ceph_info_html.j2
new file mode 100644
index 0000000..1e461c8
--- /dev/null
+++ b/templates/ceph_info_html.j2
@@ -0,0 +1,959 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+ <meta charset="UTF-8">
+ <title>Ceph cluster info</title>
+ {% include 'common_styles.j2' %}
+ {% include 'common_scripts.j2' %}
+ <style>
+ table.cluster_nodes {
+ width: 100%;
+ margin-left: 1%;
+ margin-right: 1%;
+ }
+ .barcontent {
+ margin: auto;
+ width: 1350px;
+ padding: 10px;
+ }
+ .bar-centered {
+ float: none;
+ transform: translate(25%);
+ }
+
+ /* Node rows*/
+ .node {
+ font-family: "LaoSangamMN", Monaco, monospace;
+ font-size: 0.8em;
+ display: flex;
+ background-color: white;
+ align-items: center;
+ }
+ .collapsable {
+ font-family: "LaoSangamMN", Monaco, monospace;
+ font-size: 0.8em;
+ display: none;
+ background-color: white;
+ visibility: hidden;
+ }
+ .collapsable.in {
+ visibility: visible;
+ display: inline-block;
+ }
+
+ .row_button {
+ background-color: #468;
+ color: #fff;
+ cursor: pointer;
+ padding: 5px;
+ width: 100%;
+ border: none;
+ text-align: left;
+ outline: none;
+ font-size: 13px;
+ }
+ .row_button:after {
+ content: '\02795'; /* Unicode character for "plus" sign (+) */
+ font-size: 13px;
+ color: white;
+ float: left;
+ margin-left: 5px;
+ }
+
+ .row_active:after {
+ content: "\2796"; /* Unicode character for "minus" sign (-) */
+ color: white
+ }
+
+ .row_active, .row_button:hover {
+ background-color: #68a;
+ color: white
+ }
+
+ .cell_button {
+ color: darkgreen;
+ cursor: pointer;
+ padding: 5px;
+ width: 100%;
+ border: none;
+ text-align: center;
+ outline: none;
+ }
+ .cell_button:hover {
+ background-color: gray;
+ }
+
+ .row_content {
+ padding: 0 18px;
+ background-color: white;
+ max-height: 0;
+ overflow: hidden;
+ transition: max-height 0.2s ease-out;
+ border-width: 1px;
+ border-color: #68a;
+ border-style: solid;
+ }
+
+ div.services > .collapsable.in {
+ display: table-row;
+ }
+ tr:nth-child(even) {
+ background-color: #eee;
+ }
+ tr:nth-child(odd) {
+ background-color: #fff;
+ }
+
+ tr.node > td, tr.collapsable > td {
+ display: block;
+ float: left;
+ padding: 1px;
+ margin: 2px;
+ }
+ td > .osd_group {
+ display: grid;
+ grid-template-columns: 40px 25px 25px 70px;
+ padding-left: 0px;
+ padding-right: 0px;
+ margin: 1px;
+ }
+ td > .pg_group {
+ display: grid;
+        grid-template-columns: 50px 40px 60px 65px 60px 65px 65px;
+ padding-left: 0px;
+ padding-right: 0px;
+ margin: 1px;
+ }
+ td > .bench_group {
+ display: grid;
+ grid-template-columns: 80px 80px 75px 75px;
+ padding-left: 0px;
+ padding-right: 0px;
+ margin: 1px;
+ }
+ td > .meta_group {
+ display: inline-block;
+ grid-template-columns: repeat(4, auto);
+ padding-left: 0px;
+ padding-right: 0px;
+ margin: 1px;
+ }
+ .item {
+ display: inline-grid;
+ border-width: 1px;
+ border-style: solid;
+ margin: 1px 1px 1px 1px;
+ padding: 0px 1px 0px 1px;
+ }
+
+ .spacer { border-radius: 2px; width: 20px;}
+ .status { border-radius: 10px; width: 120px; text-align: center;}
+ .health_ok { background-color: #393; color: white;}
+ .health_error { background-color: #933; color: white;}
+ .health_warn { background-color: #eb3; color: #333;}
+ .checks_code { border-radius: 2px; width: 20%; background-color: transparent; color: darkred;}
+
+ .head { height: 18px; background-color: transparent; border-color: transparent; border: 0px;}
+ .centered { text-align: center;}
+ .right { text-align: right;}
+ .col_shortmessage { min-width: 300px; }
+ .col_longmessage { width: auto; }
+ .col_properties { width: auto;}
+
+ .srv_name { width: 300px }
+ .srv_path { width: 250px }
+ .srv_timestamp { width: 250px }
+ .srv_addr { width: 450px }
+
+ .id { width: 30px }
+ .bucket_name { width: 365px }
+ .bucket_type { width: 50px }
+ .bucket_params { width: 200px }
+ .bucket_items { width: 630px }
+
+ .df_name { width: 300px }
+ .df_total { width: 150px }
+ .df_avail { width: 150px }
+ .df_used { width: 150px }
+ .df_used_raw { width: 150px }
+ .df_used_raw_rate { width: 150px }
+
+ .rdf_name { width: 200px; }
+ .rdf_obj { width: 75px; }
+ .rdf_total { width: 100px; }
+ .rdf_used { width: 100px; }
+ .rdf_bench { width: 100px; }
+
+ .dev_name { width: 300px; }
+ .dev_param { width: 100px; }
+
+ .mon_name { width: 100px }
+ .mon_url { width: 500px }
+
+ .meters {
+ display: inline-block;
+ margin: 1px;
+ }
+ .meters > .meter {
+ display: block;
+ float: left;
+ border-width: 1px;
+ border-style: solid;
+ margin: 0px 1px 0px 1px;
+ padding: 0px 1px 0px 1px;
+
+ }
+ .meters > .warn {
+ border-color: #d3a200;
+ background-color: rgb(255, 216, 133);
+ }
+ .meters > .fail {
+ border-color: #bb0000;
+ background-color: rgb(250, 135, 135);
+ }
+ .osd { border-color: #a0c0a0; background-color: rgb(252, 248, 248); text-align: center;}
+ .pg { border-color: #c0c0a0; background-color: rgb(255, 255, 251); text-align: right; }
+ .bench { border-color: #a0c0c0; background-color: rgb(255, 250, 250); text-align: right; }
+ .lat_commit { border-color: #a0c0c0; background-color: rgb(255, 250, 250); text-align: right; width: 45px}
+ .lat_apply { border-color: #a0c0c0; background-color: rgb(255, 250, 250); text-align: left; width: 35px}
+ .meta_name { border-color: #c4b890; background-color: #e7dbb6; text-align: left; width: 150px;}
+ .meta_value { border-color: #c6c3ba;background-color: #d4d4d4; text-align: left; width: 480px;}
+
+ .map_grid {
+ display: grid;
+ grid-template-columns: auto auto auto auto auto auto auto auto auto auto;
+ grid-column-gap: 20px;
+ padding-left: 0px;
+ padding-right: 0px;
+ margin: 1px;
+ margin-left: 20px;
+
+ }
+ .map_item {
+ display: inline-grid;
+ border-width: 0px;
+ border-style: solid;
+ margin: 1px 1px 1px 1px;
+ padding: 0px 1px 0px 1px;
+ }
+
+ .map_grid > .ok {
+ color: #80a080;
+ }
+ .map_grid > .warn {
+ color: #d3a200;
+ }
+ .map_grid > .fail {
+ color: #bb0000;
+ }
+
+ .modules {
+ font-family: "LaoSangamMN", Monaco, monospace;
+ font-size: 0.8em;
+ background-color: white;
+ }
+ .module_node {
+ margin-bottom: 2px;
+ display: flex;
+ }
+ .module_name, .node_name {
+ text-align: center;
+ border-width: 0px;
+ border-style: solid;
+ margin: 1px 1px 1px 1px;
+ padding: 0px 1px 0px 1px;
+ min-width: 250px;
+ border-radius: 10px;
+ }
+ .node_name {
+ background-color: #ddd;
+ }
+ .module_grid {
+ display: grid;
+ grid-template-columns: repeat(8, 100px);
+ grid-template-rows: repeat(6, auto);
+ grid-auto-flow: column;
+ grid-column-gap: 10px;
+ padding-left: 0px;
+ padding-right: 0px;
+ margin: 1px;
+ margin-left: 20px;
+ }
+ .module {
+ display: inline-grid;
+ text-align: center;
+ border-width: 0px;
+ border-style: solid;
+ margin: 1px 1px 1px 1px;
+ padding: 0px 1px 0px 1px;
+ min-width: 100px;
+ border-radius: 10px;
+ }
+
+ .module_grid > .on, .service_node > .ok {
+ background-color: #8c8;
+ }
+ .module_grid > .off, .service_node > .off{
+ background-color: #9aa;
+ }
+ .module_grid > .fail, .service_node > .fail {
+ background-color: #a33;
+ }
+    .module_grid > .always, .service_node > .always {
+ background-color: #282;
+ }
+
+ .tooltiptext {
+ transform: translate(100px);
+ }
+
+ .console {
+ background-color: black;
+ font-family: "Lucida Console", Monaco, monospace;
+ font-size: 0.5em;
+ width: auto;
+ color: #fff;
+ border-radius: 6px;
+ padding: 5px 5px;
+ }
+
+ </style>
+</head>
+<body onload="init()">
+
+<div class="header">
+ <div class="label">Ceph version:</div>
+ <div class="text">{{ ceph_version }}</div>
+ <div class="label">Image:</div>
+ <div class="text">{{ cluster.image }}</div>
+ <div class="label date">generated on: {{ gen_date }}</div>
+</div>
+
+<div class="bar">
+ <div class="bar-centered">
+ <button class="bar-item" onclick="openBar(event, 'status')">Status</button>
+ <button class="bar-item" onclick="openBar(event, 'latency')">Latency</button>
+ <button class="bar-item" onclick="openBar(event, 'crush')">CRUSH Map</button>
+ <button class="bar-item" onclick="openBar(event, 'mondump')">Monitors</button>
+ <button class="bar-item" onclick="openBar(event, 'df')">Pools</button>
+ <button class="bar-item" onclick="openBar(event, 'dfrados')">Rados</button>
+ <button class="bar-item" onclick="openBar(event, 'auth')">Auth list</button>
+ <button class="bar-item" onclick="openBar(event, 'dhealth')">Device Health</button>
+ </div>
+</div>
+
+{% macro status_page(info, id_label) %}
+<div id="{{ id_label }}" class="barcontent">
+ <h5>{{ caller() }}</h5>
+ <hr>
+ <table class="ceph_status">
+ <tr class="node">
+ <td class="status">Cluster status</td>
+ <td class="col_shortmessage">Status summary</td>
+ <td class="col_osd">
+ <div class="osd_group">
+ <div class="item osd">OSDs</div>
+ <div class="item osd">Up</div>
+ <div class="item osd">In</div>
+ <div class="item osd">Remap PGs</div>
+ </div>
+ </td>
+ <td class="col_pgs">
+ <div class="pg_group">
+ <div class="item pg">PGs</div>
+ <div class="item pg">Pools</div>
+ <div class="item pg">Objects</div>
+ <div class="item pg">Data, GB</div>
+ <div class="item pg">Used, GB</div>
+ <div class="item pg">Avail, GB</div>
+ <div class="item pg">Total, GB</div>
+ </div>
+ </td>
+ <td class="col_bench">
+ <div class="bench_group">
+ <div class="item bench">Read, MB/sec</div>
+ <div class="item bench">Write, MB/sec</div>
+ <div class="item bench">Read, op/sec</div>
+ <div class="item bench">Write, op/sec</div>
+ </div>
+ </td>
+ </tr>
+ {% set hdetail = info["health_detail"]["data"] %}
+ {% set cs = info["cluster_status"]["data"] %}
+ {% set osdmap = cs | get_osdmap %}
+ <tr class="node" onclick="toggleClassByID('health_data')" id="health_data_button">
+ <td class="status {{ hdetail["status"] | lower }}">{{ hdetail["status"] }}</td>
+ <td class="col_shortmessage">
+ {% for code,dt in hdetail["checks"].items() %}
+ {{ dt["summary"]["message"] }}<br>
+ {% endfor %}
+ </td>
+ <!-- background: linear-gradient(to right, gray 0% 20%, transparent 20% 100%); -->
+ <td class="col_osd">
+ <div class="osd_group">
+ <div class="item osd">{{ osdmap["num_osds"] }}</div>
+ <div class="item osd">{{ osdmap["num_up_osds"] }}</div>
+ <div class="item osd">{{ osdmap["num_in_osds"] }}</div>
+ <div class="item osd">{{ osdmap["num_remapped_pgs"] }}</div>
+ </div>
+ </td>
+ {% set pgmap = cs["pgmap"] %}
+ <td class="col_pgs">
+ <div class="pg_group">
+ <div class="item pg">{{ pgmap["num_pgs"] }}</div>
+ <div class="item pg">{{ pgmap["num_pools"] }}</div>
+ <div class="item pg">{{ pgmap["num_objects"] }}</div>
+ <div class="item pg">{{ pgmap["data_bytes"] | to_gb }}</div>
+ <div class="item pg">{{ pgmap["bytes_used"] | to_gb }}</div>
+ <div class="item pg">{{ pgmap["bytes_avail"] | to_gb }}</div>
+ <div class="item pg">{{ pgmap["bytes_total"] | to_gb }}</div>
+ </div>
+ </td>
+ <td class="col_bench">
+ <div class="bench_group">
+ {% if "read_bytes_sec" in pgmap %}
+ <div class="item bench">{{ pgmap["read_bytes_sec"] | to_mb }}</div>
+ {% else %}
+ <div class="item bench">0</div>
+ {% endif %}
+ {% if "write_bytes_sec" in pgmap %}
+ <div class="item bench">{{ pgmap["write_bytes_sec"] | to_mb }}</div>
+ {% else %}
+ <div class="item bench">0</div>
+ {% endif %}
+ {% if "read_op_per_sec" in pgmap %}
+ <div class="item bench">{{ pgmap["read_op_per_sec"] }}</div>
+ {% else %}
+ <div class="item bench">0</div>
+ {% endif %}
+ {% if "write_op_per_sec" in pgmap %}
+ <div class="item bench">{{ pgmap["write_op_per_sec"] }}</div>
+ {% else %}
+ <div class="item bench">0</div>
+ {% endif %}
+ </div>
+ </td>
+ </tr>
+    <tr class="collapsable in" id="health_data"><td colspan=5>
+ <table><tbody>
+ {% for code,dt in hdetail["checks"].items() %}
+ <tr>
+ <td class="spacer"></td>
+ <td class="status {{ dt["severity"] | lower }}">{{ dt["severity"] }}</td>
+ <td class="checks_code">{{ code }}</td>
+ <td class="col_longmessage">
+ <table><tbody>
+ {% for detail in dt["detail"] %}
+ <tr><td>{{ detail["message"] }}</td></tr>
+ {% endfor %}
+ </tbody></table>
+ </td>
+ </tr>
+ {% endfor %}
+ </tbody></table>
+ </td></tr>
+ </table>
+ <hr>
+ <!-- Services -->
+ {% set sm = info["cluster_status"]["data"]["servicemap"] %}
+ <h5>Services: {{ sm["services"] | count }} running. Last modification: {{ sm["modified"] }}</h5>
+ <table class="ceph_status">
+ <tr class="node">
+ <td class="srv_name">Name</td>
+ <td class="srv_path">Subpath</td>
+ <td class="srv_timestamp">Start time</td>
+ <td class="srv_addr">Address</td>
+ </tr>
+ {% for name, d1 in sm["services"].items() %}
+ {% if "daemons" in d1 %}
+ {% set d2 = d1["daemons"] %}
+ {% for key, d3 in d2.items() %}
+ {% if key.startswith("rgw.store") %}
+ <tr class="node" onclick="toggleClassByID('{{ name }}_service_data')" id="{{ name }}_service_data_button">
+ <td class="srv_name">{{ name }} ({{ d3["gid"] }})</td>
+ <td class="srv_path">daemons:{{ key }}</td>
+ <td class="srv_timestamp">{{ d3["start_stamp"] }}</td>
+ <td class="srv_addr">{{ d3["addr"] }}</td>
+ </tr>
+ <tr class="collapsable in" id="{{ name}}_service_data"><td colspan=4>
+ <table><tbody>
+ <tr><td class="metadata">
+ {% for mname, mvalue in d3["metadata"].items() %}
+ <div class="meta_group">
+ <div class="item meta_name">{{ mname }}</div>
+ <div class="item meta_value">{{ mvalue }}</div>
+ </div>
+ {% endfor %}
+ </td></tr>
+ </tbody></table>
+ </td></tr>
+ {% endif %}
+ {% endfor %}
+ {% endif %}
+ {% endfor %}
+ </table>
+ <hr>
+ <!-- Modules -->
+ {% set mgrmap = info["cluster_status"]["data"]["mgrmap"] %}
+ {% set mods = mgrmap["modules"] %}
+ {% set avail = mgrmap["available_modules"] %}
+ {% if "always_on_modules" in mgrmap %}
+ {% set always_on = mgrmap["always_on_modules"].values() | list %}
+ {% set always_on = always_on[0] %}
+ {% else %}
+ {% set always_on = [] %}
+ {% endif %}
+ <h5>Modules: {{ mods | count}} active. {{ always_on | count }} always on. {{ avail | count }} available.</h5>
+ <div class="modules">
+ <div class="module_grid">
+ {% for mod in avail %}
+ {% if mod["name"] in always_on %}
+ <div class="module always">{{ mod["name"] }}</div>
+ {% elif mod["name"] in mods %}
+ <div class="module on">{{ mod["name"] }}</div>
+ {% elif not mod["can_run"] %}
+ <div class="module fail tooltip">
+ <div class="module fail">{{ mod["name"] }}</div>
+ <pre class="tooltiptext">{{ mod["error_string"] | linebreaks }}</pre>
+ </div>
+ {% else %}
+ <div class="module">{{ mod["name"] }}</div>
+ {% endif %}
+ {% endfor %}
+ </div>
+ </div>
+ <hr>
+</div>
+{% endmacro %}
+
+<!-- CRUSH MAP -->
+{% macro crush_page(info, id_label) %}
+<div id="{{ id_label }}" class="barcontent">
+ <h5>{{ caller() }}</h5>
+ <hr>
+ {% set cmap = info["crushmap_json"]["data"] %}
+ <button type="button" class="row_button">{{ cmap["tunables"] | length }} tunable parameters</button>
+ <div class="row_content">
+ <table class="ceph_status"><tbody>
+ <tr><td class="metadata">
+ {% for tname, tvalue in cmap["tunables"].items() %}
+ <div class="meta_group">
+ <div class="item meta_name">{{ tname }}</div>
+ <div class="item meta_value">{{ tvalue }}</div>
+ </div>
+ {% endfor %}
+ </td></tr>
+ </tbody></table>
+ </div>
+
+ <button type="button" class="row_button">{{ cmap["devices"] | length }} devices</button>
+ <div class="row_content">
+ <table class="ceph_status"><tbody>
+ <tr><td class="metadata">
+ {% for dev in cmap["devices"] %}
+ <div class="meta_group">
+ <div class="item meta_name">{{ dev["name"] }}</div>
+ <div class="item meta_value">id: {{ dev["id"] }}, class: {{ dev["class"] }}</div>
+ </div>
+ {% endfor %}
+ </td></tr>
+ </tbody></table>
+ </div>
+
+ <button type="button" class="row_button">{{ cmap["types"] | length }} types</button>
+ <div class="row_content">
+ <table class="ceph_status"><tbody>
+ <tr><td class="metadata">
+ {% for dtyp in cmap["types"] %}
+ <div class="meta_group">
+ <div class="item meta_name">type_id: {{ dtyp["type_id"] }}</div>
+ <div class="item meta_value">{{ dtyp["name"] }}</div>
+ </div>
+ {% endfor %}
+ </td></tr>
+ </tbody></table>
+ </div>
+
+ <button type="button" class="row_button">{{ cmap["buckets"] | length }} buckets</button>
+ <div class="row_content">
+ <table class="ceph_status"><tbody>
+ <tr class="node">
+ <td class="id">ID</td>
+ <td class="bucket_name">Bucket name</td>
+ <td class="bucket_type">Type</td>
+ <td class="bucket_params">Weight, algorithm, hash</td>
+ <td class="bucket_items">Items</td>
+ </tr>
+ {% for buck in cmap["buckets"] %}
+ <tr class="node">
+ <td class="id">{{ buck["id"] }}</td>
+ <td class="bucket_name">{{ buck["name"] }}</td>
+ <td class="bucket_type">{{ buck["type_name"] }}</td>
+ <td class="bucket_params">{{ buck["weight"] }}, {{ buck["alg"] }}, {{ buck["hash"] }}</td>
+ <td class="bucket_items">
+ {% for bitem in buck["items"] %}
+ {{ bitem["pos"] }}: {{ bitem["id"] | get_bucket_item_name(cmap) }}, weight {{ bitem["weight"] }}<br>
+ {% endfor %}
+ </td>
+ </tr>
+ {% endfor %}
+ </td></tr>
+ </tbody></table>
+ </div>
+
+ <button type="button" class="row_button">{{ cmap["rules"] | length }} rules</button>
+ <div class="row_content">
+ <table class="ceph_status"><tbody>
+ <tr class="node">
+ <td class="id">ID</td>
+ <td class="bucket_name">Rule name</td>
+ <td class="bucket_type">Type</td>
+ <td class="bucket_params">Min/Max Size</td>
+ <td class="bucket_items">Steps</td>
+ </tr>
+ {% for rule in cmap["rules"] %}
+ <tr class="node">
+ <td class="id">{{ rule["rule_id"] }}</td>
+ <td class="bucket_name">{{ rule["rule_name"] }}</td>
+ <td class="bucket_type">{{ rule["type"] }}</td>
+ <td class="bucket_params">{{ rule["min_size"] }}/{{ rule["max_size"] }}</td>
+ <td class="bucket_items">
+ {% for step in rule["steps"] | get_rule_steps %}
+ {{ step }}<br>
+ {% endfor %}
+ </td>
+ </tr>
+ {% endfor %}
+ </td></tr>
+ </tbody></table>
+ </div>
+</div>
+{% endmacro %}
+
+<!-- Latency -->
+{% macro latency_page(lat, id_label) %}
+{# Quick-latency table for all OSDs.
+   'lat' is info["osd_latency_data"]: {"data": {delay, total, ...},
+   "table": {osd_name: [per-iteration dicts]}}.
+   Fixed: the macro previously ignored its 'lat' argument and read the
+   global 'info' (would break if imported without context); also fixed
+   the visible "ODS node" typo to "OSD node". #}
+<div id="{{ id_label }}" class="barcontent">
+  {% set ldata = lat["data"] %}
+  {% set ldelay = ldata["delay"] %}
+  {% set ltotal = ldata["total"] %}
+  {% set ltable = lat["table"] %}
+  <h5>{{ caller() }}: {{ ltotal }} iterations, {{ ldelay }} delay between iterations</h5>
+  <hr>
+  <table class="ceph_status">
+  {% for osd, llist in ltable.items() %}
+    <tr class="node">
+    {% if osd == "<dev>" %}
+      {# Pseudo-row "<dev>" carries the column labels for each iteration. #}
+      <td class="status">OSD node (ms)</td>
+      <td class="col_latency">
+        <div class="meters">
+        {% for ii in llist %}
+          <div class="meter lat_commit">Commit</div>
+          <div class="meter lat_apply">Apply</div>
+        {% endfor %}
+        </div>
+      </td>
+    {% else %}
+      <td class="status">{{ osd }}</td>
+      <td class="col_latency">
+        <div class="meters">
+        {% for ii in llist %}
+          <div class="meter lat_commit">{{ ii["commit_latency_ms"] }}</div>
+          <div class="meter lat_apply">{{ ii["apply_latency_ms"] }}</div>
+        {% endfor %}
+        </div>
+      </td>
+    {% endif %}
+    </tr>
+  {% endfor %}
+  </table>
+</div>
+{% endmacro %}
+
+<!-- Mon Dump -->
+{% macro mondump_page(mondump, id_label) %}
+{# Monitor list page. 'mondump' is info["monmap"].
+   Fixed: the macro previously ignored its 'mondump' argument and read the
+   global 'info' (would break if imported without context); the caller
+   already passes info['monmap'], so output is unchanged. #}
+<div id="{{ id_label }}" class="barcontent">
+  {% set mons = mondump["data"] %}
+  <h5>{{ caller() }} : v{{ mons["min_mon_release"] }}/{{ mons["min_mon_release_name"] }} and higher</h5>
+  <div class="note">Persistent: {{ mons["features"]["persistent"] | join(", ") }}</div>
+  {% if mons["features"]["optional"] | length > 0 %}
+  <div class="note">Optional: {{ mons["features"]["optional"] | join(", ") }}</div>
+  {% else %}
+  <div class="note">Optional: no</div>
+  {% endif %}
+  <hr>
+  <table class="ceph_status">
+    <tr class="node">
+      <td class="id centered">Rank</td>
+      <td class="mon_name">Name</td>
+      <td class="mon_url">Address</td>
+      <td class="mon_url">Public address</td>
+    </tr>
+  {% for mon in mons["mons"] %}
+    <tr class="node">
+      <td class="id centered">{{ mon["rank"] }}</td>
+      <td class="mon_name">{{ mon["name"] }}</td>
+      <td class="mon_url">{{ mon["addr"] }}</td>
+      <td class="mon_url">{{ mon["public_addr"] }}</td>
+    </tr>
+  {% endfor %}
+  </table>
+</div>
+{% endmacro %}
+
+<!-- DF -->
+{% macro df_page(info, id_label) %}
+<div id="{{ id_label }}" class="barcontent">
+ {% set df = info["ceph_df"]["data"] %}
+ <h5>{{ caller() }}</h5>
+ <div class="note">{{ df["stats"]["num_osds"] }} OSD nodes, {{ df["stats"]["num_per_pool_osds"] }} per pool</div>
+ <hr>
+ <table class="ceph_status">
+ <tr class="node">
+ <td class="df_name">Scope</td>
+ <td class="df_total right">Total, GB</td>
+ <td class="df_avail right">Available, GB</td>
+ <td class="df_used right">Used, GB</td>
+ <td class="df_used_raw right">Used raw, GB</td>
+ <td class="df_used_raw_rate right">Raw ratio</td>
+ </tr>
+ <tr class="node">
+ <td class="df_name">All</td>
+ <td class="df_total right">{{ df["stats"]["total_bytes"] | to_gb }}</td>
+ <td class="df_avail right">{{ df["stats"]["total_avail_bytes"] | to_gb }}</td>
+ <td class="df_used right">{{ df["stats"]["total_used_bytes"] | to_gb }}</td>
+ <td class="df_used_raw right">{{ df["stats"]["total_used_raw_bytes"] | to_gb }}</td>
+ <td class="df_used_raw_rate right">{{ "%0.4f" | format(df["stats"]["total_used_raw_ratio"]|float) }}</td>
+ </tr>
+ {% for class, stat in df["stats_by_class"].items() %}
+ <tr class="node">
+ <td class="df_name">{{ class }}</td>
+ <td class="df_total right">{{ stat["total_bytes"] | to_gb }}</td>
+ <td class="df_avail right">{{ stat["total_avail_bytes"] | to_gb }}</td>
+ <td class="df_used right">{{ stat["total_used_bytes"] | to_gb }}</td>
+ <td class="df_used_raw right">{{ stat["total_used_raw_bytes"] | to_gb }}</td>
+ <td class="df_used_raw_rate right">{{ "%0.4f" | format(stat["total_used_raw_ratio"]|float) }}</td>
+ </tr>
+ {% endfor %}
+ </table>
+ <hr>
+ <table class="ceph_status">
+ <tr class="node">
+ <td class="id centered">ID</td>
+ <td class="df_name">Name</td>
+ <td class="df_total centered">Objects</td>
+ <td class="df_avail right">Stored, GB</td>
+ <td class="df_used right">Used, GB</td>
+ <td class="df_used_raw centered">Used, %</td>
+ <td class="df_used_raw_rate right">Max Available, GB</td>
+ <td class="df_total centered">Placement Groups</td>
+ </tr>
+
+ {% for pool in df["pools"] %}
+ {% set pool_stats = pool["id"] | get_pool_stats(info["ceph_pg_dump"]["data"]) %}
+ <tr class="node">
+ <td class="id centered">{{ pool["id"] }}</td>
+ <td class="df_name">{{ pool["name"] }}</td>
+ <td class="df_total centered">{{ pool["stats"]["objects"] }}</td>
+ <td class="df_avail right">{{ pool["stats"]["stored"] | to_gb }}</td>
+ <td class="df_used right">{{ pool["stats"]["bytes_used"] | to_gb }}</td>
+ <td class="df_used_raw centered">{{ "%0.2f" | format(pool["stats"]["percent_used"]|float) }}</td>
+ <td class="df_used_raw_rate right">{{ pool["stats"]["max_avail"] | to_gb }}</td>
+ <td class="df_total centered">{{ pool_stats["num_pg"] }}</td>
+ </tr>
+ {% endfor %}
+ </table>
+
+</div>
+{% endmacro %}
+
+<!-- RADOS DF -->
+{% macro dfrados_page(info, id_label) %}
+<div id="{{ id_label }}" class="barcontent">
+ {% set rdf = info["rados_df"]["data"] %}
+ <h5>{{ caller() }}</h5>
+ <hr>
+ <table class="ceph_status">
+ <tr class="node">
+ <td class="df_name">Stats</td>
+ <td class="df_total right">Total objects</td>
+ <td class="df_avail right">Total used, GB</td>
+ <td class="df_used right">Total Available, GB</td>
+ <td class="df_used_raw right">Total space, GB</td>
+ </tr>
+ <tr class="node">
+ <td class="df_name">Rados DF</td>
+ <td class="df_total right">{{ rdf["total_objects"] }}</td>
+ <td class="df_avail right">{{ rdf["total_used"] | to_gb }}</td>
+ <td class="df_used right">{{ rdf["total_avail"] | to_gb }}</td>
+ <td class="df_used_raw right">{{ rdf["total_space"] | to_gb }}</td>
+ </tr>
+ </table>
+ <hr>
+ <table class="ceph_status">
+ <tr class="node">
+ <td class="id centered">ID</td>
+ <td class="rdf_name">Name</td>
+ <td class="rdf_obj centered">Objects</td>
+ <td class="rdf_obj centered">Clones</td>
+ <td class="rdf_obj centered">Copies</td>
+ <td class="rdf_obj centered">Unfound</td>
+ <td class="rdf_obj centered">Degraded</td>
+ <td class="rdf_obj centered">Missing</td>
+
+ <td class="rdf_used right">Size, GB</td>
+ <td class="rdf_used right">Compressed, GB</td>
+ <td class="rdf_used right">Real, GB</td>
+
+ <td class="rdf_bench right">Read, MB/s</td>
+ <td class="rdf_bench right">Read, IOPS</td>
+
+ <td class="rdf_bench right">Write, MB/s</td>
+ <td class="rdf_bench right">Write, IOPS</td>
+ </tr>
+ {% for pool in rdf["pools"] | sort(attribute='id') %}
+ <tr class="node">
+ <td class="id centered">{{ pool["id"] }}</td>
+ <td class="rdf_name">{{ pool["name"] }}</td>
+ <td class="rdf_obj centered">{{ pool["num_objects"] }}</td>
+ <td class="rdf_obj centered">{{ pool["num_object_clones"] }}</td>
+ <td class="rdf_obj centered">{{ pool["num_object_copies"] }}</td>
+ <td class="rdf_obj centered">{{ pool["num_objects_unfound"] }}</td>
+ <td class="rdf_obj centered">{{ pool["num_objects_degraded"] }}</td>
+ <td class="rdf_obj centered">{{ pool["num_objects_missing_on_primary"] }}</td>
+
+ <td class="rdf_total right">{{ pool["size_bytes"] | to_gb }}</td>
+ <td class="rdf_used right">{{ pool["compress_bytes_used"] | to_gb }}</td>
+ <td class="rdf_used right">{{ pool["compress_under_bytes"] | to_gb }}</td>
+
+ <td class="rdf_bench right">{{ pool["read_bytes"] | to_mb }}</td>
+ <td class="rdf_bench right">{{ pool["read_ops"] }}</td>
+
+ <td class="rdf_bench right">{{ pool["write_bytes"] | to_mb }}</td>
+ <td class="rdf_bench right">{{ pool["write_ops"] }}</td>
+ </tr>
+ {% endfor %}
+ </table>
+ <hr>
+</div>
+{% endmacro %}
+
+<!-- Auth ls -->
+{% macro auth_page(info, id_label) %}
+{# Anonymized 'ceph auth ls' listing: one row per entity with its caps.
+   Keys are anonymized upstream by the collector (sed replaces AQ...== with
+   the literal 'KEY'), so this page never exposes real secrets. #}
+<div id="{{ id_label }}" class="barcontent">
+  {% set auth = info["ceph_auth_ls"]["data"] %}
+  <h5>{{ caller() }}</h5>
+  <hr>
+  <table class="ceph_status">
+    <tr class="node">
+      <td class="bucket_name">Entity</td>
+      <td class="bucket_items">Caps</td>
+    </tr>
+  {# Sort by entity name for a stable, scannable listing. #}
+  {% for ath in auth["auth_dump"] | sort(attribute='entity') %}
+    <tr class="node">
+      <td class="bucket_name">{{ ath["entity"] }}</td>
+      <td class="bucket_items">
+      {% for scope, value in ath["caps"].items() %}
+        {{ scope }}: {{ value }}<br>
+      {% endfor %}
+      </td>
+    </tr>
+  {% endfor %}
+  </table>
+  <hr>
+</div>
+{% endmacro %}
+
+<!-- Device Health -->
+{% macro dhealth_page(info, id_label) %}
+{# Device health page: one row per device with smartctl summary fields and a
+   collapsible raw smartctl output block per device.
+   NOTE(review): the summary columns assume SATA-style smartctl JSON
+   ('sata_version', 'interface_speed'); NVMe devices may lack these keys and
+   would fail rendering -- confirm against real smartctl output. #}
+<div id="{{ id_label }}" class="barcontent">
+  {% set dh = info["ceph_health"]["latest"] %}
+  <h5>{{ caller() }}: {{ dh | count }} devices</h5>
+  <div class="note">Data collection timestamp is '{{ info["ceph_health"]['date'] }}'</div>
+  <hr>
+  <table class="ceph_status">
+    <tr class="node">
+      <td class="dev_name">Name</td>
+      <td class="dev_param centered">Device</td>
+      <td class="dev_param centered">Protocol</td>
+      <td class="dev_param centered">Firmware</td>
+      <td class="dev_param centered">Speed<div class='note'>current/max</div></td>
+      <td class="dev_param centered">Block size<div class='note'>physical/logical</div></td>
+      <td class="dev_param centered">Power cycles</td>
+      <td class="dev_param centered">Temperature</td>
+      <td class="dev_param centered">Smart Status</td>
+      <td class="dev_param centered">Smart data</td>
+    </tr>
+  {# sort(attribute='0') sorts the (name, props) pairs by device name. #}
+  {% for _d, _p in dh.items() | sort(attribute='0') %}
+    <tr class="node">
+      <td class="dev_name">
+        <div class="text">{{ _p['model_name'] }}, {{ _p['serial_number'] }}</div><br>
+        <div class="note">{{ _p['model_family'] }}; {{ _p['sata_version']['string'] }}</div>
+        <div class="note">{{ _d }}</div>
+      </td>
+      <td class="dev_param centered">{{ _p['device']['info_name'] }}</td>
+      <td class="dev_param centered">{{ _p['device']['protocol'] }}</td>
+      <td class="dev_param centered">{{ _p['firmware_version'] }}</td>
+      <td class="dev_param centered">{{ _p['interface_speed']['current']['string'] }} / {{ _p['interface_speed']['max']['string'] }}</td>
+      <td class="dev_param centered">{{ _p['physical_block_size'] }} / {{ _p['logical_block_size'] }}</td>
+      <td class="dev_param centered">{{ _p['power_cycle_count'] }}</td>
+      <td class="dev_param centered">{{ _p['temperature']['current'] }}</td>
+      {% if _p['smart_status']['passed'] %}
+      <td class="dev_param centered" style="color: green">Passed</td>
+      {% else %}
+      <td class="dev_param centered" style="color: red;">Failed</td>
+      {% endif %}
+      <td class="dev_param centered">
+        <div class="cell_button" onclick="toggleClassByID('{{ _d }}_smart_output')" id="{{ _d }}_smart_output_button">Show/Hide</div>
+      </td>
+    </tr>
+    {# Hidden by default ('collapsable' without 'in'); toggled per device. #}
+    <tr class="collapsable" id="{{ _d }}_smart_output"><td colspan=10>
+      <div class="console">
+        <pre>
+        {% for line in _p['smartctl']['output'] %}
+        {{ line }}
+        {% endfor %}
+        </pre>
+      </div>
+    </td></tr>
+  {% endfor %}
+  </table>
+  <hr>
+</div>
+{% endmacro %}
+
+<!-- ================================= -->
+<!-- Cluster nodes page -->
+{% call status_page(info, "status") %}
+ Cluster status
+{% endcall %}
+
+{% call crush_page(info, "crush") %}
+ CRUSH map
+{% endcall %}
+
+{% call latency_page(info['osd_latency_data'], "latency") %}
+ Quick latency check for all OSDs
+{% endcall %}
+
+{% call mondump_page(info['monmap'], "mondump") %}
+ Ceph monitors
+{% endcall %}
+
+{% call df_page(info, "df") %}
+ Pool list with additional data
+{% endcall %}
+
+{% call dfrados_page(info, "dfrados") %}
+ Rados pools list with additional details
+{% endcall %}
+
+{% call auth_page(info, "auth") %}
+ Anonymized auth list
+{% endcall %}
+
+{% call dhealth_page(info, "dhealth") %}
+ Device health status and S.M.A.R.T. outputs
+{% endcall %}
+</body>
+</html>
\ No newline at end of file
diff --git a/templates/common_scripts.j2 b/templates/common_scripts.j2
index 2ea3614..9418ed6 100644
--- a/templates/common_scripts.j2
+++ b/templates/common_scripts.j2
@@ -32,6 +32,21 @@
}
items[0].className += " active";
+ // Process all row_buttons
+ var coll = document.getElementsByClassName("row_button");
+ var i;
+
+ for (i = 0; i < coll.length; i++) {
+ coll[i].addEventListener("click", function() {
+ this.classList.toggle("row_active");
+ var content = this.nextElementSibling;
+ if (content.style.maxHeight){
+ content.style.maxHeight = null;
+ } else {
+ content.style.maxHeight = content.scrollHeight + "px";
+ }
+ });
+ }
}
function openBar(evt, barName) {
// Declare all variables
diff --git a/templates/common_styles.j2 b/templates/common_styles.j2
index ab1719f..96738aa 100644
--- a/templates/common_styles.j2
+++ b/templates/common_styles.j2
@@ -18,7 +18,7 @@
/* Bar */
.bar{
- background: linear-gradient(to bottom, #126666 0%, #284753 77%);
+ background: linear-gradient(to bottom, #142e46 0%, #214666 77%);
width:100%;
overflow:hidden;
display: inline-block;
@@ -39,7 +39,7 @@
}
.bar .bar-item.active {
- background-color: #328686;
+ background-color: #365e81;
color: white;
}