Log collector module
New:
- [Done] multiple namespace selector
- [Done] keyword-based pod selector
- [Done] per-pod logs syntax detection and parsing
- [Deferred] in-place filtering for shorter logs
- [Done] individual logs timestamp detection
- [Done] Unix-time-based timestamp sorting
- [Done] Single file logs output using common format
- [Done] add all log types from all MOS namespaces and pods
Update:
- resource preparation can be skipped per module
- updated log collection using multiple threads
- new setting LOG_COLLECT_THREADS
Fixes:
- Network MTU fix
- Faster cmd execution on single pod
- Ceph benchmark validations
- Ceph benchmark report sorting
- Daemonset deployment with nodes skipped
- Network tree debugging script
- Tree depth limiter, i.e. stackoverflow prevention
Related-PROD: PROD-36845
Change-Id: Icf229ac62078c6418ab4dbdff12b0d27ed42af1d
diff --git a/cfg_checker/modules/ceph/info.py b/cfg_checker/modules/ceph/info.py
index 2c62018..db3dd75 100644
--- a/cfg_checker/modules/ceph/info.py
+++ b/cfg_checker/modules/ceph/info.py
@@ -313,49 +313,58 @@
self._safe_tools_cmd("rm -f " + _tar_path)
return _json
- def _safe_get_cmd_output_as_json(self, cmd, zipped=False):
- if zipped:
- _buf = self._safe_tools_cmd_zipped_output(cmd)
- else:
- _buf = self._safe_tools_cmd(cmd)
+ @staticmethod
+ def _as_json(buf):
try:
- return json.loads(_buf)
+ return json.loads(buf)
except ValueError as e:
_out = ""
- if len(_buf) > 512:
- _out = _buf[:512]
+ if len(buf) > 512:
+ _out = buf[:512]
_out += "..."
else:
- _out = _buf
+ _out = buf
logger_cli.error(
"\nERROR: failed to parse json: '{}'. Data: '{}'".format(
e,
_out
)
)
- return _buf
+ return buf
+
+ def _safe_get_cmd_output_as_json(self, cmd, zipped=False):
+ if zipped:
+ _buf = self._safe_tools_cmd_zipped_output(cmd)
+ else:
+ _buf = self._safe_tools_cmd(cmd)
+ return self._as_json(_buf)
def _get_tools_pod_name(self):
# get ceph pod
- _names = self.master.kube.get_pod_names_by_partial_name(
+ _pods = self.master.kube.get_pods_by_partial_name(
self.ceph_app_label,
self.ceph_ns
)
- if not _names:
+ # _names = self.master.kube.get_pod_names_by_partial_name(
+ # self.ceph_app_label,
+ # self.ceph_ns
+ # )
+ if not _pods:
raise KubeException(
"Failed to find pod using '{}'".format(self.ceph_app_label)
)
- elif len(_names) > 1:
+ elif len(_pods) > 1:
logger_cli.warning(
"WARNING: Environment has more than one pod "
"with '{}' app: {}".format(
self.ceph_app_label,
- ", ".join(_names)
+ ", ".join([p.metadata.name for p in _pods])
)
)
else:
- logger_cli.debug("... found '{}'".format(_names[0]))
- return _names[0]
+ logger_cli.debug("... found '{}'".format(_pods[0].metadata.name))
+ self.ceph_pod = _pods[0]
+ return _pods[0].metadata.name
def _add_ceph_info_item(self, key, title, data, filename=None):
# handle data
@@ -572,8 +581,7 @@
_health_metrics = {}
_devices = _c("ceph device ls")
_devices = _devices.splitlines()
- _progress = Progress(len(_devices)-1)
- _index = 1
+ cmd_list = []
for device in _devices:
_t = device.split()
_dev = _t[0]
@@ -582,14 +590,31 @@
if _dev == "DEVICE":
continue
- _metric = _cj("ceph device get-health-metrics {}".format(_dev))
+ # _metric = _cj("ceph device get-health-metrics {}".format(_dev))
+ _cmd = "ceph device get-health-metrics {}".format(_dev)
+ cmd_list.append(_cmd)
_dev_name = "{}_{}".format(_osd, _dev)
- _health_metrics[_dev_name] = _metric
+ _health_metrics[_dev_name] = {}
_health_metrics[_dev_name]['node_name'] = _node
_health_metrics[_dev_name]['osd_name'] = _osd
- _progress.write_progress(_index, note=_dev_name)
- _index += 1
- _progress.end()
+ _health_metrics[_dev_name]['cmd'] = _cmd
+
+ results = self.master.exec_cmds_on_pod(
+ self.ceph_pod,
+ cmd_list
+ )
+
+ logger_cli.info("-> Processing results")
+ for _r in results:
+ _cmd = _r[3]
+ _j = self._as_json(_r[2])
+ for _dev_name in _health_metrics.keys():
+ if "cmd" in _health_metrics[_dev_name] and \
+ _health_metrics[_dev_name]["cmd"] == _cmd:
+ _health_metrics[_dev_name].update(_j)
+ _health_metrics[_dev_name].pop("cmd")
+ break
+
self._add_ceph_info_item(
"ceph_health",
"Ceph Health Metrics",
@@ -633,21 +658,29 @@
logger_cli.info(
"-> Gathering OSD configuration ({})".format(_total_osd)
)
- # Shortcuts
- # _c = self._safe_tools_cmd
- _cj = self._safe_get_cmd_output_as_json
- _progress = Progress(_total_osd)
- _idx = 1
- _cfgs = {}
+ cmds = {}
+ cmd_list = []
for _osd in self.ceph_info["ceph_osd_df"]["data"]["nodes"]:
- _progress.write_progress(_idx, note=_osd["name"])
- _cfgs[_osd["name"]] = _cj(
- "ceph config show-with-defaults -f json {}".format(
- _osd["name"]
- )
+ _cmd = "ceph config show-with-defaults -f json {}".format(
+ _osd["name"]
)
- _idx += 1
- _progress.end()
+ cmd_list.append(_cmd)
+ cmds[_osd["name"]] = _cmd
+
+ results = self.master.exec_cmds_on_pod(
+ self.ceph_pod,
+ cmd_list
+ )
+
+ logger_cli.info("-> Processing results")
+ _cfgs = {}
+ for _r in results:
+ _cmd = _r[3]
+ _j = self._as_json(_r[2])
+ for _osd_name in cmds.keys():
+ if cmds[_osd_name] == _cmd:
+ _cfgs[_osd_name] = _j
+ break
# Process configs
_base = {}