Log collector module
New:
- [Done] multiple namespace selector
- [Done] keyword-based pod selector
- [Done] per-pod logs syntax detection and parsing
- [Deferred] in-place filtering for shorter logs
- [Done] individual logs timestamp detection
- [Done] Unix-time-based timestamp sorting
- [Done] Single file logs output using common format
- [Done] add all log types from all MOS namespaces and pods
Update:
- resource preparation can be skipped per module
- updated log collection using multiple threads
- new setting LOG_COLLECT_THREADS
Fixes:
- Network MTU fix
- Faster cmd execution on single pod
- Ceph benchmark validations
- Ceph benchmark report sorting
- Daemonset deployment with nodes skipped
- Network tree debugging script
- Tree depth limiter, i.e. stackoverflow prevention
Related-PROD: PROD-36845
Change-Id: Icf229ac62078c6418ab4dbdff12b0d27ed42af1d
diff --git a/cfg_checker/modules/ceph/info.py b/cfg_checker/modules/ceph/info.py
index 2c62018..db3dd75 100644
--- a/cfg_checker/modules/ceph/info.py
+++ b/cfg_checker/modules/ceph/info.py
@@ -313,49 +313,58 @@
self._safe_tools_cmd("rm -f " + _tar_path)
return _json
- def _safe_get_cmd_output_as_json(self, cmd, zipped=False):
- if zipped:
- _buf = self._safe_tools_cmd_zipped_output(cmd)
- else:
- _buf = self._safe_tools_cmd(cmd)
+ @staticmethod
+ def _as_json(buf):
try:
- return json.loads(_buf)
+ return json.loads(buf)
except ValueError as e:
_out = ""
- if len(_buf) > 512:
- _out = _buf[:512]
+ if len(buf) > 512:
+ _out = buf[:512]
_out += "..."
else:
- _out = _buf
+ _out = buf
logger_cli.error(
"\nERROR: failed to parse json: '{}'. Data: '{}'".format(
e,
_out
)
)
- return _buf
+ return buf
+
+ def _safe_get_cmd_output_as_json(self, cmd, zipped=False):
+ if zipped:
+ _buf = self._safe_tools_cmd_zipped_output(cmd)
+ else:
+ _buf = self._safe_tools_cmd(cmd)
+ return self._as_json(_buf)
def _get_tools_pod_name(self):
# get ceph pod
- _names = self.master.kube.get_pod_names_by_partial_name(
+ _pods = self.master.kube.get_pods_by_partial_name(
self.ceph_app_label,
self.ceph_ns
)
- if not _names:
+ # _names = self.master.kube.get_pod_names_by_partial_name(
+ # self.ceph_app_label,
+ # self.ceph_ns
+ # )
+ if not _pods:
raise KubeException(
"Failed to find pod using '{}'".format(self.ceph_app_label)
)
- elif len(_names) > 1:
+ elif len(_pods) > 1:
logger_cli.warning(
"WARNING: Environment has more than one pod "
"with '{}' app: {}".format(
self.ceph_app_label,
- ", ".join(_names)
+ ", ".join([p.metadata.name for p in _pods])
)
)
else:
- logger_cli.debug("... found '{}'".format(_names[0]))
- return _names[0]
+ logger_cli.debug("... found '{}'".format(_pods[0].metadata.name))
+ self.ceph_pod = _pods[0]
+ return _pods[0].metadata.name
def _add_ceph_info_item(self, key, title, data, filename=None):
# handle data
@@ -572,8 +581,7 @@
_health_metrics = {}
_devices = _c("ceph device ls")
_devices = _devices.splitlines()
- _progress = Progress(len(_devices)-1)
- _index = 1
+ cmd_list = []
for device in _devices:
_t = device.split()
_dev = _t[0]
@@ -582,14 +590,31 @@
if _dev == "DEVICE":
continue
- _metric = _cj("ceph device get-health-metrics {}".format(_dev))
+ # _metric = _cj("ceph device get-health-metrics {}".format(_dev))
+ _cmd = "ceph device get-health-metrics {}".format(_dev)
+ cmd_list.append(_cmd)
_dev_name = "{}_{}".format(_osd, _dev)
- _health_metrics[_dev_name] = _metric
+ _health_metrics[_dev_name] = {}
_health_metrics[_dev_name]['node_name'] = _node
_health_metrics[_dev_name]['osd_name'] = _osd
- _progress.write_progress(_index, note=_dev_name)
- _index += 1
- _progress.end()
+ _health_metrics[_dev_name]['cmd'] = _cmd
+
+ results = self.master.exec_cmds_on_pod(
+ self.ceph_pod,
+ cmd_list
+ )
+
+ logger_cli.info("-> Processing results")
+ for _r in results:
+ _cmd = _r[3]
+ _j = self._as_json(_r[2])
+ for _dev_name in _health_metrics.keys():
+ if "cmd" in _health_metrics[_dev_name] and \
+ _health_metrics[_dev_name]["cmd"] == _cmd:
+ _health_metrics[_dev_name].update(_j)
+ _health_metrics[_dev_name].pop("cmd")
+ break
+
self._add_ceph_info_item(
"ceph_health",
"Ceph Health Metrics",
@@ -633,21 +658,29 @@
logger_cli.info(
"-> Gathering OSD configuration ({})".format(_total_osd)
)
- # Shortcuts
- # _c = self._safe_tools_cmd
- _cj = self._safe_get_cmd_output_as_json
- _progress = Progress(_total_osd)
- _idx = 1
- _cfgs = {}
+ cmds = {}
+ cmd_list = []
for _osd in self.ceph_info["ceph_osd_df"]["data"]["nodes"]:
- _progress.write_progress(_idx, note=_osd["name"])
- _cfgs[_osd["name"]] = _cj(
- "ceph config show-with-defaults -f json {}".format(
- _osd["name"]
- )
+ _cmd = "ceph config show-with-defaults -f json {}".format(
+ _osd["name"]
)
- _idx += 1
- _progress.end()
+ cmd_list.append(_cmd)
+ cmds[_osd["name"]] = _cmd
+
+ results = self.master.exec_cmds_on_pod(
+ self.ceph_pod,
+ cmd_list
+ )
+
+ logger_cli.info("-> Processing results")
+ _cfgs = {}
+ for _r in results:
+ _cmd = _r[3]
+ _j = self._as_json(_r[2])
+ for _osd_name in cmds.keys():
+ if cmds[_osd_name] == _cmd:
+ _cfgs[_osd_name] = _j
+ break
# Process configs
_base = {}