Log collector module
New:
- [Done] multiple namespace selector
- [Done] keyword-based pod selector
- [Done] per-pod logs syntax detection and parsing
- [Deferred] in-place filtering for shorter logs
- [Done] individual logs timestamp detection
- [Done] Unix time-based timestamp sorting
- [Done] Single file logs output using common format
- [Done] add all log types from all MOS namespaces and pods
Update:
- resource preparation can be skipped per module
- updated log collection using multiple threads
- new setting LOG_COLLECT_THREADS
Fixes:
- Network MTU fix
- Faster cmd execution on single pod
- Ceph benchmark validations
- Ceph benchmark report sorting
- Daemonset deployment with nodes skipped
- Network tree debugging script
- Tree depth limiter, i.e. stack overflow prevention
Related-PROD: PROD-36845
Change-Id: Icf229ac62078c6418ab4dbdff12b0d27ed42af1d
diff --git a/cfg_checker/common/kube_utils.py b/cfg_checker/common/kube_utils.py
index f4c38ef..e6b9922 100644
--- a/cfg_checker/common/kube_utils.py
+++ b/cfg_checker/common/kube_utils.py
@@ -11,6 +11,7 @@
from kubernetes import client as kclient, config as kconfig, watch
from kubernetes.stream import stream
from kubernetes.client.rest import ApiException
+from urllib3.exceptions import MaxRetryError
from time import time, sleep
from cfg_checker.common import logger, logger_cli
@@ -965,7 +966,11 @@
_svc
)
- def get_pod_logs(self, podname, ns):
+ def list_namespaces(self):
+ return self.CoreV1.list_namespace()
+
+ @retry(ApiException, initial_wait=2)
+ def get_pod_logs(self, podname, container, ns, tail_lines=50):
# Params
# read log of the specified Pod # noqa: E501 This method makes a synchronous HTTP request by default. To make an asynchronous HTTP request, please pass async_req=True
@@ -1022,10 +1027,23 @@
# If the method is called asynchronously, returns the request
# thread.
- return self.CoreV1.read_namespaced_pod_log(
- podname,
- ns,
- # timestamps=True,
- tail_lines=50,
- # pretty=True
- )
+ try:
+ return self.CoreV1.read_namespaced_pod_log(
+ name=podname,
+ namespace=ns,
+ container=container,
+ timestamps=True,
+ tail_lines=tail_lines,
+ # pretty=True,
+ _request_timeout=(1, 5)
+ )
+ except MaxRetryError as e:
+ logger_cli.warning(
+ "WARNING: Failed to retrieve log {}/{}:{}:\n{}".format(
+ ns,
+ podname,
+ container,
+ e.reason
+ )
+ )
+ return ""