Merge "Implement timeout for process execution"
diff --git a/collectd/files/plugin/collectd_base.py b/collectd/files/plugin/collectd_base.py
index 36c0060..4a61168 100644
--- a/collectd/files/plugin/collectd_base.py
+++ b/collectd/files/plugin/collectd_base.py
@@ -22,6 +22,8 @@
 import traceback
 
 
+TIMEOUT_BIN = '/usr/bin/timeout'
+
 INTERVAL = 10
 
 
@@ -167,7 +169,8 @@
         )
         v.dispatch()
 
-    def execute(self, cmd, shell=True, cwd=None, log_error=True):
+    def execute(self, cmd, shell=True, cwd=None, log_error=True,
+                signal='TERM'):
         """Executes a program with arguments.
 
         Args:
@@ -179,6 +182,8 @@
             (default=None).
             log_error: whether to log an error when the command returned a
             non-zero status code (default=True).
+            signal: the signal used to kill the command if the timeout occurs
+            (default TERM).
 
         Returns:
             A tuple containing the return code, the standard output and the
@@ -189,9 +194,12 @@
             (-1, None, None) if the program couldn't be executed at all.
         """
         start_time = time.time()
+        full_cmd = [TIMEOUT_BIN, '-k', '1', '-s', signal, str(self.timeout)]
+        full_cmd.extend(cmd)
+
         try:
             proc = subprocess.Popen(
-                cmd,
+                full_cmd,
                 cwd=cwd,
                 shell=shell,
                 stdout=subprocess.PIPE,
@@ -201,18 +209,30 @@
             stdout = stdout.rstrip('\n')
         except Exception as e:
             self.logger.error("Cannot execute command '%s': %s : %s" %
-                              (cmd, str(e), traceback.format_exc()))
+                              (full_cmd, str(e), traceback.format_exc()))
             return (-1, None, None)
 
         returncode = proc.returncode
 
-        if returncode != 0 and log_error:
-            self.logger.error("Command '%s' failed (return code %d): %s" %
-                              (cmd, returncode, stderr))
+        if returncode != 0:
+            # The timeout command usually returns 124 (TERM) or 137 (KILL) when
+            # the timeout occurs.
+            # But for some reason, Python's subprocess module rewrites the
+            # return code with the (negative) signal number when the signal
+            # is not caught by the process.
+            if returncode == 124 or returncode < 0:
+                stderr = 'timeout {}s'.format(self.timeout)
+                msg = "Command '{}' timeout {}s".format(cmd, self.timeout)
+            else:
+                msg = "Command '{}' failed (return code {}): {}".format(
+                    cmd, returncode, stderr)
+
+            if log_error:
+                self.logger.error(msg)
         if self.debug:
             elapsedtime = time.time() - start_time
             self.logger.info("Command '%s' returned %s in %0.3fs" %
-                             (cmd, returncode, elapsedtime))
+                             (full_cmd, returncode, elapsedtime))
 
         return (returncode, stdout, stderr)
 
@@ -273,6 +293,7 @@
     def shutdown_callback(self):
         pass
 
+
 class CephBase(Base):
 
     def __init__(self, *args, **kwargs):
diff --git a/collectd/files/plugin/collectd_docker_info.py b/collectd/files/plugin/collectd_docker_info.py
index e0589f1..08b8ca4 100644
--- a/collectd/files/plugin/collectd_docker_info.py
+++ b/collectd/files/plugin/collectd_docker_info.py
@@ -31,6 +31,7 @@
     def __init__(self, *args, **kwargs):
         super(DockerInfoPlugin, self).__init__(*args, **kwargs)
         self.plugin = NAME
+        self.timeout = 3
 
     def itermetrics(self):
         cmd = [DOCKER_BINARY, 'info', '-f', "{{ json .}}"]
diff --git a/collectd/files/plugin/collectd_k8s_kubectl_get.py b/collectd/files/plugin/collectd_k8s_kubectl_get.py
index 1333120..1d91b5e 100644
--- a/collectd/files/plugin/collectd_k8s_kubectl_get.py
+++ b/collectd/files/plugin/collectd_k8s_kubectl_get.py
@@ -36,6 +36,7 @@
         self.polling_interval = INTERVAL
         self.resources = []
         self._get_nodes = False
+        self.timeout = 10
 
     def shutdown_callback(self):
         for tid, t in self._threads.items():
@@ -58,7 +59,9 @@
         def kubectl_poller():
             cmd = [KUBECTL_BINARY, 'get', '-o', 'json', resource]
             data = self.execute_to_json(cmd, shell=False, log_error=True)
-            return data.get('items', [])
+            if data:
+                return data.get('items', [])
+            return []
 
         if resource not in self._threads:
             t = base.AsyncPoller(self.collectd,