Merge pull request #42 from elemoine/glusterfs

Ignore gluster transaction in progress errors
diff --git a/collectd/files/plugin/collectd_base.py b/collectd/files/plugin/collectd_base.py
index 28cea12..4a9842a 100644
--- a/collectd/files/plugin/collectd_base.py
+++ b/collectd/files/plugin/collectd_base.py
@@ -151,7 +151,7 @@
         )
         v.dispatch()
 
-    def execute(self, cmd, shell=True, cwd=None):
+    def execute(self, cmd, shell=True, cwd=None, log_error=True):
         """Executes a program with arguments.
 
         Args:
@@ -161,6 +161,8 @@
             True).
             cwd: the directory to change to before running the program
             (default=None).
+            log_error: whether to log an error when the command returned a
+            non-zero status code (default=True).
 
         Returns:
             A tuple containing the standard output and error strings if the
@@ -168,8 +170,10 @@
 
             ("foobar\n", "")
 
-            (None, None) if the command couldn't be executed or returned a
-            non-zero status code
+            (None, "stderr of the command") if the command returned a
+            non-zero status code.
+
+            (None, None) if the command couldn't be executed at all.
         """
         start_time = time.time()
         try:
@@ -190,9 +194,10 @@
         returncode = proc.returncode
 
         if returncode != 0:
-            self.logger.error("Command '%s' failed (return code %d): %s" %
-                              (cmd, returncode, stderr))
-            return (None, None)
+            if log_error:
+                self.logger.error("Command '%s' failed (return code %d): %s" %
+                                  (cmd, returncode, stderr))
+            return (None, stderr)
         if self.debug:
             elapsedtime = time.time() - start_time
             self.logger.info("Command '%s' returned %s in %0.3fs" %
diff --git a/collectd/files/plugin/collectd_glusterfs.py b/collectd/files/plugin/collectd_glusterfs.py
index fbcfc1f..344d5b5 100644
--- a/collectd/files/plugin/collectd_glusterfs.py
+++ b/collectd/files/plugin/collectd_glusterfs.py
@@ -25,6 +25,8 @@
 state_re = re.compile(r'^State: (?P<state>.+)$', re.MULTILINE)
 
 vol_status_re = re.compile(r'\n\s*\n', re.MULTILINE)
+vol_status_transaction_in_progress_re = re.compile(
+    r'Another transaction.*in progress\.')
 vol_block_re = re.compile(r'^-+', re.MULTILINE)
 volume_re = re.compile(r'^Status of volume:\s+(?P<volume>.+)', re.MULTILINE)
 brick_server_re = re.compile(r'^Brick\s*:\s*Brick\s*(?P<peer>[^:]+)',
@@ -104,103 +106,112 @@
             }
 
         # Collect volumes' metrics
-        out, err = self.execute(
-            [GLUSTER_BINARY, 'volume', 'status', 'all', 'detail'],
-            shell=False)
+        cmd = [GLUSTER_BINARY, 'volume', 'status', 'all', 'detail']
+        out, err = self.execute(cmd, shell=False, log_error=False)
         if not out:
-            raise base.CheckException("Failed to execute 'gluster volume'")
-
-        for vol_block in vol_status_re.split(out):
-            volume_m = volume_re.search(vol_block)
-            if not volume_m:
-                continue
-            volume = volume_m.group('volume')
-            for line in vol_block_re.split(vol_block):
-                peer_m = brick_server_re.search(line)
-                if not peer_m:
+            if err and vol_status_transaction_in_progress_re.match(err):
+                # "transaction already in progress" error: we assume the
+                # volume metrics are being collected on another glusterfs
+                # node, so we silently skip collecting the volume metrics
+                # this time
+                self.logger.info("Command '%s' failed because of a "
+                                 "transaction is already in progress, "
+                                 "ignore the error" % cmd)
+            else:
+                self.logger.error("Command '%s' failed: %s" % (cmd, err))
+                raise base.CheckException("Failed to execute 'gluster volume'")
+        else:
+            for vol_block in vol_status_re.split(out):
+                volume_m = volume_re.search(vol_block)
+                if not volume_m:
                     continue
                 volume = volume_m.group('volume')
-                peer = peer_m.group('peer')
-                disk_free_m = disk_free_re.search(line)
-                disk_total_m = disk_total_re.search(line)
-                inode_free_m = inode_free_re.search(line)
-                inode_count_m = inode_count_re.search(line)
-                if disk_free_m and disk_total_m:
-                    free = convert_to_bytes(
-                        disk_free_m.group('disk_free'),
-                        disk_free_m.group('unit'))
-                    total = convert_to_bytes(
-                        disk_total_m.group('disk_total'),
-                        disk_total_m.group('unit'))
-                    used = total - free
-                    yield {
-                        'type_instance': 'space_free',
-                        'values': free,
-                        'meta': {
-                            'volume': volume,
-                            'peer': peer,
+                for line in vol_block_re.split(vol_block):
+                    peer_m = brick_server_re.search(line)
+                    if not peer_m:
+                        continue
+                    volume = volume_m.group('volume')
+                    peer = peer_m.group('peer')
+                    disk_free_m = disk_free_re.search(line)
+                    disk_total_m = disk_total_re.search(line)
+                    inode_free_m = inode_free_re.search(line)
+                    inode_count_m = inode_count_re.search(line)
+                    if disk_free_m and disk_total_m:
+                        free = convert_to_bytes(
+                            disk_free_m.group('disk_free'),
+                            disk_free_m.group('unit'))
+                        total = convert_to_bytes(
+                            disk_total_m.group('disk_total'),
+                            disk_total_m.group('unit'))
+                        used = total - free
+                        yield {
+                            'type_instance': 'space_free',
+                            'values': free,
+                            'meta': {
+                                'volume': volume,
+                                'peer': peer,
+                            }
                         }
-                    }
-                    yield {
-                        'type_instance': 'space_percent_free',
-                        'values': free * 100.0 / total,
-                        'meta': {
-                            'volume': volume,
-                            'peer': peer,
+                        yield {
+                            'type_instance': 'space_percent_free',
+                            'values': free * 100.0 / total,
+                            'meta': {
+                                'volume': volume,
+                                'peer': peer,
+                            }
                         }
-                    }
-                    yield {
-                        'type_instance': 'space_used',
-                        'values': used,
-                        'meta': {
-                            'volume': volume,
-                            'peer': peer,
+                        yield {
+                            'type_instance': 'space_used',
+                            'values': used,
+                            'meta': {
+                                'volume': volume,
+                                'peer': peer,
+                            }
                         }
-                    }
-                    yield {
-                        'type_instance': 'space_percent_used',
-                        'values': used * 100.0 / total,
-                        'meta': {
-                            'volume': volume,
-                            'peer': peer,
+                        yield {
+                            'type_instance': 'space_percent_used',
+                            'values': used * 100.0 / total,
+                            'meta': {
+                                'volume': volume,
+                                'peer': peer,
+                            }
                         }
-                    }
-                if inode_free_m and inode_count_m:
-                    free = int(inode_free_m.group('inode_free'))
-                    total = int(inode_count_m.group('inode_count'))
-                    used = total - free
-                    yield {
-                        'type_instance': 'inodes_free',
-                        'values': free,
-                        'meta': {
-                            'volume': volume,
-                            'peer': peer,
+                    if inode_free_m and inode_count_m:
+                        free = int(inode_free_m.group('inode_free'))
+                        total = int(inode_count_m.group('inode_count'))
+                        used = total - free
+                        yield {
+                            'type_instance': 'inodes_free',
+                            'values': free,
+                            'meta': {
+                                'volume': volume,
+                                'peer': peer,
+                            }
                         }
-                    }
-                    yield {
-                        'type_instance': 'inodes_percent_free',
-                        'values': free * 100.0 / total,
-                        'meta': {
-                            'volume': volume,
-                            'peer': peer,
+                        yield {
+                            'type_instance': 'inodes_percent_free',
+                            'values': free * 100.0 / total,
+                            'meta': {
+                                'volume': volume,
+                                'peer': peer,
+                            }
                         }
-                    }
-                    yield {
-                        'type_instance': 'inodes_used',
-                        'values': used,
-                        'meta': {
-                            'volume': volume,
-                            'peer': peer,
+                        yield {
+                            'type_instance': 'inodes_used',
+                            'values': used,
+                            'meta': {
+                                'volume': volume,
+                                'peer': peer,
+                            }
                         }
-                    }
-                    yield {
-                        'type_instance': 'inodes_percent_used',
-                        'values': used * 100.0 / total,
-                        'meta': {
-                            'volume': volume,
-                            'peer': peer,
+                        yield {
+                            'type_instance': 'inodes_percent_used',
+                            'values': used * 100.0 / total,
+                            'meta': {
+                                'volume': volume,
+                                'peer': peer,
+                            }
                         }
-                    }
 
 
 plugin = GlusterfsPlugin(collectd)
@@ -217,6 +228,7 @@
 def read_callback():
     plugin.read_callback()
 
+
 collectd.register_init(init_callback)
 collectd.register_config(config_callback)
 collectd.register_read(read_callback)