Merge pull request #47 from simonpasquier/fix-check-openstack-api-plugin
Fix check_openstack_api plugin
diff --git a/collectd/files/collectd.conf b/collectd/files/collectd.conf
index 0a64b0f..02918f1 100644
--- a/collectd/files/collectd.conf
+++ b/collectd/files/collectd.conf
@@ -40,6 +40,14 @@
ReadThreads {{ client.read_threads }}
{%- endif %}
+{%- if client.write_queue_limit_high is defined %}
+WriteQueueLimitHigh {{ client.write_queue_limit_high}}
+{%- endif %}
+
+{%- if client.write_queue_limit_low is defined %}
+WriteQueueLimitLow {{ client.write_queue_limit_low}}
+{%- endif %}
+
##############################################################################
# Logging #
#----------------------------------------------------------------------------#
diff --git a/collectd/files/plugin/collectd_base.py b/collectd/files/plugin/collectd_base.py
index 4a9842a..4e6eaff 100644
--- a/collectd/files/plugin/collectd_base.py
+++ b/collectd/files/plugin/collectd_base.py
@@ -165,15 +165,12 @@
non-zero status code (default=True).
Returns:
- A tuple containing the standard output and error strings if the
- program execution has been successful.
+ A tuple containing the return code, the standard output and the
+ standard error if the program has been executed.
- ("foobar\n", "")
+ (0, "foobar\n", "")
- (None, "stderr of the command") if the command returned a
- non-zero status code.
-
- (None, None) if the command couldn't be executed at all.
+ (-1, None, None) if the program couldn't be executed at all.
"""
start_time = time.time()
try:
@@ -189,24 +186,19 @@
except Exception as e:
self.logger.error("Cannot execute command '%s': %s : %s" %
(cmd, str(e), traceback.format_exc()))
- return (None, None)
+ return (-1, None, None)
returncode = proc.returncode
- if returncode != 0:
- if log_error:
- self.logger.error("Command '%s' failed (return code %d): %s" %
- (cmd, returncode, stderr))
- return (None, stderr)
+ if returncode != 0 and log_error:
+ self.logger.error("Command '%s' failed (return code %d): %s" %
+ (cmd, returncode, stderr))
if self.debug:
elapsedtime = time.time() - start_time
self.logger.info("Command '%s' returned %s in %0.3fs" %
(cmd, returncode, elapsedtime))
- if not stdout and self.debug:
- self.logger.info("Command '%s' returned no output!", cmd)
-
- return (stdout, stderr)
+ return (returncode, stdout, stderr)
def execute_to_json(self, *args, **kwargs):
"""Executes a program and decodes the output as a JSON string.
@@ -217,12 +209,12 @@
A Python object or
None if the execution of the program or JSON decoding fails.
"""
- outputs = self.execute(*args, **kwargs)
- if outputs:
+ (retcode, out, err) = self.execute(*args, **kwargs)
+ if retcode == 0:
try:
- return json.loads(outputs[0])
+ return json.loads(out)
except ValueError as e:
- self.logger.error("{}: document: '{}'".format(e, outputs[0]))
+ self.logger.error("{}: document: '{}'".format(e, out))
@staticmethod
def restore_sigchld():
diff --git a/collectd/files/plugin/collectd_glusterfs.py b/collectd/files/plugin/collectd_glusterfs.py
index 344d5b5..9b03e8f 100644
--- a/collectd/files/plugin/collectd_glusterfs.py
+++ b/collectd/files/plugin/collectd_glusterfs.py
@@ -59,9 +59,9 @@
def itermetrics(self):
# Collect peers' metrics
- out, err = self.execute([GLUSTER_BINARY, 'peer', 'status'],
- shell=False)
- if not out:
+ retcode, out, err = self.execute([GLUSTER_BINARY, 'peer', 'status'],
+ shell=False)
+ if retcode != 0:
raise base.CheckException("Failed to execute 'gluster peer'")
total = 0
@@ -107,8 +107,8 @@
# Collect volumes' metrics
cmd = [GLUSTER_BINARY, 'volume', 'status', 'all', 'detail']
- out, err = self.execute(cmd, shell=False, log_error=False)
- if not out:
+ retcode, out, err = self.execute(cmd, shell=False, log_error=False)
+ if retcode != 0:
if err and vol_status_transaction_in_progress_re.match(err):
# "transaction already in progress" error, we assume volumes
# metrics are being collected on another glusterfs node, and
diff --git a/collectd/files/plugin/collectd_pacemaker.py b/collectd/files/plugin/collectd_pacemaker.py
index 682c100..87dc470 100644
--- a/collectd/files/plugin/collectd_pacemaker.py
+++ b/collectd/files/plugin/collectd_pacemaker.py
@@ -73,9 +73,9 @@
return 1
return 0
- out, err = self.execute([self.crm_mon_binary, '--as-xml', '-r', '-f'],
- shell=False)
- if not out:
+ retcode, out, err = self.execute(
+ [self.crm_mon_binary, '--as-xml', '-r', '-f'], shell=False)
+ if retcode != 0:
raise base.CheckException(
"Failed to execute crm_mon '{}'".format(err))
diff --git a/collectd/files/plugin/hypervisor_stats.py b/collectd/files/plugin/hypervisor_stats.py
index d03f7ae..ba6e050 100644
--- a/collectd/files/plugin/hypervisor_stats.py
+++ b/collectd/files/plugin/hypervisor_stats.py
@@ -42,21 +42,35 @@
if 'cpu_ratio' not in self.extra_config:
self.logger.warning('CpuAllocationRatio parameter not set')
- def dispatch_value(self, name, value, host=None):
+ def dispatch_value(self, name, value, meta=None):
v = collectd.Values(
plugin=PLUGIN_NAME,
type='gauge',
type_instance=name,
interval=INTERVAL,
# w/a for https://github.com/collectd/collectd/issues/716
- meta={'0': True},
+ meta=meta or {'0': True},
values=[value]
)
- if host:
- v.host = host
v.dispatch()
def collect(self):
+ nova_aggregates = {}
+ r = self.get('nova', 'os-aggregates')
+ if not r:
+ self.logger.warning("Could not get nova aggregates")
+ else:
+ aggregates_list = r.json().get('aggregates', [])
+ for agg in aggregates_list:
+ nova_aggregates[agg['name']] = {
+ 'id': agg['id'],
+ 'hosts': agg['hosts'],
+ 'metrics': {'free_vcpus': 0},
+ }
+ nova_aggregates[agg['name']]['metrics'].update(
+ {v: 0 for v in self.VALUE_MAP.values()}
+ )
+
r = self.get('nova', 'os-hypervisors/detail')
if not r:
self.logger.warning("Could not get hypervisor statistics")
@@ -69,14 +83,48 @@
# remove domain name and keep only the hostname portion
host = stats['hypervisor_hostname'].split('.')[0]
for k, v in self.VALUE_MAP.iteritems():
- self.dispatch_value(v, stats.get(k, 0), host)
- total_stats[v] += stats.get(k, 0)
+ m_val = stats.get(k, 0)
+ self.dispatch_value(v, m_val, {'host': host})
+ total_stats[v] += m_val
+ for agg in nova_aggregates.keys():
+ agg_hosts = nova_aggregates[agg]['hosts']
+ if stats['hypervisor_hostname'] in agg_hosts:
+ nova_aggregates[agg]['metrics'][v] += m_val
if 'cpu_ratio' in self.extra_config:
+ m_vcpus = stats.get('vcpus', 0)
+ m_vcpus_used = stats.get('vcpus_used', 0)
free = (int(self.extra_config['cpu_ratio'] *
- stats.get('vcpus', 0))) - stats.get('vcpus_used', 0)
- self.dispatch_value('free_vcpus', free, host)
+ m_vcpus)) - m_vcpus_used
+ self.dispatch_value('free_vcpus', free, {'host': host})
total_stats['free_vcpus'] += free
+ for agg in nova_aggregates.keys():
+ agg_hosts = nova_aggregates[agg]['hosts']
+ if stats['hypervisor_hostname'] in agg_hosts:
+ free = ((int(self.extra_config['cpu_ratio'] *
+ m_vcpus)) -
+ m_vcpus_used)
+ nova_aggregates[agg]['metrics']['free_vcpus'] += free
+ # Dispatch the aggregate metrics
+ for agg in nova_aggregates.keys():
+ agg_id = nova_aggregates[agg]['id']
+ agg_total_free_ram = (
+ nova_aggregates[agg]['metrics']['free_ram_MB'] +
+ nova_aggregates[agg]['metrics']['used_ram_MB']
+ )
+ # Only emit the free_ram_percent metric when the aggregate's total
+ # RAM (free + used) is > 0. Otherwise (for instance when the aggregate
+ # has no host) the metric is skipped, so the corresponding alarms need
+ # a 'skip' no_data_policy in order not to be triggered.
+ if agg_total_free_ram > 0:
+ nova_aggregates[agg]['metrics']['free_ram_percent'] = round(
+ (100.0 * nova_aggregates[agg]['metrics']['free_ram_MB']) /
+ agg_total_free_ram,
+ 2)
+ for k, v in nova_aggregates[agg]['metrics'].iteritems():
+ self.dispatch_value('aggregate_{}'.format(k), v,
+ {'aggregate': agg,
+ 'aggregate_id': agg_id})
# Dispatch the global metrics
for k, v in total_stats.iteritems():
self.dispatch_value('total_{}'.format(k), v)
diff --git a/metadata/service/client/init.yml b/metadata/service/client/init.yml
index 6ef1257..0ed4a04 100644
--- a/metadata/service/client/init.yml
+++ b/metadata/service/client/init.yml
@@ -7,4 +7,6 @@
client:
enabled: true
read_interval: 60
+ write_queue_limit_high: 10000
+ write_queue_limit_low: 10000
use_fqdn: true
diff --git a/metadata/service/remote_client/cluster.yml b/metadata/service/remote_client/cluster.yml
index b1d8d34..238f8b0 100644
--- a/metadata/service/remote_client/cluster.yml
+++ b/metadata/service/remote_client/cluster.yml
@@ -8,5 +8,7 @@
enabled: true
read_interval: 10
read_threads: 10
+ write_queue_limit_high: 10000
+ write_queue_limit_low: 10000
use_fqdn: false
automatic_starting: false