Merge "Implement timeout for process execution"
diff --git a/.kitchen.yml b/.kitchen.yml
new file mode 100644
index 0000000..e96d842
--- /dev/null
+++ b/.kitchen.yml
@@ -0,0 +1,47 @@
+---
+driver:
+ name: docker
+ hostname: collectd.ci.local
+ use_sudo: false
+
+provisioner:
+ name: salt_solo
+ salt_install: bootstrap
+ salt_bootstrap_url: https://bootstrap.saltstack.com
+ salt_version: latest
+ require_chef: false
+ log_level: error
+ formula: collectd
+ grains:
+ noservices: True
+ state_top:
+ base:
+ "*":
+ - collectd
+ pillars:
+ top.sls:
+ base:
+ "*":
+ - collectd
+ dependencies:
+ - name: linux
+ repo: git
+ source: https://github.com/salt-formulas/salt-formula-linux
+
+verifier:
+ name: inspec
+ sudo: true
+
+platforms:
+ - name: <%=ENV['PLATFORM'] || 'ubuntu-xenial'%>
+ driver_config:
+ image: <%=ENV['PLATFORM'] || 'trevorj/salty-whales:xenial'%>
+ platform: ubuntu
+
+suites:
+
+ - name: client
+ provisioner:
+ pillars-from-files:
+ collectd.sls: tests/pillar/client.sls
+# vim: ft=yaml sw=2 ts=2 sts=2 tw=125
diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000..4f34af2
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,41 @@
+sudo: required
+services:
+ - docker
+
+install:
+ - pip install PyYAML
+ - pip install virtualenv
+ - |
+ test -e Gemfile || cat <<EOF > Gemfile
+ source 'https://rubygems.org'
+ gem 'rake'
+ gem 'test-kitchen'
+ gem 'kitchen-docker'
+ gem 'kitchen-inspec'
+ gem 'inspec'
+ gem 'kitchen-salt', :git => 'https://github.com/salt-formulas/kitchen-salt.git'
+ EOF
+ - bundle install
+
+env:
+ - PLATFORM=trevorj/salty-whales:trusty
+ - PLATFORM=trevorj/salty-whales:xenial
+
+
+before_script:
+ - set -o pipefail
+ - make test | tail
+
+script:
+ - test ! -e .kitchen.yml || bundle exec kitchen test -t tests/integration
+
+notifications:
+ webhooks:
+ urls:
+ - https://webhooks.gitter.im/e/6123573504759330786b
+ on_success: change # options: [always|never|change] default: always
+ on_failure: never # options: [always|never|change] default: always
+ on_start: never # options: [always|never|change] default: always
+ on_cancel: never # options: [always|never|change] default: always
+ on_error: never # options: [always|never|change] default: always
+ email: false
diff --git a/collectd/files/plugin/collectd_elasticsearch_node.py b/collectd/files/plugin/collectd_elasticsearch_node.py
index 1ce23fa..2cfc7af 100644
--- a/collectd/files/plugin/collectd_elasticsearch_node.py
+++ b/collectd/files/plugin/collectd_elasticsearch_node.py
@@ -27,21 +27,97 @@
def __init__(self, *args, **kwargs):
super(ElasticsearchNodePlugin, self).__init__(*args, **kwargs)
self.plugin = NAME
+ self._previous = {}
+
+ @staticmethod
+ def _metric(name, values, meta=None):
+ return {'type_instance': name, 'values': values, 'meta': meta or {}}
+
+ def _get_latency(self, name, count, time):
+ cname = '{}_count'.format(name)
+ tname = '{}_time'.format(name)
+ prev_count = self._previous.get(cname)
+ prev_time = self._previous.get(tname)
+ self._previous[cname] = count
+ self._previous[tname] = time
+ if prev_count and prev_time:
+ diff_count = count - prev_count
+ diff_time = time - prev_time
+ return diff_time / diff_count if diff_count > 0 else 0
def itermetrics(self):
stats = self.query_api('_nodes/_local/stats').get(
'nodes', {}).values()[0]
- yield {
- 'type_instance': 'documents',
- 'values': stats['indices']['docs']['count']
- }
- yield {
- 'type_instance': 'documents_deleted',
- 'values': stats['indices']['docs']['deleted']
- }
- # TODO: collectd more metrics
- # See https://www.elastic.co/guide/en/elasticsearch/guide/current/
- # _monitoring_individual_nodes.html
+ indices = stats['indices']
+ yield self._metric('documents', indices['docs']['count'])
+ yield self._metric('documents_deleted', indices['docs']['deleted'])
+ yield self._metric(
+ 'indexing_current', indices['indexing']['index_current'])
+ yield self._metric(
+ 'indexing_failed', indices['indexing']['index_failed'])
+ indexing_latency = self._get_latency(
+ 'indexing', indices['indexing']['index_total'],
+ indices['indexing']['index_time_in_millis'])
+ if indexing_latency:
+ yield self._metric('indexing_latency', indexing_latency)
+ yield self._metric('store_size', indices['store']['size_in_bytes'])
+ fd_open = 0
+ if stats['process']['max_file_descriptors'] > 0:
+ fd_open = 100.0 * stats['process']['open_file_descriptors'] \
+ / stats['process']['max_file_descriptors']
+ yield self._metric('fd_open_percent', fd_open)
+
+ thread_pools = stats['thread_pool']
+ for pool in ('bulk', 'flush', 'search', 'index', 'get'):
+ yield self._metric('thread_pool_queue',
+ thread_pools[pool]['queue'], {'pool': pool})
+ yield self._metric('thread_pool_rejected',
+ thread_pools[pool]['rejected'], {'pool': pool})
+ yield self._metric('thread_pool_completed',
+ thread_pools[pool]['completed'], {'pool': pool})
+ mem = stats['jvm']['mem']
+ yield self._metric('jvm_heap_max', mem['heap_max_in_bytes'])
+ yield self._metric('jvm_heap_used_percent', mem['heap_used_percent'])
+ yield self._metric('jvm_heap_used', mem['heap_used_in_bytes'])
+ for pool, stat in mem['pools'].items():
+ yield self._metric(
+ 'jvm_heap_pool', stat['used_in_bytes'], {'pool': pool})
+ gc = stats['jvm']['gc']
+ for pool, stat in gc['collectors'].items():
+ yield self._metric('jvm_gc_count', stat['collection_count'],
+ {'pool': pool})
+ yield self._metric('jvm_gc_time',
+ stat['collection_time_in_millis'],
+ {'pool': pool})
+
+ search = indices['search']
+ for phase in ('query', 'fetch'):
+ yield self._metric('{}_current'.format(phase),
+ search['{}_current'.format(phase)])
+ latency = self._get_latency(
+ phase,
+ search['{}_total'.format(phase)],
+ search['{}_time_in_millis'.format(phase)])
+ if latency is not None:
+ yield self._metric('{}_latency'.format(phase), latency)
+ yield self._metric('query_count', search['query_total'])
+
+ query = indices['query_cache']
+ yield self._metric('query_cache_size', query['memory_size_in_bytes'])
+ yield self._metric('query_cache_evictions', query['evictions'])
+
+ fielddata = indices['fielddata']
+ yield self._metric('fielddata_size', fielddata['memory_size_in_bytes'])
+ yield self._metric('fielddata_evictions', fielddata['evictions'])
+
+ for operation in ('merges', 'flush', 'refresh'):
+ yield self._metric(operation, indices[operation]['total'])
+ latency = self._get_latency(
+ operation,
+ indices[operation]['total'],
+ indices[operation]['total_time_in_millis'])
+ if latency is not None:
+ yield self._metric('{}_latency'.format(operation), latency)
plugin = ElasticsearchNodePlugin(collectd)
diff --git a/collectd/files/plugin/collectd_openstack.py b/collectd/files/plugin/collectd_openstack.py
index ade9b60..e118332 100644
--- a/collectd/files/plugin/collectd_openstack.py
+++ b/collectd/files/plugin/collectd_openstack.py
@@ -111,13 +111,18 @@
endpoint = item['endpoints'][0]
if self.region and self.region != endpoint['region']:
continue
+ if 'internalURL' not in endpoint and 'publicURL' not in endpoint:
+ self.logger.warning(
+ "Skipping service '{}' with no valid URL".format(
+ endpoint["name"]
+ )
+ )
+ continue
self.service_catalog.append({
'name': item['name'],
'region': endpoint['region'],
- 'service_type': item['type'],
- 'url': endpoint['internalURL'],
- 'admin_url': endpoint['adminURL'],
+ 'url': endpoint.get('internalURL', endpoint.get('publicURL')),
})
self.logger.debug("Got token '%s'" % self.token)
diff --git a/collectd/files/plugin/hypervisor_stats.py b/collectd/files/plugin/hypervisor_stats.py
index 5fc3bdb..7d1696f 100644
--- a/collectd/files/plugin/hypervisor_stats.py
+++ b/collectd/files/plugin/hypervisor_stats.py
@@ -140,6 +140,7 @@
'meta': {
'aggregate': agg,
'aggregate_id': agg_id,
+ 'discard_hostname': True,
}
}
# Dispatch the global metrics
@@ -147,6 +148,7 @@
yield {
'type_instance': 'total_{}'.format(k),
'values': v,
+ 'meta': {'discard_hostname': True}
}
plugin = HypervisorStatsPlugin(collectd, PLUGIN_NAME,
diff --git a/collectd/files/plugin/openstack_glance.py b/collectd/files/plugin/openstack_glance.py
index 90bc9f8..efb94be 100644
--- a/collectd/files/plugin/openstack_glance.py
+++ b/collectd/files/plugin/openstack_glance.py
@@ -40,6 +40,19 @@
def itermetrics(self):
+ def default_metrics(suffix=''):
+ ret = {}
+ for name in ('snapshots', 'images'):
+ for visibility in ('public', 'private',
+ 'community', 'shared'):
+ for status in ('active', 'queued', 'saving',
+ 'killed', 'deleted', 'deactivated',
+ 'pending_delete'):
+ key = '%s%s.%s.%s' % (name, suffix,
+ visibility, status)
+ ret[key] = 0
+ return ret
+
def is_snap(d):
return d.get('image_type') == 'snapshot'
@@ -56,6 +69,8 @@
detail=False)
status = self.count_objects_group_by(images_details,
group_by_func=groupby)
+ if len(status) == 0:
+ status = default_metrics()
for s, nb in status.iteritems():
(name, visibility, state) = s.split('.')
yield {
@@ -79,6 +94,8 @@
sizes = self.count_objects_group_by(images_details,
group_by_func=groupby_size,
count_func=count_size_bytes)
+ if len(sizes) == 0:
+ sizes = default_metrics('_size')
for s, nb in sizes.iteritems():
(name, visibility, state) = s.split('.')
yield {
diff --git a/collectd/files/plugin/openstack_neutron.py b/collectd/files/plugin/openstack_neutron.py
index 1d147c7..a297b94 100644
--- a/collectd/files/plugin/openstack_neutron.py
+++ b/collectd/files/plugin/openstack_neutron.py
@@ -75,7 +75,7 @@
yield {
'plugin_instance': 'networks',
'type_instance': 'total',
- 'values': len(status),
+ 'values': len(networks),
'meta': {'discard_hostname': True},
}
diff --git a/tests/pillar/client.sls b/tests/pillar/client.sls
index b970e72..93bd7ad 100644
--- a/tests/pillar/client.sls
+++ b/tests/pillar/client.sls
@@ -1,4 +1,3 @@
-
collectd:
client:
enabled: true
@@ -8,4 +7,7 @@
engine: carbon
host: 127.0.0.1
port: 2023
-
+linux:
+ system:
+ name: hostname
+ domain: domain