Merge "Implement timeout for process execution"
diff --git a/.kitchen.yml b/.kitchen.yml
new file mode 100644
index 0000000..e96d842
--- /dev/null
+++ b/.kitchen.yml
@@ -0,0 +1,47 @@
+---
+driver:
+  name: docker
+  hostname: collectd.ci.local
+  use_sudo: false
+
+provisioner:
+  name: salt_solo
+  salt_install: bootstrap
+  salt_bootstrap_url: https://bootstrap.saltstack.com
+  salt_version: latest
+  require_chef: false
+  log_level: error
+  formula: collectd
+  grains:
+    noservices: True
+  state_top:
+    base:
+      "*":
+        - collectd
+  pillars:
+    top.sls:
+      base:
+        "*":
+          - collectd
+  dependencies:
+    - name: linux
+      repo: git
+      source: https://github.com/salt-formulas/salt-formula-linux
+
+verifier:
+  name: inspec
+  sudo: true
+
+platforms:
+  - name: <%=ENV['PLATFORM'] || 'ubuntu-xenial'%>
+    driver_config:
+      image: <%=ENV['PLATFORM'] || 'trevorj/salty-whales:xenial'%>
+      platform: ubuntu
+
+suites:
+
+  - name: client
+    provisioner:
+      pillars-from-files:
+        collectd.sls: tests/pillar/client.sls
+# vim: ft=yaml sw=2 ts=2 sts=2 tw=125
diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000..4f34af2
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,41 @@
+sudo: required
+services:
+  - docker
+
+install:
+  - pip install PyYAML
+  - pip install virtualenv
+  - |
+    test -e Gemfile || cat <<EOF > Gemfile
+    source 'https://rubygems.org'
+    gem 'rake'
+    gem 'test-kitchen'
+    gem 'kitchen-docker'
+    gem 'kitchen-inspec'
+    gem 'inspec'
+    gem 'kitchen-salt', :git => 'https://github.com/salt-formulas/kitchen-salt.git'
+    EOF
+  - bundle install
+
+env:
+    - PLATFORM=trevorj/salty-whales:trusty
+    - PLATFORM=trevorj/salty-whales:xenial
+
+
+before_script:
+  - set -o pipefail
+  - make test | tail
+
+script:
+  - test ! -e .kitchen.yml || bundle exec kitchen test -t tests/integration
+
+notifications:
+  webhooks:
+    urls:
+      - https://webhooks.gitter.im/e/6123573504759330786b
+    on_success: change  # options: [always|never|change] default: always
+    on_failure: never  # options: [always|never|change] default: always
+    on_start: never     # options: [always|never|change] default: always
+    on_cancel: never    # options: [always|never|change] default: always
+    on_error: never    # options: [always|never|change] default: always
+  email: false
diff --git a/collectd/files/plugin/collectd_elasticsearch_node.py b/collectd/files/plugin/collectd_elasticsearch_node.py
index 1ce23fa..2cfc7af 100644
--- a/collectd/files/plugin/collectd_elasticsearch_node.py
+++ b/collectd/files/plugin/collectd_elasticsearch_node.py
@@ -27,21 +27,97 @@
     def __init__(self, *args, **kwargs):
         super(ElasticsearchNodePlugin, self).__init__(*args, **kwargs)
         self.plugin = NAME
+        self._previous = {}
+
+    @staticmethod
+    def _metric(name, values, meta=None):
+        return {'type_instance': name, 'values': values, 'meta': meta or {}}
+
+    def _get_latency(self, name, count, time):
+        cname = '{}_count'.format(name)
+        tname = '{}_time'.format(name)
+        prev_count = self._previous.get(cname)
+        prev_time = self._previous.get(tname)
+        self._previous[cname] = count
+        self._previous[tname] = time
+        if prev_count and prev_time:
+            diff_count = count - prev_count
+            diff_time = time - prev_time
+            return diff_time / diff_count if diff_count > 0 else 0
 
     def itermetrics(self):
         stats = self.query_api('_nodes/_local/stats').get(
             'nodes', {}).values()[0]
-        yield {
-            'type_instance': 'documents',
-            'values': stats['indices']['docs']['count']
-        }
-        yield {
-            'type_instance': 'documents_deleted',
-            'values': stats['indices']['docs']['deleted']
-        }
-        # TODO: collectd more metrics
-        # See https://www.elastic.co/guide/en/elasticsearch/guide/current/
-        # _monitoring_individual_nodes.html
+        indices = stats['indices']
+        yield self._metric('documents', indices['docs']['count'])
+        yield self._metric('documents_deleted', indices['docs']['deleted'])
+        yield self._metric(
+            'indexing_current', indices['indexing']['index_current'])
+        yield self._metric(
+            'indexing_failed', indices['indexing']['index_failed'])
+        indexing_latency = self._get_latency(
+            'indexing', indices['indexing']['index_total'],
+            indices['indexing']['index_time_in_millis'])
+        if indexing_latency:
+            yield self._metric('indexing_latency', indexing_latency)
+        yield self._metric('store_size', indices['store']['size_in_bytes'])
+        fd_open = 0
+        if stats['process']['max_file_descriptors'] > 0:
+            fd_open = 100.0 * stats['process']['open_file_descriptors'] \
+                / stats['process']['max_file_descriptors']
+        yield self._metric('fd_open_percent', fd_open)
+
+        thread_pools = stats['thread_pool']
+        for pool in ('bulk', 'flush', 'search', 'index', 'get'):
+            yield self._metric('thread_pool_queue',
+                               thread_pools[pool]['queue'], {'pool': pool})
+            yield self._metric('thread_pool_rejected',
+                               thread_pools[pool]['rejected'], {'pool': pool})
+            yield self._metric('thread_pool_completed',
+                               thread_pools[pool]['completed'], {'pool': pool})
+        mem = stats['jvm']['mem']
+        yield self._metric('jvm_heap_max', mem['heap_max_in_bytes'])
+        yield self._metric('jvm_heap_used_percent', mem['heap_used_percent'])
+        yield self._metric('jvm_heap_used', mem['heap_used_in_bytes'])
+        for pool, stat in mem['pools'].items():
+            yield self._metric(
+                'jvm_heap_pool', stat['used_in_bytes'], {'pool': pool})
+        gc = stats['jvm']['gc']
+        for pool, stat in gc['collectors'].items():
+            yield self._metric('jvm_gc_count', stat['collection_count'],
+                               {'pool': pool})
+            yield self._metric('jvm_gc_time',
+                               stat['collection_time_in_millis'],
+                               {'pool': pool})
+
+        search = indices['search']
+        for phase in ('query', 'fetch'):
+            yield self._metric('{}_current'.format(phase),
+                               search['{}_current'.format(phase)])
+            latency = self._get_latency(
+                phase,
+                search['{}_total'.format(phase)],
+                search['{}_time_in_millis'.format(phase)])
+            if latency is not None:
+                yield self._metric('{}_latency'.format(phase), latency)
+        yield self._metric('query_count', search['query_total'])
+
+        query = indices['query_cache']
+        yield self._metric('query_cache_size', query['memory_size_in_bytes'])
+        yield self._metric('query_cache_evictions', query['evictions'])
+
+        fielddata = indices['fielddata']
+        yield self._metric('fielddata_size', fielddata['memory_size_in_bytes'])
+        yield self._metric('fielddata_evictions', fielddata['evictions'])
+
+        for operation in ('merges', 'flush', 'refresh'):
+            yield self._metric(operation, indices[operation]['total'])
+            latency = self._get_latency(
+                operation,
+                indices[operation]['total'],
+                indices[operation]['total_time_in_millis'])
+            if latency is not None:
+                yield self._metric('{}_latency'.format(operation), latency)
 
 
 plugin = ElasticsearchNodePlugin(collectd)
diff --git a/collectd/files/plugin/collectd_openstack.py b/collectd/files/plugin/collectd_openstack.py
index ade9b60..e118332 100644
--- a/collectd/files/plugin/collectd_openstack.py
+++ b/collectd/files/plugin/collectd_openstack.py
@@ -111,13 +111,18 @@
             endpoint = item['endpoints'][0]
             if self.region and self.region != endpoint['region']:
                 continue
+            if 'internalURL' not in endpoint and 'publicURL' not in endpoint:
+                self.logger.warning(
+                    "Skipping service '{}' with no valid URL".format(
+                        item['name']
+                    )
+                )
+                continue
 
             self.service_catalog.append({
                 'name': item['name'],
                 'region': endpoint['region'],
-                'service_type': item['type'],
-                'url': endpoint['internalURL'],
-                'admin_url': endpoint['adminURL'],
+                'url': endpoint.get('internalURL', endpoint.get('publicURL')),
             })
 
         self.logger.debug("Got token '%s'" % self.token)
diff --git a/collectd/files/plugin/hypervisor_stats.py b/collectd/files/plugin/hypervisor_stats.py
index 5fc3bdb..7d1696f 100644
--- a/collectd/files/plugin/hypervisor_stats.py
+++ b/collectd/files/plugin/hypervisor_stats.py
@@ -140,6 +140,7 @@
                     'meta': {
                         'aggregate': agg,
                         'aggregate_id': agg_id,
+                        'discard_hostname': True,
                     }
                 }
         # Dispatch the global metrics
@@ -147,6 +148,7 @@
             yield {
                 'type_instance': 'total_{}'.format(k),
                 'values': v,
+                'meta': {'discard_hostname': True}
             }
 
 plugin = HypervisorStatsPlugin(collectd, PLUGIN_NAME,
diff --git a/collectd/files/plugin/openstack_glance.py b/collectd/files/plugin/openstack_glance.py
index 90bc9f8..efb94be 100644
--- a/collectd/files/plugin/openstack_glance.py
+++ b/collectd/files/plugin/openstack_glance.py
@@ -40,6 +40,19 @@
 
     def itermetrics(self):
 
+        def default_metrics(suffix=''):
+            ret = {}
+            for name in ('snapshots', 'images'):
+                for visibility in ('public', 'private',
+                                   'community', 'shared'):
+                    for status in ('active', 'queued', 'saving',
+                                   'killed', 'deleted', 'deactivated',
+                                   'pending_delete'):
+                        key = '%s%s.%s.%s' % (name, suffix,
+                                              visibility, status)
+                        ret[key] = 0
+            return ret
+
         def is_snap(d):
             return d.get('image_type') == 'snapshot'
 
@@ -56,6 +69,8 @@
                                           detail=False)
         status = self.count_objects_group_by(images_details,
                                              group_by_func=groupby)
+        if len(status) == 0:
+            status = default_metrics()
         for s, nb in status.iteritems():
             (name, visibility, state) = s.split('.')
             yield {
@@ -79,6 +94,8 @@
         sizes = self.count_objects_group_by(images_details,
                                             group_by_func=groupby_size,
                                             count_func=count_size_bytes)
+        if len(sizes) == 0:
+            sizes = default_metrics('_size')
         for s, nb in sizes.iteritems():
             (name, visibility, state) = s.split('.')
             yield {
diff --git a/collectd/files/plugin/openstack_neutron.py b/collectd/files/plugin/openstack_neutron.py
index 1d147c7..a297b94 100644
--- a/collectd/files/plugin/openstack_neutron.py
+++ b/collectd/files/plugin/openstack_neutron.py
@@ -75,7 +75,7 @@
         yield {
             'plugin_instance': 'networks',
             'type_instance': 'total',
-            'values': len(status),
+            'values': len(networks),
             'meta': {'discard_hostname': True},
         }
 
diff --git a/tests/pillar/client.sls b/tests/pillar/client.sls
index b970e72..93bd7ad 100644
--- a/tests/pillar/client.sls
+++ b/tests/pillar/client.sls
@@ -1,4 +1,3 @@
-
 collectd:
   client:
     enabled: true
@@ -8,4 +7,7 @@
         engine: carbon
         host: 127.0.0.1
         port: 2023
-
+linux:
+  system:
+    name: hostname
+    domain: domain