Improve etcd monitoring
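- Increase polling interval of the collectd HTTP check
- Check expected content of the etcd /health response
- Rename metric from http_check to etcd_service_health
- Read the health check port from pillar.etcd.server.bind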
Change-Id: I0f5265ad31434d73fc188f129d24bb20f077b8be
diff --git a/etcd/meta/collectd.yml b/etcd/meta/collectd.yml
index 6522147..3d115d9 100644
--- a/etcd/meta/collectd.yml
+++ b/etcd/meta/collectd.yml
@@ -1,9 +1,13 @@
local_plugin:
collectd_http_check:
+ polling_interval: 30
url:
etcd:
expected_code: 200
- url: http://127.0.0.1:{{ pillar.etcd.server.get('port', '4001') }}/health
+ expected_content: >-
+ {"health": "true"}
+ url: http://127.0.0.1:{{ pillar.etcd.server.bind.get('port', '4001') }}/health
+ metric_name: etcd_service_health
collectd_processes:
process:
etcd:
diff --git a/etcd/meta/heka.yml b/etcd/meta/heka.yml
index dafff73..41b28a4 100644
--- a/etcd/meta/heka.yml
+++ b/etcd/meta/heka.yml
@@ -4,7 +4,7 @@
description: 'Etcd server is locally down'
severity: down
rules:
- - metric: http_check
+ - metric: etcd_service_health
field:
service: etcd
relational_operator: '=='