StackLight: add local checks for etcd
Change-Id: Id727ed786e6b407f9c17d78388d810f4f3d3449a
diff --git a/etcd/meta/collectd.yml b/etcd/meta/collectd.yml
new file mode 100644
index 0000000..df2e5b2
--- /dev/null
+++ b/etcd/meta/collectd.yml
@@ -0,0 +1,9 @@
+# collectd local check for etcd: request the /health endpoint over HTTP on
+# the loopback address and expect status 200. The port is read from the
+# etcd server pillar and falls back to 4001 when the pillar has no "port".
+local_plugin:
+  collectd_http_check:
+    url:
+      etcd:
+        expected_code: 200
+        url: http://127.0.0.1:{{ pillar.etcd.server.get('port', '4001') }}/health
diff --git a/etcd/meta/heka.yml b/etcd/meta/heka.yml
new file mode 100644
index 0000000..dafff73
--- /dev/null
+++ b/etcd/meta/heka.yml
@@ -0,0 +1,51 @@
+# Heka alarm definitions for the local etcd HTTP check.
+# The etcd_local_endpoint trigger matches when the last http_check sample
+# for service "etcd" equals 0; the etcd_endpoint alarm uses that trigger
+# and is tagged with service=etcd-endpoint.
+metric_collector:
+  trigger:
+    etcd_local_endpoint:
+      description: 'Etcd server is locally down'
+      severity: down
+      rules:
+        - metric: http_check
+          field:
+            service: etcd
+          relational_operator: '=='
+          threshold: 0
+          window: 60
+          periods: 0
+          function: last
+  alarm:
+    etcd_endpoint:
+      alerting: enabled
+      triggers:
+        - etcd_local_endpoint
+      dimension:
+        service: etcd-endpoint
+
+# Cluster alarms: etcd_endpoint matches service=etcd-endpoint alarms and
+# re-tags the result as service=etcd, which the etcd cluster below matches.
+aggregator:
+  alarm_cluster:
+    etcd_endpoint:
+      policy: availability_of_members
+      alerting: enabled
+      group_by: hostname
+      match:
+        service: etcd-endpoint
+      members:
+        - etcd_endpoint
+      dimension:
+        service: etcd
+        nagios_host: 01-service-clusters
+    etcd:
+      policy: highest_severity
+      alerting: enabled_with_notification
+      match:
+        service: etcd
+      members:
+        - etcd_endpoint
+      dimension:
+        cluster_name: etcd
+        nagios_host: 00-top-clusters
diff --git a/metadata/service/support.yml b/metadata/service/support.yml
index fbd7745..c0e1fe2 100644
--- a/metadata/service/support.yml
+++ b/metadata/service/support.yml
@@ -2,9 +2,9 @@
   etcd:
     _support:
       collectd:
-        enabled: false
+        enabled: true
       heka:
-        enabled: false
+        enabled: true
       sensu:
         enabled: false
       sphinx: