Add new Prometheus alerts for etcd
EtcdClusterSmall
EtcdServerHasLeader
Change-Id: I7edfe06903af260dc001a83e5721f77140b61cd9
diff --git a/etcd/meta/prometheus.yml b/etcd/meta/prometheus.yml
index 56926e1..f2f2d8e 100644
--- a/etcd/meta/prometheus.yml
+++ b/etcd/meta/prometheus.yml
@@ -11,4 +11,20 @@
annotations:
summary: 'High number of HTTP requests are failing on etcd'
description: '{{ $value }}% of requests for {{ $labels.method }} failed on etcd instance {{ $labels.instance }}'
+ EtcdServerHasLeader:  # NOTE: despite the name, this rule fires when an instance has NO leader
+ if: 'etcd_server_has_leader != 1'  # matches every series whose value is anything other than 1
+ labels:
+ severity: warning
+ service: etcd
+ annotations:
+ summary: 'Etcd instance lost leader'
+ description: 'Etcd {{ $labels.instance }} lost his leader'
+ EtcdClusterSmall:  # warns when losing one more etcd target would break quorum
+ if: 'count(up{job="etcd"} == 0) >= count(up{job="etcd"}) / 2 - 1'  # ">=" so even-sized clusters alert before quorum is lost, not after
+ labels:
+ severity: warning
+ service: etcd
+ annotations:
+ summary: 'Etcd cluster small'
+ description: 'If one more etcd peer goes down the cluster will be unavailable'
{% endraw %}