Configure Prometheus alerts for Elasticsearch. Two kinds of alerts: - cluster health status - disk allocation watermark (per node) Change-Id: I0d168ba899d8a168543c8ba0a63793001e46267b

commit: 1740e71f975491a6f50a4b77ce2902cc5cb09286 [log] [tgz]
author: Swann Croiset <scroiset@mirantis.com> Fri May 26 17:49:32 2017 +0200
committer: Swann Croiset <scroiset@mirantis.com> Wed May 31 08:54:09 2017 +0000
tree: 0bc52cc8c3a110d891de00ca1b68e32534cb91e5
parent: 499d23fcab90c871333c7200832d7a5d98c656c5 [diff]
diff --git a/elasticsearch/meta/prometheus.yml b/elasticsearch/meta/prometheus.yml
new file mode 100644
index 0000000..e2885b4
--- /dev/null
+++ b/elasticsearch/meta/prometheus.yml

@@ -0,0 +1,49 @@
+{%- if pillar.elasticsearch.server is defined %}
+{% raw %}
+server:
+  alert:
+    # Fires when the lowest elasticsearch_cluster_health_status sample seen
+    # over the last 5 minutes equals 2, which the annotations report as YELLOW.
+    ElasticsearchClusterHealthStatusYellow:
+      if: min_over_time(elasticsearch_cluster_health_status[5m]) == 2
+      labels:
+        severity: warning
+        service: elasticsearch
+      annotations:
+        summary: 'Elasticsearch cluster status is YELLOW'
+        description: 'The Elasticsearch cluster status is YELLOW for the last 5 minutes.'
+    # Fires when the lowest elasticsearch_cluster_health_status sample seen
+    # over the last 5 minutes equals 3, which the annotations report as RED.
+    ElasticsearchClusterHealthStatusRed:
+      if: min_over_time(elasticsearch_cluster_health_status[5m]) == 3
+      labels:
+        severity: critical
+        service: elasticsearch
+      annotations:
+        summary: 'Elasticsearch cluster status is RED'
+        description: 'The Elasticsearch cluster status is RED for the last 5 minutes.'
+    # Per (host, instance): used filesystem share, (total - available) / total in percent, >= 85 for 5m.
+    ElasticsearchClusterDiskLowWaterMark:
+      if: >-
+        (max(elasticsearch_fs_total_total_in_bytes) by (host, instance) - max(elasticsearch_fs_total_available_in_bytes) by (host, instance)) / max(elasticsearch_fs_total_total_in_bytes)  by (host, instance) * 100.0 >= 85
+      for: 5m
+      labels:
+        severity: warning
+        service: elasticsearch
+      annotations:
+        summary: 'Elasticsearch low disk watermark [85%] exceeded on node {{ $labels.host}} instance {{ $labels.instance }}'
+        description: 'Elasticsearch will not allocate new shards to node {{ $labels.host }}'
+
+    # Per (host, instance): used filesystem share, (total - available) / total in percent, >= 90 for 5m.
+    ElasticsearchClusterDiskHighWaterMark:
+      if: >-
+        (max(elasticsearch_fs_total_total_in_bytes) by (host, instance) - max(elasticsearch_fs_total_available_in_bytes) by (host, instance)) / max(elasticsearch_fs_total_total_in_bytes) by (host, instance) * 100.0 >= 90
+      for: 5m
+      labels:
+        severity: critical
+        service: elasticsearch
+      annotations:
+        summary: 'Elasticsearch high disk watermark [90%] exceeded on node {{ $labels.host}} instance {{ $labels.instance }}'
+        description: 'Elasticsearch will not allocate new shards to node {{ $labels.host }} and will attempt to relocate shards to another node'
+{% endraw %}
+{%- endif %}

diff --git a/metadata/service/support.yml b/metadata/service/support.yml
index 3f37b10..4e2c98a 100644
--- a/metadata/service/support.yml
+++ b/metadata/service/support.yml

@@ -13,3 +13,5 @@
         enabled: true
       grafana:
         enabled: true
+      prometheus:
+        enabled: true
commit	1740e71f975491a6f50a4b77ce2902cc5cb09286	[log] [tgz]
author	Swann Croiset <scroiset@mirantis.com>	Fri May 26 17:49:32 2017 +0200
committer	Swann Croiset <scroiset@mirantis.com>	Wed May 31 08:54:09 2017 +0000
tree	0bc52cc8c3a110d891de00ca1b68e32534cb91e5
parent	499d23fcab90c871333c7200832d7a5d98c656c5 [diff]