X-Git-Url: https://gerrit.mcp.mirantis.com/gitweb?p=salt-formulas%2Felasticsearch.git;a=blobdiff_plain;f=elasticsearch%2Fmeta%2Fprometheus.yml;h=0e4d7e0b5a7349c5ae652d9323b63083b0aebae2;hp=464111d1feb7d32f57ffe58f84a08944b1222af3;hb=7b6481c6d1a5eb080d7996786580d23b50c5cc95;hpb=85c27aa47cd904d8b487a169d0e56b4c268b9259 diff --git a/elasticsearch/meta/prometheus.yml b/elasticsearch/meta/prometheus.yml index 464111d..0e4d7e0 100644 --- a/elasticsearch/meta/prometheus.yml +++ b/elasticsearch/meta/prometheus.yml @@ -1,19 +1,13 @@ -{%- if pillar.elasticsearch.server is defined %} -{% raw %} +{%- if pillar.elasticsearch.server is defined or pillar.elasticsearch.client is defined %} +{%- from "elasticsearch/map.jinja" import server, client, monitoring with context %} + server: alert: - ElasticsearchDown: - if: >- - elasticsearch_up != 1 - labels: - severity: warning - service: elasticsearch - annotations: - summary: 'Elasticsearch service down' - description: 'Elasticsearch service is down on node {{ $labels.host }}' +{%- if client.get('enabled', False) %} +{%- raw %} ElasticsearchClusterHealthStatusYellow: if: >- - max_over_time(elasticsearch_cluster_health_status[5m]) == 2 + elasticsearch_cluster_health_status == 2 labels: severity: warning service: elasticsearch @@ -23,7 +17,7 @@ server: The Elasticsearch cluster status is YELLOW for the last 5 minutes. ElasticsearchClusterHealthStatusRed: if: >- - max_over_time(elasticsearch_cluster_health_status[5m]) == 3 + elasticsearch_cluster_health_status == 3 labels: severity: critical service: elasticsearch @@ -31,6 +25,46 @@ server: summary: 'Elasticsearch cluster status is RED' description: >- The Elasticsearch cluster status is RED for the last 5 minutes. +{%- endraw %} +{%- endif %} +{%- if server.get('enabled', False) %} +{%- raw %} + ElasticsearchInfo: + if: >- + elasticsearch_up{host=~'.*'} == 0 + labels: + severity: info + service: elasticsearch + annotations: + summary: 'Elasticsearch service is down' + description: 'Elasticsearch service is down on node {{ $labels.host }}' + ElasticsearchWarning: + if: >- + count(elasticsearch_up{host=~'.*'} == 0) >= count(elasticsearch_up{host=~'.*'}) * {% endraw %} {{ monitoring.service_failed_warning_threshold_percent }} {% raw %} + labels: + severity: warning + service: elasticsearch + annotations: + summary: 'More than {%- endraw %} {{monitoring.service_failed_warning_threshold_percent*100}}%{%- raw %} of Elasticsearch services are down' + description: 'More than {%- endraw %} {{monitoring.service_failed_warning_threshold_percent*100}}%{%- raw %} of Elasticsearch services are down' + ElasticsearchCritical: + if: >- + count(elasticsearch_up{host=~'.*'} == 0) >= count(elasticsearch_up{host=~'.*'}) * {% endraw %} {{ monitoring.service_failed_critical_threshold_percent }} {% raw %} + labels: + severity: critical + service: elasticsearch + annotations: + summary: 'More than {%- endraw %} {{monitoring.service_failed_critical_threshold_percent*100}}%{%- raw %} of Elasticsearch services are down' + description: 'More than {%- endraw %} {{monitoring.service_failed_critical_threshold_percent*100}}%{%- raw %} of Elasticsearch services are down' + ElasticsearchDown: + if: >- + count(elasticsearch_up{host=~'.*'} == 0) == count(elasticsearch_up{host=~'.*'}) + labels: + severity: down + service: elasticsearch + annotations: + summary: 'All Elasticsearch services are down' + description: 'All Elasticsearch services are down' ElasticsearchClusterDiskLowWaterMark: if: >- (max(elasticsearch_fs_total_total_in_bytes) by (host, instance) - max(elasticsearch_fs_total_available_in_bytes) by (host, instance)) / max(elasticsearch_fs_total_total_in_bytes) by (host, instance) * 100.0 >= 85 @@ -42,7 +76,6 @@ server: summary: 'Elasticsearch low disk watermark [85%] exceeded on node {{ $labels.host}} instance {{ $labels.instance }}' description: >- Elasticsearch will not allocate new shards to node {{ $labels.host }} - ElasticsearchClusterDiskHighWaterMark: if: >- (max(elasticsearch_fs_total_total_in_bytes) by (host, instance) - max(elasticsearch_fs_total_available_in_bytes) by (host, instance)) / max(elasticsearch_fs_total_total_in_bytes) by (host, instance) * 100.0 >= 90 @@ -54,5 +87,6 @@ server: labels: severity: critical service: elasticsearch -{% endraw %} +{%- endraw %} +{%- endif %} {%- endif %}