{#-
  Prometheus alert rules for Elasticsearch.

  Rendered only when the elasticsearch pillar defines a server or a client
  role. `server`, `client` and `monitoring` are imported from
  elasticsearch/map.jinja; the `monitoring.*_threshold_percent` values are
  used below as fractions (they are multiplied by 100 for display).

  Layout notes:
    * Prometheus' own templating ($labels, $value) uses the same double-brace
      delimiters as Jinja, so the static part of every rule is wrapped in
      raw/endraw and Jinja is re-entered only to interpolate a threshold.
    * Tags use the "-" whitespace-control marker so that they leave no blank
      lines or stray indentation in the rendered YAML.
    * Explanations are written as Jinja comments (not YAML comments) so the
      rendered output stays free of them.
#}
{%- if pillar.elasticsearch.server is defined or pillar.elasticsearch.client is defined %}
{%- from "elasticsearch/map.jinja" import server, client, monitoring with context %}
server:
  alert:
{%- if client.get('enabled', False) %}
    {#- Cluster-level alerts: emitted only when the client role is enabled. #}
    ElasticsearchClusterHealthStatusMajor:
      if: >-
        elasticsearch_cluster_health_status == 2
      {%- raw %}
      for: 2m
      labels:
        severity: major
        service: elasticsearch
      annotations:
        summary: "Elasticsearch cluster status is YELLOW"
        description: "The Elasticsearch cluster status is YELLOW for 2 minutes."
      {%- endraw %}
    ElasticsearchClusterHealthStatusCritical:
      if: >-
        elasticsearch_cluster_health_status == 3
      {%- raw %}
      for: 2m
      labels:
        severity: critical
        service: elasticsearch
      annotations:
        summary: "Elasticsearch cluster status is RED"
        description: "The Elasticsearch cluster status is RED for 2 minutes."
      {%- endraw %}
    ElasticsearchIndicesInReadOnlyMode:
      if: >-
        exec_elasticsearch_indices_read_only_allow_delete > 0
      {%- raw %}
      for: 2m
      labels:
        severity: critical
        service: elasticsearch
      annotations:
        summary: "Elasticsearch indices are read-only"
        description: "The Elasticsearch service has indices in read-only mode for 2 minutes."
      {%- endraw %}
{%- endif %}
{%- if server.get('enabled', False) %}
    {#- Per-node and aggregated availability alerts: emitted only when the
        server role is enabled. ElasticsearchServiceDown has no "for" clause,
        so it fires on the first evaluation where a node reports 0. #}
    ElasticsearchServiceDown:
      if: >-
        elasticsearch_up{host=~'.*'} == 0
      {%- raw %}
      labels:
        severity: minor
        service: elasticsearch
      annotations:
        summary: "Elasticsearch service is down"
        description: "The Elasticsearch service on the {{ $labels.host }} node is down."
      {%- endraw %}
    {#- The comparison keeps the left-hand side, so $value is the number of
        down services. The displayed percentage is rounded to two decimals to
        hide binary floating-point artifacts (e.g. 0.55 * 100). #}
    ElasticsearchServiceDownMinor:
      if: >-
        count(elasticsearch_up{host=~'.*'} == 0) >= count(elasticsearch_up{host=~'.*'}) * {{ monitoring.service_failed_warning_threshold_percent }}
      {%- raw %}
      for: 2m
      labels:
        severity: minor
        service: elasticsearch
      annotations:
        summary: "{%- endraw %}{{ (monitoring.service_failed_warning_threshold_percent * 100) | round(2) }}%{%- raw %} of Elasticsearch services are down"
        description: "{{ $value }} Elasticsearch services are down for 2 minutes."
      {%- endraw %}
    ElasticsearchServiceDownMajor:
      if: >-
        count(elasticsearch_up{host=~'.*'} == 0) >= count(elasticsearch_up{host=~'.*'}) * {{ monitoring.service_failed_critical_threshold_percent }}
      {%- raw %}
      for: 2m
      labels:
        severity: major
        service: elasticsearch
      annotations:
        summary: "{%- endraw %}{{ (monitoring.service_failed_critical_threshold_percent * 100) | round(2) }}%{%- raw %} of Elasticsearch services are down"
        description: "{{ $value }} Elasticsearch services are down for 2 minutes."
      {%- endraw %}
    ElasticsearchServiceOutage:
      if: >-
        count(elasticsearch_up{host=~'.*'} == 0) == count(elasticsearch_up{host=~'.*'})
      {%- raw %}
      labels:
        severity: critical
        service: elasticsearch
      annotations:
        summary: "Elasticsearch cluster outage"
        description: "All Elasticsearch services within the cluster are down."
      {%- endraw %}
    {#- Disk watermark alerts. The used/total ratio (0..1) is compared with the
        fractional threshold exactly as before; the filtered result is then
        multiplied by 100 so that $value, which the description prints
        followed by a percent sign, is a real percentage instead of a
        fraction. The firing condition itself is unchanged. #}
    ElasticsearchDiskWaterMarkMinor:
      if: >-
        ((max(elasticsearch_fs_total_total_in_bytes) by (host, instance) - max(elasticsearch_fs_total_available_in_bytes) by (host, instance)) / max(elasticsearch_fs_total_total_in_bytes) by (host, instance) >= {{ monitoring.service_disk_space_watermark_minor_threshold_percent }}) * 100
      {%- raw %}
      for: 5m
      labels:
        severity: minor
        service: elasticsearch
      annotations:
        summary: "Elasticsearch uses {%- endraw %} {{ (monitoring.service_disk_space_watermark_minor_threshold_percent * 100) | round(2) }}%{%- raw %} of disk space"
        description: "The Elasticsearch '{{ $labels.instance }}' instance uses {{ $value }}% of disk space on the {{ $labels.host }} node for 5 minutes."
      {%- endraw %}
    ElasticsearchDiskWaterMarkMajor:
      if: >-
        ((max(elasticsearch_fs_total_total_in_bytes) by (host, instance) - max(elasticsearch_fs_total_available_in_bytes) by (host, instance)) / max(elasticsearch_fs_total_total_in_bytes) by (host, instance) >= {{ monitoring.service_disk_space_watermark_major_threshold_percent }}) * 100
      {%- raw %}
      for: 5m
      labels:
        severity: major
        service: elasticsearch
      annotations:
        summary: "Elasticsearch uses {%- endraw %} {{ (monitoring.service_disk_space_watermark_major_threshold_percent * 100) | round(2) }}%{%- raw %} of disk space"
        description: "The Elasticsearch '{{ $labels.instance }}' instance uses {{ $value }}% of disk space on the {{ $labels.host }} node for 5 minutes."
      {%- endraw %}
{%- endif %}
{%- endif %}