Change outage alert
Follow the new alert convention: fire either the major or the outage alert, not both.
Unify quotation marks.
Change-Id: Ie20e65205c1ee318b887b8aebe27ad33e47c15de
Closes-Bug: PROD-19538
diff --git a/kibana/meta/prometheus.yml b/kibana/meta/prometheus.yml
index 39e5840..863a72f 100644
--- a/kibana/meta/prometheus.yml
+++ b/kibana/meta/prometheus.yml
@@ -10,8 +10,8 @@
severity: minor
service: kibana
annotations:
- summary: 'Kibana process is down'
- description: 'The Kibana process is down on node {{ $labels.host }}.'
+ summary: "Kibana process is down"
+ description: "The Kibana process on the {{ $labels.host }} node is down."
{% endraw %}
KibanaProcessesDownMinor:
if: >-
@@ -21,19 +21,19 @@
severity: minor
service: kibana
annotations:
- summary: 'Medium percentage of Kibana processes are down'
- description: 'More than {%- endraw %} {{monitoring.service_failed_warning_threshold_percent*100}}%{%- raw %} and less than {%- endraw %} {{monitoring.service_failed_critical_threshold_percent*100}}%{%- raw %} of Kibana processes are down.'
+ summary: "{%- endraw %}{{monitoring.service_failed_warning_threshold_percent*100}}%{%- raw %} of Kibana processes are down"
+ description: "{{ $value }} Kibana processes are down (at least {%- endraw %} {{monitoring.service_failed_warning_threshold_percent*100}}%{%- raw %})."
{% endraw %}
KibanaProcessesDownMajor:
if: >-
- count(procstat_running{process_name="kibana"} == 0) >= count(procstat_running{process_name="kibana"}) * {{ monitoring.service_failed_critical_threshold_percent }}
+ count(procstat_running{process_name="kibana"} == 0) >= count(procstat_running{process_name="kibana"}) * {{ monitoring.service_failed_critical_threshold_percent }} and count(procstat_running{process_name="kibana"} == 0) < count(procstat_running{process_name="kibana"})
{% raw %}
labels:
severity: major
service: kibana
annotations:
- summary: 'High percentage of Kibana processes are down'
- description: 'More than {%- endraw %} {{monitoring.service_failed_critical_threshold_percent*100}}%{%- raw %} of Kibana processes are down.'
+ summary: "{%- endraw %}{{monitoring.service_failed_critical_threshold_percent*100}}%{%- raw %} of Kibana processes are down"
+ description: "{{ $value }} Kibana processes are down (at least {%- endraw %} {{monitoring.service_failed_critical_threshold_percent*100}}%{%- raw %})."
{% endraw %}
KibanaServiceOutage:
if: >-
@@ -43,7 +43,7 @@
severity: critical
service: kibana
annotations:
- summary: 'Kibana service outage'
- description: 'All Kibana processes are down. The Kibana service is not available.'
+ summary: "Kibana service outage"
+ description: "All Kibana processes are down."
{% endraw %}
{%- endif %}