Alerts reworked

Change alert names, severities, and descriptions.

Change-Id: I7cf86e166cedf144809c3faae1ce4a8962ddda10
Closes-bug: PROD-20038
diff --git a/haproxy/meta/prometheus.yml b/haproxy/meta/prometheus.yml
index 2d3000a..79f6e72 100644
--- a/haproxy/meta/prometheus.yml
+++ b/haproxy/meta/prometheus.yml
@@ -3,76 +3,71 @@
 {%- if proxy.enabled and proxy.listen is defined and proxy.listen|length > 0 %}
 server:
   alert:
-    HaproxyDown:
+    HaproxyServiceDown:  # Fires for every haproxy_up series whose value is not 1.
 {% raw %}
       if: >-
         haproxy_up != 1
       labels:
+        severity: minor
+        service: haproxy
+      annotations:
+        summary: "HAProxy service is down"
+        description: "The HAProxy service on the {{ $labels.host }} node is down."
+    HaproxyServiceDownMajor:  # Fires per cluster (leading non-digit prefix of the host label) when at least half of its haproxy_up series are not 1.
+      if: >-
+         count(label_replace(haproxy_up, "cluster", "$1", "host", "([^0-9]+).+") != 1) by (cluster) >= 0.5 * count(label_replace(haproxy_up, "cluster", "$1", "host", "([^0-9]+).+")) by (cluster)
+      labels:
+        severity: major
+        service: haproxy
+      annotations:
+        summary: "50% of HAProxy services are down"
+        description: "{{ $value }} HAProxy services within the {{ $labels.cluster }} cluster are down (at least 50%)."
+    HaproxyServiceOutage:  # Fires per cluster (leading non-digit prefix of the host label) when every haproxy_up series in it is not 1.
+      if: >-
+         count(label_replace(haproxy_up, "cluster", "$1", "host", "([^0-9]+).+") != 1) by (cluster) == count(label_replace(haproxy_up, "cluster", "$1", "host", "([^0-9]+).+")) by (cluster)
+      labels:
+        severity: critical
+        service: haproxy
+      annotations:
+        summary: "HAProxy service outage"
+        description: "All HAProxy services within the {{ $labels.cluster }} cluster are down."
+    HaproxyHTTPResponse5xxTooHigh:  # Fires when the 2m per-second rate of haproxy_http_response_5xx on sv="FRONTEND" series exceeds 1. NOTE(review): annotations say "back end" while the selector is FRONTEND; confirm the wording.
+      if: >-
+        rate(haproxy_http_response_5xx{sv="FRONTEND"}[2m]) > 1
+      labels:
         severity: warning
         service: haproxy
       annotations:
-        summary: 'Haproxy service down'
-        description: 'Haproxy service is down on node {{ $labels.host }}'
-{% endraw %}
-{%- for listen_name, listen in proxy.listen.iteritems() if listen.get('check', True) %}
-{%- set camel_case_name = listen_name.replace('-','_').split('_')|map('capitalize')|join('') %}
-    HAproxy{{ camel_case_name }}HTTPResponse5xx:
-{% raw %}
+        summary: "HTTP 5xx responses on the {{ $labels.proxy }} back end"
+        description: "The average per-second rate of 5xx HTTP errors on the {{ $labels.host }} node for the {{ $labels.proxy }} back end is {{ $value }} (as measured over the last 2 minutes)."
+    HaproxyBackendDown:  # Fires when haproxy_chkdown on an sv="BACKEND" series increased during the last minute.
       if: >-
-        rate(haproxy_http_response_5xx{sv="FRONTEND",proxy="{% endraw %}{{ listen_name }}{% raw %}"}[1m]) > 1
-      for: 2m
+        increase(haproxy_chkdown{sv="BACKEND"}[1m]) > 0
       labels:
-        severity: warning
-        service: "haproxy/{{ $labels.proxy }}"
+        severity: minor
+        service: haproxy
       annotations:
-        summary: HTTP 5xx responses on '{{ $labels.proxy }}' proxy (host {{ $labels.host }})
-        description: >-
-          Too many 5xx HTTP errors have been detected on the '{{ $labels.proxy }}' proxy for the last 2 minutes
-          ({{ $value }} error(s) per second)
-{% endraw %}
-    HAproxy{{ camel_case_name }}BackendWarning:
-{% raw %}
+        summary: "{{ $labels.proxy }} back end is down"
+        description: "The {{ $labels.proxy }} back end on the {{ $labels.host }} node is down."
+    HaproxyBackendDownMajor:  # Fires per proxy when (12h peak of active servers - current minimum) is at least half of that 12h peak.
       if: >-
-        max(max_over_time(haproxy_active_servers{sv="BACKEND",proxy="{% endraw %}{{ listen_name }}{% raw %}"}[12h])) by (proxy)
-        - min(haproxy_active_servers{sv="BACKEND",proxy="{% endraw %}{{ listen_name }}{% raw %}"}) by (proxy) >= 1
-      for: 5m
+        max(max_over_time(haproxy_active_servers{sv="BACKEND"}[12h])) by (proxy) - min(haproxy_active_servers{sv="BACKEND"}) by (proxy) >= 0.5 * max(max_over_time(haproxy_active_servers{sv="BACKEND"}[12h])) by (proxy)
       labels:
-        severity: warning
-        service: "haproxy/{{ $labels.proxy }}"
+        severity: major
+        service: haproxy
       annotations:
-        summary: "At least one backend is down for '{{ $labels.proxy }}' proxy for the last 5 minutes"
-        description: >-
-           {{ $value }} of backends are down for the '{{ $labels.proxy }}' proxy
-{% endraw %}
-    HAproxy{{ camel_case_name }}BackendCritical:
-{% raw %}
+        summary: "50% of {{ $labels.proxy }} back ends are down"
+        description: "{{ $value }} {{ $labels.proxy }} back ends are down (at least 50%)."
+    HaproxyBackendOutage:  # Fires per proxy when the max active plus max backup server counts on sv="BACKEND" equal zero.
       if: >-
-        (max(max_over_time(haproxy_active_servers{sv="BACKEND",proxy="{% endraw %}{{ listen_name }}{% raw %}"}[12h])) by (proxy)
-         - min (haproxy_active_servers{sv="BACKEND",proxy="{% endraw %}{{ listen_name }}{% raw %}"}) by (proxy)
-        ) / max(max_over_time(haproxy_active_servers{sv="BACKEND",proxy="{% endraw %}{{ listen_name }}{% raw %}"}[12h])) by (proxy) * 100 >= 50
-      for: 5m
+        max(haproxy_active_servers{sv="BACKEND"}) by (proxy)
+        + max(haproxy_backup_servers{sv="BACKEND"}) by (proxy) == 0
       labels:
         severity: critical
-        service: "haproxy/{{ $labels.proxy }}"
+        service: haproxy
       annotations:
-        summary: "Less than 50% of backends are up for the '{{ $labels.proxy }}' proxy for the last 5 minutes"
-        description: >-
-           {{ $value }}% of backends are down for the '{{ $labels.proxy }}' proxy
+        summary: "{{ $labels.proxy }} back-end outage"
+        description: "All {{ $labels.proxy }} back ends are down."
 {% endraw %}
-    HAproxy{{ camel_case_name }}BackendDown:
-{% raw %}
-      if: >-
-        max(haproxy_active_servers{sv="BACKEND",proxy="{% endraw %}{{ listen_name }}{% raw %}"}) by (proxy)
-        + max(haproxy_backup_servers{sv="BACKEND",proxy="{% endraw %}{{ listen_name }}{% raw %}"}) by (proxy) == 0
-      for: 2m
-      labels:
-        severity: down
-        service: "haproxy/{{ $labels.proxy }}"
-      annotations:
-        summary: "All backends are down for the '{{ $labels.proxy }}' proxy"
-        description: >-
-            The proxy '{{ $labels.proxy }}' has no active backend
-{% endraw %}
-{%- endfor %}
 {%- endif %}
 {%- endif %}