Alerts reworked

Change alert names, severities, and descriptions.

Change-Id: I021d06c14ebc4931ff98ca460e837e29e503a8bd
Closes-bug: PROD-19699
diff --git a/memcached/map.jinja b/memcached/map.jinja
index 463d420..661eeaf 100644
--- a/memcached/map.jinja
+++ b/memcached/map.jinja
@@ -21,3 +21,13 @@
         'slabsize': '1m',
     },
 }, merge=salt['pillar.get']('memcached:server')) %}
+
+{#- Alert thresholds read by memcached/meta/prometheus.yml; defaults are merged with the memcached:monitoring pillar. NOTE(review): two keys are spelled "treshold" and consumers use that same spelling. #}
+{%- set monitoring = salt['grains.filter_by']({
+  'default': {
+    'service_evictions_threshold': 10,
+    'service_conn_yield_treshold': 5,
+    'service_respawn_seconds_treshold': 180,
+  },
+}, grain='os_family', merge=salt['pillar.get']('memcached:monitoring')) %}
+
diff --git a/memcached/meta/prometheus.yml b/memcached/meta/prometheus.yml
index b66ba01..65e08b2 100644
--- a/memcached/meta/prometheus.yml
+++ b/memcached/meta/prometheus.yml
@@ -1,16 +1,86 @@
-{%- from "memcached/map.jinja" import server with context %}
+{%- from "memcached/map.jinja" import server, monitoring with context %}
+{#- Prometheus alert rules for Memcached. Thresholds come from the "monitoring" map; raw blocks keep the Prometheus template placeholders literal. #}
 {%- if server.get('enabled', False) %}
 server:
   alert:
-    MemcachedProcessDown:
+{%- raw %}
+    # The memcached_up metric is 0.
+    MemcachedServiceDown:
       if: >-
-        procstat_running{process_name="memcached"} == 0
-      {% raw %}
+        memcached_up == 0
+      labels:
+        severity: minor
+        service: memcached
+      annotations:
+        summary: "Memcached service is down"
+        description: "The Memcached service on the {{ $labels.host }} node is down."
+    # Uptime is below the configured respawn threshold (seconds), i.e. the service restarted recently.
+    # NOTE(review): the name says "Minor" but the severity below is "warning" - confirm which is intended.
+    MemcachedRespawnMinor:
+      if: >-
+{%- endraw %}
+        memcached_uptime < {{ monitoring.service_respawn_seconds_treshold }}
+{%- raw %}
       labels:
         severity: warning
         service: memcached
       annotations:
-        summary: 'Memcached service is down'
-        description: 'Memcached service is down on node {{ $labels.host }}'
-      {% endraw %}
+        summary: "Memcached is respawned"
+        description: "The Memcached service on the {{ $labels.host }} node was respawned."
+    # Connection yields within 1m exceed the configured threshold, sustained for 2m.
+    MemcachedConnectionThrottled:
+      if: >-
+{%- endraw %}
+        increase(memcached_conn_yields[1m]) > {{ monitoring.service_conn_yield_treshold }}
+{%- raw %}
+      for: 2m
+      labels:
+        severity: warning
+        service: memcached
+      annotations:
+        summary: "{%- endraw %}{{ monitoring.service_conn_yield_treshold }}{%- raw %} throttled Memcached connections"
+        description: "An average of {{ $value }} client connections to the Memcached service on the {{ $labels.host }} node throttle for at least 2 minutes."
+    # A single series reports zero current connections.
+    MemcachedConnectionsNoneMinor:
+      if: >-
+        memcached_curr_connections == 0
+      labels:
+        severity: minor
+        service: memcached
+      annotations:
+        summary: "Memcached has no open connections"
+        description: "The Memcached service on the {{ $labels.host }} node has no open connections."
+    # The count of series with zero connections equals the count of memcached_up series.
+    MemcachedConnectionsNoneMajor:
+      if: >-
+        count(memcached_curr_connections == 0) == count(memcached_up)
+      labels:
+        severity: major
+        service: memcached
+      annotations:
+        summary: "Memcached has no open connections on all nodes"
+        description: "The Memcached service has no open connections on all nodes."
+    # memcached_curr_items is 0, i.e. nothing is stored.
+    MemcachedItemsNoneMinor:
+      if: >-
+        memcached_curr_items == 0
+      labels:
+        severity: minor
+        service: memcached
+      annotations:
+        summary: "Memcached is empty"
+        description: "The Memcached service storage on the {{ $labels.host }} node has no entry."
+    # Evictions over the last minute exceed the configured threshold.
+    MemcachedEvictionsLimit:
+{%- endraw %}
+      if: >-
+        increase(memcached_evictions[1m]) > {{ monitoring.service_evictions_threshold }}
+{%- raw %}
+      labels:
+        severity: warning
+        service: memcached
+      annotations:
+        summary: "{%- endraw %}{{ monitoring.service_evictions_threshold }}{%- raw %} evictions"
+        description: "An average of {{ $value }} evictions occurred on the {{ $labels.host }} node during the last minute."
+{%- endraw %}
 {%- endif %}