Add threshold to Kibana alarms

Change-Id: Iaf8909163b933b9019d8cf491a596271a0b78827
Partial-Bug: PROD-15203
diff --git a/kibana/map.jinja b/kibana/map.jinja
index 51e1331..93852ec 100644
--- a/kibana/map.jinja
+++ b/kibana/map.jinja
@@ -8,6 +8,13 @@
     },
 }, merge=salt['pillar.get']('kibana:server')) %}
 
+{% set monitoring = salt['grains.filter_by']({
+    'default': {
+        'service_failed_warning_threshold_percent': 0.3,
+        'service_failed_critical_threshold_percent': 0.6,
+    },
+}, grain='os_family', merge=salt['pillar.get']('kibana:monitoring')) %}{#- Alert thresholds; despite the "_percent" suffix the values are fractions (0.3 = 30%): kibana/meta/prometheus.yml multiplies them by the process count in the rule and by 100 for display. Override via the kibana:monitoring pillar. #}
+
 {%- load_yaml as client_defaults %}
 default:
   server:
diff --git a/kibana/meta/prometheus.yml b/kibana/meta/prometheus.yml
index 8b794be..5fa6af9 100644
--- a/kibana/meta/prometheus.yml
+++ b/kibana/meta/prometheus.yml
@@ -1,17 +1,49 @@
-{%- from "kibana/map.jinja" import server with context %}
+{%- from "kibana/map.jinja" import server, monitoring with context %}
 {%- if server.get('enabled', False) %}
 server:
   alert:
-    KibanaProcessDown:
+    KibanaProcessInfo:  # matches every series whose kibana procstat_running value is 0; the description names the host label
       if: >-
         procstat_running{process_name="kibana"} == 0
       {% raw %}
       labels:
-        severity: warning
+        severity: info
         service: kibana
       annotations:
         summary: 'Kibana service is down'
         description: 'Kibana service is down on node {{ $labels.host }}'
       {% endraw %}
+    KibanaProcessWarning:  # fires when the number of kibana processes reporting 0 reaches the warning fraction of all kibana processes
+      if: >-
+        count(procstat_running{process_name="kibana"} == 0) >= count(procstat_running{process_name="kibana"}) * {{ monitoring.service_failed_warning_threshold_percent }}
+      {% raw %}
+      labels:
+        severity: warning
+        service: kibana
+      annotations:  # percentage is rendered with "%g" so float artefacts (0.3 * 100 = 30.000000000000004) never reach the message
+        summary: 'More than {%- endraw %} {{ "%g" | format(monitoring.service_failed_warning_threshold_percent * 100) }}%{%- raw %} of Kibana services are down'
+        description: 'More than {%- endraw %} {{ "%g" | format(monitoring.service_failed_warning_threshold_percent * 100) }}%{%- raw %} of Kibana services are down'
+      {% endraw %}
+    KibanaProcessCritical:  # fires when the number of kibana processes reporting 0 reaches the critical fraction of all kibana processes
+      if: >-
+        count(procstat_running{process_name="kibana"} == 0) >= count(procstat_running{process_name="kibana"}) * {{ monitoring.service_failed_critical_threshold_percent }}
+      {% raw %}
+      labels:
+        severity: critical
+        service: kibana
+      annotations:  # percentage is rendered with "%g" so the message reads "60%" rather than "60.0%" or a float artefact for overridden values
+        summary: 'More than {%- endraw %} {{ "%g" | format(monitoring.service_failed_critical_threshold_percent * 100) }}%{%- raw %} of Kibana services are down'
+        description: 'More than {%- endraw %} {{ "%g" | format(monitoring.service_failed_critical_threshold_percent * 100) }}%{%- raw %} of Kibana services are down'
+      {% endraw %}
+    KibanaProcessDown:  # fires when the count of kibana processes reporting 0 equals the count of all kibana processes
+      if: >-
+        count(procstat_running{process_name="kibana"} == 0) == count(procstat_running{process_name="kibana"})
+      {% raw %}
+      labels:
+        severity: down
+        service: kibana
+      annotations:
+        summary: 'All Kibana services are down'
+        description: 'All Kibana services are down'
+      {% endraw %}
 {%- endif %}
-