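Add cinder-api endpoint alerts and reword Cinder log alert

Add the CinderAPIServiceDownMajor and CinderAPIServiceOutage rules,
driven by the new endpoint_failed_major_threshold monitoring setting
(default 0.5), and reword the description of the Cinder log error-rate
alert.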

Change-Id: Ia71d7a69bfac7712ec0de7b3096a5c61bc1fa253
Related-PROD: PROD-19584
diff --git a/cinder/map.jinja b/cinder/map.jinja
index 35af31f..c493410 100644
--- a/cinder/map.jinja
+++ b/cinder/map.jinja
@@ -126,5 +126,6 @@
         'error_log_rate': 0.2,
         'services_failed_warning_threshold_percent': 0.3,
         'services_failed_critical_threshold_percent': 0.6,
+        'endpoint_failed_major_threshold': 0.5,
     },
 }, grain='os_family', merge=salt['pillar.get']('cinder:monitoring')) %}
diff --git a/cinder/meta/prometheus.yml b/cinder/meta/prometheus.yml
index 27833b3..c3c911e 100644
--- a/cinder/meta/prometheus.yml
+++ b/cinder/meta/prometheus.yml
@@ -8,6 +8,7 @@
 {%- if is_controller %}
 {%- set minor_threshold = monitoring.services_failed_warning_threshold_percent|float %}
 {%- set major_threshold = monitoring.services_failed_critical_threshold_percent|float %}
+{%- set major_endpoint_threshold = monitoring.endpoint_failed_major_threshold|float %}
 {%- raw %}
     CinderAPIOutage:
       if: >-
@@ -40,6 +41,29 @@
         summary: "Host cinder-api endpoint is not accessible"
         description: >-
           The host cinder-api endpoint on the {{ $labels.host }} node is not accessible for at least 2 minutes.
+{%- endraw %}
+    CinderAPIServiceDownMajor:  # count of cinder-api http_response_status series equal to 0 has reached the configured fraction of all such series
+      if: >-
+        count(http_response_status{name=~"cinder-api"} == 0) >= count(http_response_status{name=~"cinder-api"}) * {{ major_endpoint_threshold }}
+      for: 2m  # matches the "at least 2 minutes" wording in the description
+      labels:
+        severity: major
+        service: cinder
+      annotations:  # the percentage is rounded to a whole number so float artefacts (50.0, 55.00000000000001) never reach the alert text
+        summary: "{{ (major_endpoint_threshold * 100)|round|int }}% of host cinder-api endpoints are not accessible"
+        description: >-
+          {% raw %}{{ $value }} host cinder-api endpoints are not accessible for at least 2 minutes (at least {% endraw %}{{ (major_endpoint_threshold * 100)|round|int }}{% raw %}%).
+    CinderAPIServiceOutage:  # the number of cinder-api http_response_status series equal to 0 matches the total number of such series
+      if: >-
+        count(http_response_status{name=~"cinder-api"} == 0) == count(http_response_status{name=~"cinder-api"})
+      for: 2m  # matches the "at least 2 minutes" wording in the description
+      labels:
+        severity: critical
+        service: cinder
+      annotations:
+        summary: "Host cinder-api outage"
+        description: >-
+          All available host cinder-api endpoints are not accessible for at least 2 minutes.
     CinderServiceDown:
       if: >-
           openstack_cinder_service_state == 0
@@ -93,5 +117,6 @@
         service: cinder
       annotations:
         summary: "High number of errors in Cinder logs"
-        description: "The rate of errors in Cinder logs over the last 5 minutes is too high on the {{ $labels.host }} node (current value={{ $value }}, threshold={%- endraw %}{{ log_threshold }})."
+        description: "The average per-second rate of errors in Cinder logs on the {{ $labels.host }} node is {{ $value }} (as measured over the last 5 minutes)."
+{%- endraw %}
 {%- endif %}