Alert rationalization for Nginx

Change-Id: Ie87ac9f267e76b5b8c885c7d3910086b34cd25e8
Closes-Bug: PROD-19583
diff --git a/nginx/meta/prometheus.yml b/nginx/meta/prometheus.yml
index 11e832c..21de2c9 100644
--- a/nginx/meta/prometheus.yml
+++ b/nginx/meta/prometheus.yml
@@ -1,16 +1,39 @@
-{%- from "nginx/map.jinja" import server with context %}
+{%- from "nginx/map.jinja" import server, monitoring with context %}
 {%- if server.get('enabled', False) %}
-{%- raw %}
 server:
   alert:
-    NginxDown:
+    NginxServiceDown:  # matches every nginx_up series whose value is not 1
       if: >-
         nginx_up != 1
+      {%- raw %}
       labels:
-        severity: warning
+        severity: minor
         service: nginx
       annotations:
-        summary: 'Nginx service down'
-        description: 'Nginx service is down on node {{ $labels.host }}'
-{%- endraw %}
-{%- endif %}
\ No newline at end of file
+        summary: "NGINX service is down"
+        description: "The NGINX service on the {{ $labels.host }} node is down."  # placeholder is inside the Jinja raw block, so Jinja passes it through verbatim
+      {% endraw %}
+    NginxServiceOutage:  # per cluster: count of nginx_up series equals count of nginx_up series that are 0
+      if: >-
+        count(label_replace(nginx_up, "cluster", "$1", "host", "([^0-9]+).+")) by (cluster) == count(label_replace(nginx_up == 0, "cluster", "$1", "host", "([^0-9]+).+")) by (cluster)
+      {%- raw %}
+      labels:
+        severity: critical
+        service: nginx
+      annotations:  # values below sit inside the Jinja raw block, so their placeholders are passed through verbatim
+        summary: "NGINX cluster outage"
+        description: "All NGINX processes within the {{ $labels.cluster }} cluster are down."  # cluster is the label label_replace derives from host in the expression
+      {% endraw %}
+    NginxDroppedIncomingConnections:  # accepted-connection rate exceeds handled-connection rate over a 5m window
+      if: >-
+        rate(nginx_accepts[5m]) - rate(nginx_handled[5m]) > 0
+      {%- raw %}
+      for: 5m  # expression uses rate(), not irate(): irate() only sees the last two samples, so one quiet scrape would reset this timer
+      labels:
+        severity: minor
+        service: nginx
+      annotations:
+        summary: "NGINX has dropped incoming connections"
+        description: "{{ $value }} accepted connections per second were dropped by NGINX for at least 5 minutes."
+      {% endraw %}
+{%- endif %}