Enable Prometheus support

This change adds alerts on API status and log metrics.

Change-Id: I443c128ead4d33c4e2887034bdebd09dcd8837b4
diff --git a/heat/meta/prometheus.yml b/heat/meta/prometheus.yml
new file mode 100644
index 0000000..dc3d01c
--- /dev/null
+++ b/heat/meta/prometheus.yml
@@ -0,0 +1,32 @@
+{%- if pillar.heat.server is defined %}
+{#- Heat alert rules for Prometheus. The raw blocks keep the $labels/$value placeholders away from Jinja. prometheus_server is not defined in this file; presumably the including template provides it (TODO confirm). #}
+{%- from "heat/map.jinja" import server with context %}
+{%- if server.get('enabled', False) %}
+{% raw %}
+server:
+  alert:
+    HeatAPIDown:  # true when the highest check status per service is 0
+      if: >-
+        max(openstack_api_check_status{service=~"heat.+"}) by (service) == 0
+      for: 2m
+      labels:
+        severity: down
+        service: '{{ $labels.service }}'
+      annotations:
+        summary: "Endpoint check for '{{ $labels.service }}' is down"
+        description: >-
+          Endpoint check for '{{ $labels.service }}' is down for 2 minutes
+    HeatErrorLogsTooHigh:
+{%- endraw %}
+      {%- set log_threshold = prometheus_server.get('alert', {}).get('HeatErrorLogsTooHigh', {}).get('var', {}).get('threshold', 0.2) %}
+      if: >-
+        sum(rate(log_messages{service="heat",level=~"error|emergency|fatal"}[5m])) without (level) > {{ log_threshold }}
+{%- raw %}
+      labels:
+        severity: warning
+        service: '{{ $labels.service }}'
+      annotations:
+        summary: 'Too many errors in {{ $labels.service }} logs'
+        description: 'The rate of errors in {{ $labels.service }} logs over the last 5 minutes is too high on node {{ $labels.host }} (current value={{ $value }}, threshold={%- endraw %}{{ log_threshold }}).'
+{%- endif %}
+{%- endif %}
diff --git a/metadata/service/support.yml b/metadata/service/support.yml
index 27889ff..e39cbbd 100644
--- a/metadata/service/support.yml
+++ b/metadata/service/support.yml
@@ -15,3 +15,5 @@
         enabled: true
       telegraf:
         enabled: true
+      prometheus:
+        enabled: true