Merge "Add alerts on log metrics"
diff --git a/glance/meta/prometheus.yml b/glance/meta/prometheus.yml
index 4b1f8e1..7085f59 100644
--- a/glance/meta/prometheus.yml
+++ b/glance/meta/prometheus.yml
@@ -13,6 +13,16 @@
summary: "Endpoint check for '{{ $labels.service }}' is down"
description: >-
Endpoint check for '{{ $labels.service }}' is down for 2 minutes
-{% endraw %}
+ GlanceErrorLogsTooHigh:  # warns when the Glance error-level log rate exceeds log_threshold
+{%- endraw %}
+ {%- set log_threshold = prometheus_server.get('alert', {}).get('GlanceErrorLogsTooHigh', {}).get('var', {}).get('threshold', 0.2 ) %}
+ if: >-
+ sum(rate(log_messages{service="glance",level=~"error|emergency|fatal"}[5m])) without (level) > {{ log_threshold }}
+{%- raw %}
+ labels:
+ severity: warning
+ service: "{{ $labels.service }}"
+ annotations:
+ summary: 'Too many errors in {{ $labels.service }} logs'
+ description: 'The rate of errors in {{ $labels.service }} logs over the last 5 minutes is too high on node {{ $labels.host }} (current value={{ $value }}, threshold={%- endraw %}{{ log_threshold }}).'
{%- endif %}
-