Add PrometheusRuleEvaluationsFailed alert
Change-Id: I135a88894eb50f1d09a4229b2b8c272f5f55d8f0
Related-bug: PROD-33742
diff --git a/prometheus/meta/prometheus.yml b/prometheus/meta/prometheus.yml
index 6607168..752a84f 100644
--- a/prometheus/meta/prometheus.yml
+++ b/prometheus/meta/prometheus.yml
@@ -14,6 +14,16 @@
annotations:
summary: "Prometheus target is down"
description: "The Prometheus target for the {{ $labels.job }} job on the {{ or $labels.host $labels.instance }} node is down for 2 minutes."
+ PrometheusRuleEvaluationsFailed:  # fires when the rule evaluation failure rate stays above zero for 10 minutes
+ if: >-
+ rate(prometheus_rule_evaluation_failures_total[5m]) > 0
+ for: 10m
+ labels:
+ severity: warning
+ service: prometheus
+ annotations:
+ summary: "Prometheus failed to evaluate rules"
+ description: "The Prometheus server for the {{ $labels.job }} job on the {{ or $labels.host $labels.instance }} node has failed evaluations for recording or alerting rules. Verify the rules state in the Status/Rules section of the Prometheus Web UI."
{% endraw %}
{%- endif %}
{%- if server.get('config', {}).get('remote_write') %}