Trigger the target down alert after 2 minutes
Without this delay, the alert fires as soon as Prometheus fails to
scrape a target, which is too aggressive in case of transient
connectivity issues or an endpoint restart.
Change-Id: Ib3de5b141db7a7f2397bf332844a9c44d38f2d3c
diff --git a/prometheus/meta/prometheus.yml b/prometheus/meta/prometheus.yml
index 1ef4d26..07d76bd 100644
--- a/prometheus/meta/prometheus.yml
+++ b/prometheus/meta/prometheus.yml
@@ -6,12 +6,13 @@
{% raw %}
PrometheusTargetDown:
if: 'up != 1'
+ for: 2m
labels:
severity: critical
service: prometheus
annotations:
- summary: 'Prometheus endpoint {{ $labels.instance }} is down'
- description: 'Prometheus endpoint {{ $labels.instance }} is down for job {{ $labels.job }}'
+ summary: 'Prometheus endpoint {{ $labels.instance }} down'
+ description: 'The Prometheus target {{ $labels.instance }} is down for the job {{ $labels.job }}.'
{% endraw %}
{%- endif %}
{%- if remote_storage_adapter.get('enabled', False) %}