Add fire delays to OVS alerts
Change-Id: Ib7136cd5b5a6e494e455929693cfd7759218e0a1
Related-PROD: PROD-34251
diff --git a/telegraf/meta/prometheus.yml b/telegraf/meta/prometheus.yml
index decbc0d..ae9742c 100644
--- a/telegraf/meta/prometheus.yml
+++ b/telegraf/meta/prometheus.yml
@@ -42,30 +42,33 @@
OVSTooManyPortRunningOnAgent:
if: >-
sum by (host) (ovs_bridge_status) > 1500
+ for: 2m
labels:
severity: major
service: neutron
annotations:
summary: "High number of ovs ports on host"
- description: "The number of ovs port is {{ $value }} (ovs-vsctl list port ) on {{ $labels.host }} which is more than the expected limit"
+ description: "The number of OVS ports is {{ $value }} (ovs-vsctl list port) on {{ $labels.host }}, which has been above the expected limit for 2 minutes."
OVSErrorOnPort:
if: >-
ovs_bridge_status == 2
+ for: 2m
labels:
severity: critical
service: neutron
annotations:
summary: "OVS port is reporting error"
- description: "OVS port {{ $labels.port }} on bridge {{ $labels.bridge }} running on {{ $labels.host }} is reporting errors"
+ description: "OVS port {{ $labels.port }} on bridge {{ $labels.bridge }} running on {{ $labels.host }} has been reporting errors for 2 minutes."
OVSNonInternalPortDown:
if: >-
ovs_bridge_status{type!="internal"} == 0
+ for: 5m
labels:
severity: critical
service: neutron
annotations:
summary: "Non internal ovs port is down"
- description: "OVS port {{ $labels.port }} on bridge {{ $labels.bridge }} running on {{ $labels.host }} is reporting status down"
+ description: "OVS port {{ $labels.port }} on bridge {{ $labels.bridge }} running on {{ $labels.host }} has been reporting status down for 5 minutes."
OVSGatherFailed:
if: >-
ovs_bridge_check == 0