Extend procstat_running-based Ironic alerts with a 2m "for" clause.
Related-PROD: PROD-35435
Change-Id: Icccfd65d88b0af0fd15ec5977174d6d24fc11db8
diff --git a/ironic/meta/prometheus.yml b/ironic/meta/prometheus.yml
index fdf6066..390314c 100644
--- a/ironic/meta/prometheus.yml
+++ b/ironic/meta/prometheus.yml
@@ -15,6 +15,7 @@
IronicProcessDown:
if: >-
procstat_running{process_name=~"ironic-.*"} == 0
+ for: 2m
labels:
service: ironic
severity: minor
@@ -25,6 +26,7 @@
IronicProcessDownMinor:
if: >-
count(procstat_running{process_name=~"ironic-.*"} == 0) by (process_name) >= count(procstat_running{process_name=~"ironic-.*"}) by (process_name) * 0.33
+ for: 2m
labels:
service: ironic
severity: minor
@@ -35,6 +37,7 @@
IronicProcessDownMajor:
if: >-
count(procstat_running{process_name=~"ironic-.*"} == 0) by (process_name) >= count(procstat_running{process_name=~"ironic-.*"}) by (process_name) * 0.66
+ for: 2m
labels:
service: ironic
severity: major
@@ -45,6 +48,7 @@
IronicProcessOutage:
if: >-
count(procstat_running{process_name=~"ironic-.*"} == 0) by (process_name) == count(procstat_running{process_name=~"ironic-.*"}) by (process_name)
+ for: 2m
labels:
service: ironic
severity: critical
@@ -55,6 +59,7 @@
IronicDriverMissing:
if: >-
scalar(count(procstat_running{process_name=~"ironic-conductor"} == 1)) - count(openstack_ironic_driver) by (driver) > 0
+ for: 2m
labels:
severity: major
service: ironic