Merge "Extend procstat_running-based metric alerts."
diff --git a/opencontrail/meta/prometheus.yml b/opencontrail/meta/prometheus.yml
index 7f8ef15..5173d38 100644
--- a/opencontrail/meta/prometheus.yml
+++ b/opencontrail/meta/prometheus.yml
@@ -93,6 +93,7 @@
     ContrailProcessDown:
       if: >-
         procstat_running{process_name=~"contrail.*"} == 0
+      for: 2m
       labels:
         severity: minor
         service: contrail
@@ -104,6 +105,7 @@
     ContrailProcessDownMinor:
       if: >-
         count(procstat_running{process_name=~"contrail.*"} == 0) by (process_name) >= {{ monitoring.services_failed_warning_threshold_percent }}*count(procstat_running{process_name=~"contrail.*"}) by (process_name)
+      for: 2m
       labels:
         severity: minor
         service: contrail
@@ -115,6 +117,7 @@
     ContrailProcessDownMajor:
       if: >-
         count(procstat_running{process_name=~"contrail.*"} == 0) by (process_name) >= {{ monitoring.services_failed_critical_threshold_percent }}*count(procstat_running{process_name=~"contrail.*"}) by (process_name)
+      for: 2m
       labels:
         severity: major
         service: contrail
@@ -126,6 +129,7 @@
     ContrailProcessOutage:
       if: >-
         count(procstat_running{process_name=~"contrail.*"} == 0) by (process_name) == count(procstat_running{process_name=~"contrail.*"}) by (process_name)
+      for: 2m
       labels:
         severity: critical
         service: contrail
@@ -369,6 +373,7 @@
     RedisServiceDown:
       if: >-
         procstat_running{process_name="redis-server"} == 0
+      for: 2m
       labels:
         severity: minor
         service: redis
@@ -380,6 +385,7 @@
       if: >-
         count(procstat_running{process_name="redis-server"} == 0) >= count(procstat_running{process_name="redis-server"}) *{{ monitoring.services_failed_warning_threshold_percent }}
     {%- raw %}
+      for: 2m
       labels:
         severity: minor
         service: redis
@@ -390,6 +396,7 @@
       if: >-
         count(procstat_running{process_name="redis-server"} == 0) >= count(procstat_running{process_name="redis-server"}) *{{ monitoring.services_failed_critical_threshold_percent }}
     {%- raw %}
+      for: 2m
       labels:
         severity: major
         service: redis
@@ -399,6 +406,7 @@
     RedisServiceOutage:
       if: >-
         count(procstat_running{process_name="redis-server"} == 0) == count(procstat_running{process_name="redis-server"})
+      for: 2m
       labels:
         severity: critical
         service: redis
@@ -412,6 +420,7 @@
     CassandraServiceDown:
       if: >-
         procstat_running{process_name="cassandra-server"} == 0
+      for: 2m
       labels:
         severity: minor
         service: cassandra
@@ -423,6 +432,7 @@
       if: >-
         count(procstat_running{process_name="cassandra-server"} == 0) >= count(procstat_running{process_name="cassandra-server"}) *{{ monitoring.services_failed_warning_threshold_percent }}
     {%- raw %}
+      for: 2m
       labels:
         severity: minor
         service: cassandra
@@ -433,6 +443,7 @@
       if: >-
         count(procstat_running{process_name="cassandra-server"} == 0) >= count(procstat_running{process_name="cassandra-server"}) *{{ monitoring.services_failed_critical_threshold_percent }}
     {%- raw %}
+      for: 2m
       labels:
         severity: major
         service: cassandra
@@ -442,6 +453,7 @@
     CassandraServiceOutage:
       if: >-
         count(procstat_running{process_name="cassandra-server"} == 0) == count(procstat_running{process_name="cassandra-server"})
+      for: 2m
       labels:
         severity: critical
         service: cassandra
@@ -451,6 +463,7 @@
     KafkaServiceDown:
       if: >-
         procstat_running{process_name="kafka-server"} == 0
+      for: 2m
       labels:
         severity: minor
         service: kafka
@@ -461,6 +474,7 @@
     KafkaServiceDownMinor:
       if: >-
         count(procstat_running{process_name="kafka-server"} == 0) >= count(procstat_running{process_name="kafka-server"}) *{{ monitoring.services_failed_warning_threshold_percent }}
+      for: 2m
       labels:
         severity: minor
         service: kafka
@@ -473,6 +487,7 @@
       if: >-
         count(procstat_running{process_name="kafka-server"} == 0) >= count(procstat_running{process_name="kafka-server"}) *{{ monitoring.services_failed_critical_threshold_percent }}
     {%- raw %}
+      for: 2m
       labels:
         severity: major
         service: kafka
@@ -482,6 +497,7 @@
     KafkaServiceOutage:
       if: >-
         count(procstat_running{process_name="kafka-server"} == 0) == count(procstat_running{process_name="kafka-server"})
+      for: 2m
       labels:
         severity: critical
         service: kafka