Removed ContrailFlows* Prometheus alerts
As recently discussed, remove all ContrailFlows* alerts: they provide
little value and can generate many false-positive alerts. The related
thresholds in map.jinja are removed as well.
Change-Id: I93974d0bbc4aa02038d8eb9d303adddade5de4ef
Closes-Bug: PROD-27474
(cherry picked from commit 90c87bda07a8c07b2b2fe26b4fa66804837cd4d2)
diff --git a/opencontrail/map.jinja b/opencontrail/map.jinja
index 064808f..41f837b 100644
--- a/opencontrail/map.jinja
+++ b/opencontrail/map.jinja
@@ -460,15 +460,6 @@
'vrouter_dns_xmpp_sessions_variation_threshold': 5,
'vrouter_lls_too_high_threshold': 10,
'vrouter_lls_variation_threshold': 5,
- 'vrouter_flows_active_too_high_threshold': 100,
- 'vrouter_flows_discard_too_high_threshold': 0.1,
- 'vrouter_flows_flow_action_drop_too_high_threshold': 0.2,
- 'vrouter_flows_frag_err_too_high_threshold': 100,
- 'vrouter_flows_invalid_nh_too_high_threshold': 0.1,
- 'vrouter_flows_composite_invalid_interface_too_high_threshold': 0.05,
- 'vrouter_flows_invalid_label_too_high_threshold': 100,
- 'vrouter_flows_flow_queue_limit_exceeded_too_high_threshold': 0.1,
- 'vrouter_flows_flow_table_full_too_high_threshold': 100,
'services_failed_warning_threshold_percent': 0.3,
'services_failed_critical_threshold_percent': 0.6,
},
diff --git a/opencontrail/meta/prometheus.yml b/opencontrail/meta/prometheus.yml
index 118d22c..d7cf03f 100644
--- a/opencontrail/meta/prometheus.yml
+++ b/opencontrail/meta/prometheus.yml
@@ -328,124 +328,6 @@
summary: "OpenContrail vRouter LLS sessions changes reached the limit of {%- endraw %} {{ vrouter_lls_variation_threshold }}{%- raw %}"
description: "The OpenContrail vRouter LLS sessions on the {{ $labels.host }} node have changed {{ $value }} times."
{%- endraw %}
- ContrailFlowsActiveTooHigh:
- if: >-
- {%- set vrouter_flows_active_toohigh_threshold = monitoring.vrouter_flows_active_too_high_threshold %}
- deriv(contrail_vrouter_flows_active[5m]) >= {{ vrouter_flows_active_toohigh_threshold }}
-{%- raw %}
- for: 2m
- labels:
- severity: warning
- service: contrail
- annotations:
- summary: "OpenContrail vRouter active flows reached the limit of {%- endraw %} {{ vrouter_flows_active_toohigh_threshold }}{%- raw %}"
- description: "{{ $value }} OpenContrail vRouter flows per second on the {{ $labels.host }} node are active for 2 minutes."
-{%- endraw %}
- ContrailFlowsDiscardedTooHigh:
- if: >-
- {%- set vrouter_flows_discard_toohigh_threshold = monitoring.vrouter_flows_discard_too_high_threshold %}
- rate(contrail_vrouter_flows_discard[5m]) >= {{ vrouter_flows_discard_toohigh_threshold }}
-{%- raw %}
- for: 2m
- labels:
- severity: warning
- service: contrail
- annotations:
- summary: "OpenContrail vRouter discarded flows reached the limit of {%- endraw %} {{ vrouter_flows_discard_toohigh_threshold }}{%- raw %}/s"
- description: "The average per-second rate of discarded OpenContrail vRouter flows on the {{ $labels.host }} node is {{ $value }} for 2 minutes."
-{%- endraw %}
- ContrailFlowsDroppedTooHigh:
- enabled: false
- if: >-
- {%- set vrouter_flows_flow_action_drop_toohigh_threshold = monitoring.vrouter_flows_flow_action_drop_too_high_threshold %}
- rate(contrail_vrouter_flows_flow_action_drop[5m]) >= {{ vrouter_flows_flow_action_drop_toohigh_threshold }}
-{%- raw %}
- for: 2m
- labels:
- severity: warning
- service: contrail
- annotations:
- summary: "OpenContrail vRouter dropped flows reached the limit of {%- endraw %} {{ vrouter_flows_flow_action_drop_toohigh_threshold }}{%- raw %}/s"
- description: "The average per-second rate of dropped OpenContrail vRouter flows on the {{ $labels.host }} node is {{ $value }} for 2 minutes."
-{%- endraw %}
- ContrailFlowsFragErrTooHigh:
- if: >-
- {%- set vrouter_flows_frag_err_toohigh_threshold = monitoring.vrouter_flows_frag_err_too_high_threshold %}
- min(contrail_vrouter_flows_frag_err) by (host) >= {{ vrouter_flows_frag_err_toohigh_threshold }}
-{%- raw %}
- for: 2m
- labels:
- severity: warning
- service: contrail
- annotations:
- summary: "OpenContrail vRouter flows with fragment errors reached the limit of {%- endraw %} {{ vrouter_flows_flow_action_drop_toohigh_threshold }}{%- raw %}"
- description: "{{ $value }} OpenContrail vRouter flows on the {{ $labels.host }} node had fragment errors for 2 minutes."
-{%- endraw %}
- ContrailFlowsNextHopInvalidTooHigh:
- if: >-
- {%- set vrouter_flows_invalid_nh_toohigh_threshold = monitoring.vrouter_flows_invalid_nh_too_high_threshold %}
- rate(contrail_vrouter_flows_invalid_nh[5m]) >= {{ vrouter_flows_invalid_nh_toohigh_threshold }}
-{%- raw %}
- for: 2m
- labels:
- severity: warning
- service: contrail
- annotations:
- summary: "OpenContrail vRouter flows with an invalid next hop reached the limit of {%- endraw %} {{ vrouter_flows_invalid_nh_toohigh_threshold }}{%- raw %}/s"
- description: "The average per-second rate of OpenContrail vRouter flows with an invalid next hop on the {{ $labels.host }} node is {{ $value }} for 2 minutes."
-{%- endraw %}
- ContrailFlowsInterfaceInvalidTooHigh:
- if: >-
- {%- set vrouter_flows_composite_invalid_interface_toohigh_threshold = monitoring.vrouter_flows_composite_invalid_interface_too_high_threshold %}
- rate(contrail_vrouter_flows_composite_invalid_interface[5m]) >= {{ vrouter_flows_composite_invalid_interface_toohigh_threshold }}
-{%- raw %}
- for: 2m
- labels:
- severity: warning
- service: contrail
- annotations:
- summary: "OpenContrail vRouter flows with an invalid composite interface reached the limit of {%- endraw %} {{ vrouter_flows_composite_invalid_interface_toohigh_threshold }}{%- raw %}/s"
- description: "The average per-second rate of OpenContrail vRouter flows with an invalid composite interface on the {{ $labels.host }} node is {{ $value }} for 2 minutes."
-{%- endraw %}
- ContrailFlowsLabelInvalidTooHigh:
- if: >-
- {%- set vrouter_flows_invalid_label_toohigh_threshold = monitoring.vrouter_flows_invalid_label_too_high_threshold %}
- min(contrail_vrouter_flows_invalid_label) by (host) >= {{ vrouter_flows_invalid_label_toohigh_threshold }}
-{%- raw %}
- for: 2m
- labels:
- severity: warning
- service: contrail
- annotations:
- summary: "OpenContrail vRouter flows with an invalid label reached the limit of {%- endraw %} {{ vrouter_flows_invalid_label_toohigh_threshold }}{%- raw %}"
- description: "{{ $value }} OpenContrail vRouter flows on the {{ $labels.host }} node had an invalid composite interface for 2 minutes."
-{%- endraw %}
- ContrailFlowsQueueSizeExceededTooHigh:
- if: >-
- {%- set vrouter_flows_flow_queue_limit_exceeded_toohigh_threshold = monitoring.vrouter_flows_flow_queue_limit_exceeded_too_high_threshold %}
- rate(contrail_vrouter_flows_flow_queue_limit_exceeded[5m]) >= {{ vrouter_flows_flow_queue_limit_exceeded_toohigh_threshold }}
-{%- raw %}
- for: 2m
- labels:
- severity: warning
- service: contrail
- annotations:
- summary: "OpenContrail vRouter flows exceeding the queue size reached the limit of {%- endraw %} {{ vrouter_flows_flow_queue_limit_exceeded_toohigh_threshold }}{%- raw %}/s"
- description: "The average per-second rate of OpenContrail vRouter flows exceeding the queue size on the {{ $labels.host }} node is {{ $value }} for 2 minutes."
-{%- endraw %}
- ContrailFlowsTableFullTooHigh:
- if: >-
- {%- set vrouter_flows_flow_table_full_toohigh_threshold = monitoring.vrouter_flows_flow_table_full_too_high_threshold %}
- min(contrail_vrouter_flows_flow_table_full) by (host) >= {{ vrouter_flows_flow_table_full_toohigh_threshold }}
-{%- raw %}
- for: 2m
- labels:
- severity: warning
- service: contrail
- annotations:
- summary: "OpenContrail vRouter flows with full table reached the limit of {%- endraw %} {{ vrouter_flows_flow_table_full_toohigh_threshold }}{%- raw %}"
- description: "{{ $value }} OpenContrail vRouter flows on the {{ $labels.host }} node had a full table for 2 minutes."
-{%- endraw %}
{%- if web.get('enabled', False) and web.get('cache', {}).get('engine', '') == 'redis' %}
{%- raw %}
RedisServiceDown: