Merge "Tune default thresholds for the nstat_time_squeeze based alerts"
diff --git a/linux/map.jinja b/linux/map.jinja
index 4f8b5b7..37a6d59 100644
--- a/linux/map.jinja
+++ b/linux/map.jinja
@@ -431,8 +431,8 @@
'warn': 5,
},
'net_rx_action_per_cpu_threshold': {
- 'warning': '0',
- 'minor': '100'
+ 'warning': '500',
+ 'minor': '5000'
},
'packets_dropped_per_cpu_threshold': {
'minor': '0',
diff --git a/linux/meta/prometheus.yml b/linux/meta/prometheus.yml
index 3ca2b26..5a3ca05 100644
--- a/linux/meta/prometheus.yml
+++ b/linux/meta/prometheus.yml
@@ -234,24 +234,24 @@
{%- endraw %}
{%- set net_rx_action_warning_threshold = monitoring.net_rx_action_per_cpu_threshold.warning %}
if: >-
- floor(increase(nstat_time_squeeze[24h])) > {{ net_rx_action_warning_threshold }}
+ floor(increase(nstat_time_squeeze[1d])) > {{ net_rx_action_warning_threshold }}
labels:
severity: warning
service: system
annotations:
summary: "CPU terminated {{ net_rx_action_warning_threshold }}{%- raw %} net_rx_action loops"
- description: "The {{ $labels.cpu }} CPU on the {{ $labels.host }} node terminated {{ $value }} net_rx_action loops during the last 24 hours."
+ description: "The {{ $labels.cpu }} CPU on the {{ $labels.host }} node terminated {{ $value }} net_rx_action loops during the last 24 hours. Modify the net.core.netdev_budget kernel parameter."
NetRxActionByCpuMinor:
{%- endraw %}
{%- set net_rx_action_minor_threshold = monitoring.net_rx_action_per_cpu_threshold.minor %}
if: >-
- floor(increase(nstat_time_squeeze[24h])) > {{ net_rx_action_minor_threshold }}
+ floor(increase(nstat_time_squeeze[1d])) > {{ net_rx_action_minor_threshold }}
labels:
severity: minor
service: system
annotations:
summary: "CPU terminated {{ net_rx_action_minor_threshold }}{%- raw %} net_rx_action loops"
- description: "The {{ $labels.cpu }} CPU on the {{ $labels.host }} node terminated {{ $value }} net_rx_action loops during the last 24 hours."
+ description: "The {{ $labels.cpu }} CPU on the {{ $labels.host }} node terminated {{ $value }} net_rx_action loops during the last 24 hours. Modify the net.core.netdev_budget kernel parameter."
{%- endraw %}
{%- if monitoring.bond_status.interfaces is defined and monitoring.bond_status.interfaces %}
{%- raw %}