Merge "Updating reference from github to gerrit - Changing Maintainer to dev@mirantis.com"
diff --git a/linux/map.jinja b/linux/map.jinja
index 667a2dd..0fb462d 100644
--- a/linux/map.jinja
+++ b/linux/map.jinja
@@ -448,14 +448,10 @@
'failed_auths_threshold': {
'warn': 5,
},
- 'net_rx_action_per_cpu_threshold': {
- 'warning': '500',
- 'minor': '5000'
- },
+ 'netdev_budget_squeeze_rate': 0.1,
'packets_dropped_per_cpu_threshold': {
'minor': '0',
'major': '100'
}
},
}, grain='os_family', merge=salt['pillar.get']('linux:monitoring')) %}
-
diff --git a/linux/meta/prometheus.yml b/linux/meta/prometheus.yml
index 5a3ca05..1e029f3 100644
--- a/linux/meta/prometheus.yml
+++ b/linux/meta/prometheus.yml
@@ -230,28 +230,18 @@
annotations:
summary: "CPU dropped {{ packets_dropped_major_threshold }}{%- raw %} packets"
description: "The {{ $labels.cpu }} CPU on the {{ $labels.host }} node dropped {{ $value }} packets during the last 24 hours."
- NetRxActionByCpuWarning:
+ NetdevBudgetRanOutsWarning:
{%- endraw %}
- {%- set net_rx_action_warning_threshold = monitoring.net_rx_action_per_cpu_threshold.warning %}
+ {%- set squeeze_rate_threshold = monitoring.netdev_budget_squeeze_rate %}
if: >-
- floor(increase(nstat_time_squeeze[1d])) > {{ net_rx_action_warning_threshold }}
+ max(rate(nstat_time_squeeze[5m])) without (cpu) > {{ squeeze_rate_threshold }}
+ for: 7m
labels:
severity: warning
service: system
annotations:
- summary: "CPU terminated {{ net_rx_action_warning_threshold }}{%- raw %} net_rx_action loops"
- description: "The {{ $labels.cpu }} CPU on the {{ $labels.host }} node terminated {{ $value }} net_rx_action loops during the last 24 hours. Modify the net.core.netdev_budget kernel parameter."
- NetRxActionByCpuMinor:
- {%- endraw %}
- {%- set net_rx_action_minor_threshold = monitoring.net_rx_action_per_cpu_threshold.minor %}
- if: >-
- floor(increase(nstat_time_squeeze[1d])) > {{ net_rx_action_minor_threshold }}
- labels:
- severity: minor
- service: system
- annotations:
- summary: "CPU terminated {{ net_rx_action_minor_threshold }}{%- raw %} net_rx_action loops"
- description: "The {{ $labels.cpu }} CPU on the {{ $labels.host }} node terminated {{ $value }} net_rx_action loops during the last 24 hours. Modify the net.core.netdev_budget kernel parameter."
+ summary: "CPU terminated {{ squeeze_rate_threshold }}{%- raw %} net_rx_action loops per second"
+ description: "The rate of net_rx_action loop terminations on the {{ $labels.host }} node is {{ $value }} per second during the last 7 minutes. Modify the net.core.netdev_budget and net.core.netdev_budget_usecs kernel parameters."
{%- endraw %}
{%- if monitoring.bond_status.interfaces is defined and monitoring.bond_status.interfaces %}
{%- raw %}