Merge "Enable the monitoring for RX/TX Error Packets"
diff --git a/linux/map.jinja b/linux/map.jinja
index 9068aa2..276d526 100644
--- a/linux/map.jinja
+++ b/linux/map.jinja
@@ -458,9 +458,15 @@
'rx_packets_dropped_threshold': {
'warn': 60,
},
+ 'rx_packets_error_threshold': {
+ 'warn': 30,
+ },
'tx_packets_dropped_threshold': {
'warn': 100,
},
+ 'tx_packets_error_threshold': {
+ 'warn': 30,
+ },
'swap_in_rate': {
'warn': 1024 * 1024,
},
diff --git a/linux/meta/prometheus.yml b/linux/meta/prometheus.yml
index 5fe2f05..18332ec 100644
--- a/linux/meta/prometheus.yml
+++ b/linux/meta/prometheus.yml
@@ -219,6 +219,17 @@
annotations:
summary: "{{ net_rx_dropped_threshold }}{%- raw %} received packets were dropped"
description: "{{ $value }} packets received by the {{ $labels.interface }} interface on the {{ $labels.host }} node were dropped during the last minute."
+ SystemRxPacketsErrorTooHigh:
+ {%- endraw %}
+ {%- set net_rx_error_threshold = monitoring.rx_packets_error_threshold.warn %}
+ if: >-
+ increase(net_err_in[1m]) > {{ net_rx_error_threshold }}
+ labels:
+ severity: warning
+ service: system
+ annotations:
+ summary: "{{ net_rx_error_threshold }}{%- raw %} received packets had errors"
+ description: "{{ $value }} packets received by the {{ $labels.interface }} interface on the {{ $labels.host }} node had errors during the last minute."
SystemTxPacketsDroppedTooHigh:
{%- endraw %}
{%- set net_tx_dropped_threshold = monitoring.tx_packets_dropped_threshold.warn %}
@@ -230,6 +241,17 @@
annotations:
summary: "{{ net_tx_dropped_threshold }}{%- raw %} transmitted packets were dropped"
description: "{{ $value }} packets transmitted by the {{ $labels.interface }} interface on the {{ $labels.host }} node were dropped during the last minute."
+ SystemTxPacketsErrorTooHigh:
+ {%- endraw %}
+ {%- set net_tx_error_threshold = monitoring.tx_packets_error_threshold.warn %}
+ if: >-
+ increase(net_err_out[1m]) > {{ net_tx_error_threshold }}
+ labels:
+ severity: warning
+ service: system
+ annotations:
+ summary: "{{ net_tx_error_threshold }}{%- raw %} transmitted packets had errors"
+ description: "{{ $value }} packets transmitted by the {{ $labels.interface }} interface on the {{ $labels.host }} node had errors during the last minute."
CronProcessDown:
if: >-
procstat_running{process_name="cron"} == 0