Enable monitoring for ovs-vswitchd
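- added telegraf procstat config for the ovs-vswitchd process
- added memory alert (warning): ovs-vswitchd memory > 20% of total system memory for 5 minutes
- added memory alert (critical): ovs-vswitchd memory > 30% of total system memory for 5 minutes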
Change-Id: I0fbb172ea0f05cd1a9e26c16f8018cc43d6b4c81
Related-bug: PROD-27900 (PROD:27900)
Related-bug: PROD-27901 (PROD:27901)
diff --git a/linux/meta/prometheus.yml b/linux/meta/prometheus.yml
index 753a587..fd4b7b9 100644
--- a/linux/meta/prometheus.yml
+++ b/linux/meta/prometheus.yml
@@ -228,8 +228,29 @@
annotations:
summary: "CPU terminated {{ squeeze_rate_threshold }}{%- raw %} net_rx_action loops per second"
description: "The rate of net_rx_action loops terminations on the {{ $labels.host }} node is {{ $value }} per second during the last 7 minutes. Modify the net.core.netdev_budget and net.core.netdev_budget_usecs kernel parameters."
-{%- endraw -%}
-
+ {%- endraw %}
+ {%- if network.bridge == 'openvswitch' %}
+ {%- raw %}
+ ProcessOVSVswitchdMemoryWarning:  # Warning tier: ovs-vswitchd memory above 20% of mem_total, sustained for 5 minutes.
+   if: procstat_memory_vms{process_name="ovs-vswitchd"} / on(host) mem_total > 0.2  # NOTE(review): ratio uses virtual size (vms), not rss - confirm this is intended
+   for: 5m
+   labels:
+     severity: warning
+     service: ovs
+   annotations:
+     summary: "ovs-vswitchd takes more than 20% of system memory"
+     description: "The ovs-vswitchd process on the {{ $labels.host }} node takes more than 20% of system memory (current ratio: {{ $value }})."
+ ProcessOVSVswitchdMemoryCritical:  # Critical tier: ovs-vswitchd memory above 30% of mem_total, sustained for 5 minutes.
+   if: procstat_memory_vms{process_name="ovs-vswitchd"} / on(host) mem_total > 0.3  # NOTE(review): ratio uses virtual size (vms), not rss - confirm this is intended
+   for: 5m
+   labels:
+     severity: critical
+     service: ovs
+   annotations:
+     summary: "ovs-vswitchd takes more than 30% of system memory"
+     description: "The ovs-vswitchd process on the {{ $labels.host }} node takes more than 30% of system memory (current ratio: {{ $value }})."
+ {%- endraw %}
+ {%- endif %}
{%- set bond_interfaces = [] %}
{%- for interface_name, interface in network.interface.items() %}
{%- if interface.type == 'bond' and interface.enabled == True %}
diff --git a/linux/meta/telegraf.yml b/linux/meta/telegraf.yml
index 52b4fe7..972bd82 100644
--- a/linux/meta/telegraf.yml
+++ b/linux/meta/telegraf.yml
@@ -33,6 +33,10 @@
exe: sshd
cron:
exe: cron
+{%- if network.bridge == 'openvswitch' %}
+ ovs-vswitchd:  # Rendered only when network.bridge is 'openvswitch'; uses the same `exe` selector form as the sshd and cron entries above.
+ exe: ovs-vswitchd
+{%- endif %}
linux_sysctl_fs:
{%- set bond_interfaces = [] %}
{%- for interface_name, interface in network.interface.items() %}