Merge "Added SMART disk monitoring for physical devices" into release/2019.2.0
diff --git a/linux/meta/fluentd.yml b/linux/meta/fluentd.yml
index f6d6720..60d2ef5 100644
--- a/linux/meta/fluentd.yml
+++ b/linux/meta/fluentd.yml
@@ -29,7 +29,16 @@
label:
- name: host
value: ${Hostname}
+ metric_hdd_errors_exclude:
+ tag: metric.hdd_errors
+ type: grep
+ # Excludes records whose Payload matches the regexp below (libvirt / virDomainNetFind messages and volume attach/detach messages). Regexp demo: https://regex101.com/r/ZRMX9j/3
+ exclude:
+ - name: Payload
+ regexp: (virDomainNetFind|libvirt|(At|De)tach(ing)?\ volume)
metric_hdd_errors_parse:
+ require:
+ - metric_hdd_errors_exclude
tag: metric.hdd_errors
type: parser
key_name: Payload
@@ -87,6 +96,14 @@
- name: ident
regexp: '^(.*)$'
result: $1.systemd
+ {%- if pillar.get('telegraf', {}).get('agent', {}).get('enabled', False) %}
+ push_to_telegraf:  # rendered only when pillar telegraf:agent:enabled is set
+ require_in:  # NOTE(review): presumably places this match ahead of push_to_default - confirm against the fluentd formula
+ - push_to_default
+ tag: 'telegraf.systemd'
+ type: relabel  # hands events with the tag above to the label named below
+ label: telegraf
+ {%- endif %}
push_to_default:
tag: '*.systemd'
type: copy
@@ -119,7 +136,16 @@
label:
default_metric:
filter:
+ metric_hdd_errors_exclude:
+ tag: metric.hdd_errors
+ type: grep
+ # Excludes records whose Payload matches the regexp below (libvirt / virDomainNetFind messages and volume attach/detach messages). Regexp demo: https://regex101.com/r/ZRMX9j/3
+ exclude:
+ - name: Payload
+ regexp: (virDomainNetFind|libvirt|(At|De)tach(ing)?\ volume)
metric_hdd_errors_parse:
+ require:
+ - metric_hdd_errors_exclude
tag: metric.hdd_errors
type: parser
key_name: Payload
diff --git a/linux/meta/prometheus.yml b/linux/meta/prometheus.yml
index 6e4551f..c202548 100644
--- a/linux/meta/prometheus.yml
+++ b/linux/meta/prometheus.yml
@@ -286,8 +286,29 @@
annotations:
summary: "CPU terminated {{ squeeze_rate_threshold }}{%- raw %} net_rx_action loops per second"
description: "The rate of net_rx_action loops terminations on the {{ $labels.host }} node is {{ $value }} per second during the last 7 minutes. Modify the net.core.netdev_budget and net.core.netdev_budget_usecs kernel parameters."
-{%- endraw -%}
-
+ {%- endraw %}
+ {%- if network.bridge == 'openvswitch' %}
+ {%- raw %}
+ ProcessOVSVswitchdMemoryWarning:  # warning: ovs-vswitchd virtual memory size / mem_total stays above 0.2 for 5m
+ if: procstat_memory_vms{process_name="ovs-vswitchd"} / on(host) mem_total > 0.2
+ for: 5m
+ labels:
+ severity: warning
+ service: ovs
+ annotations:
+ summary: "ovs-vswitchd takes more than 20% of system memory"
+ description: "The virtual memory size of the ovs-vswitchd process on the {{ $labels.host }} node is {{ $value }} of the total system memory (warning threshold is 0.2) during the last 5 minutes."
+ ProcessOVSVswitchdMemoryCritical:  # critical: ovs-vswitchd virtual memory size / mem_total stays above 0.3 for 5m
+ if: procstat_memory_vms{process_name="ovs-vswitchd"} / on(host) mem_total > 0.3
+ for: 5m
+ labels:
+ severity: critical
+ service: ovs
+ annotations:
+ summary: "ovs-vswitchd takes more than 30% of system memory"
+ description: "The virtual memory size of the ovs-vswitchd process on the {{ $labels.host }} node is {{ $value }} of the total system memory (critical threshold is 0.3) during the last 5 minutes."
+ {%- endraw %}
+ {%- endif %}
{%- set bond_interfaces = [] %}
{%- for interface_name, interface in network.interface.items() %}
{%- if interface.type == 'bond' and interface.enabled == True %}
diff --git a/linux/meta/telegraf.yml b/linux/meta/telegraf.yml
index 1d83ca6..934824a 100644
--- a/linux/meta/telegraf.yml
+++ b/linux/meta/telegraf.yml
@@ -38,6 +38,10 @@
exe: sshd
cron:
exe: cron
+{%- if network.bridge == 'openvswitch' %}
+ ovs-vswitchd:  # added only when network.bridge is 'openvswitch'
+ exe: ovs-vswitchd
+{%- endif %}
linux_sysctl_fs:
{%- set bond_interfaces = [] %}
{%- for interface_name, interface in network.interface.items() %}