Add bond member status monitoring.
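Pillar values:
linux.monitoring.bond_status.interfaces = [ 'bond0', 'all', ... ]
Each list entry is rendered as a Bond "<entry>" line in the collectd bond_status module configuration.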
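Leave linux.monitoring.bond_status.interfaces undefined (the default) to disable bond monitoring: the collectd plugin, the Heka trigger and the Heka alarm are only configured when it is defined as a list.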
Depends-On: Ia07d4c473bf64d98170f51599caaedb46645ede3
Change-Id: I62a7d59251d37cb6c7fc7b761f63a5599930f1dc
diff --git a/linux/files/collectd_bond_status.conf b/linux/files/collectd_bond_status.conf
new file mode 100644
index 0000000..05f52bb
--- /dev/null
+++ b/linux/files/collectd_bond_status.conf
@@ -0,0 +1,7 @@
+Import "bond_status"
+
+<Module "bond_status">
+ {%- for interface in plugin.get('interfaces', []) %}
+ Bond "{{ interface }}"
+ {%- endfor %}
+</Module>
diff --git a/linux/meta/collectd.yml b/linux/meta/collectd.yml
index d38f1ae..7219cfd 100644
--- a/linux/meta/collectd.yml
+++ b/linux/meta/collectd.yml
@@ -1,3 +1,4 @@
+{%- from "linux/map.jinja" import monitoring with context %}
local_plugin:
linux_network_netlink:
plugin: netlink
@@ -45,3 +46,12 @@
plugin: swap
template: linux/files/collectd_swap.conf
report_bytes: True
+ {%- if monitoring.bond_status.interfaces is defined and monitoring.bond_status.interfaces is list %}
+ linux_bond_status:
+ plugin: python
+ template: linux/files/collectd_bond_status.conf
+ interfaces:
+ {%- for interface in monitoring.bond_status.interfaces %}
+ - {{ interface }}
+ {%- endfor %}
+ {%- endif %}
diff --git a/linux/meta/heka.yml b/linux/meta/heka.yml
index d45504d..312263e 100644
--- a/linux/meta/heka.yml
+++ b/linux/meta/heka.yml
@@ -1,3 +1,4 @@
+{%- from "linux/map.jinja" import monitoring with context %}
metric_collector:
trigger:
linux_system_cpu_critical:
@@ -136,6 +137,18 @@
window: 60
periods: 0
function: max
+ {%- if monitoring.bond_status.interfaces is defined and monitoring.bond_status.interfaces is list %}
+ linux_bond_status_critical:
+ description: Bond members are down.
+ rules:
+ - function: last
+ metric: bond_status_links_down
+ periods: 0
+ relational_operator: '>'
+ threshold: 0
+ window: 120
+ severity: critical
+ {%- endif %}
alarm:
linux_system_cpu:
alerting: enabled
@@ -167,3 +180,9 @@
alerting: enabled_with_notification
triggers:
- linux_system_hdd_errors_critical
+ {%- if monitoring.bond_status.interfaces is defined and monitoring.bond_status.interfaces is list %}
+ linux_bond_status:
+ alerting: enabled
+ triggers:
+ - linux_bond_status_critical
+ {%- endif %}