Add bond member status monitoring.

Pillar value (must be a list of bond interfaces to monitor), for example:
linux.monitoring.bond_status.interfaces = [ 'bond0', 'all', 'etc' ]

Leave bond_status.interfaces undefined to disable (default).

Depends-On: Ia07d4c473bf64d98170f51599caaedb46645ede3

Change-Id: I62a7d59251d37cb6c7fc7b761f63a5599930f1dc
diff --git a/linux/files/collectd_bond_status.conf b/linux/files/collectd_bond_status.conf
new file mode 100644
index 0000000..05f52bb
--- /dev/null
+++ b/linux/files/collectd_bond_status.conf
@@ -0,0 +1,7 @@
+Import "bond_status"
+
+<Module "bond_status">
+  {%- for interface in (plugin.get('interfaces') or []) %}{#- "or []": a null "interfaces" value yields no Bond lines instead of a render error #}
+  Bond "{{ interface }}"
+  {%- endfor %}
+</Module>
diff --git a/linux/meta/collectd.yml b/linux/meta/collectd.yml
index d38f1ae..7219cfd 100644
--- a/linux/meta/collectd.yml
+++ b/linux/meta/collectd.yml
@@ -1,3 +1,4 @@
+{%- from "linux/map.jinja" import monitoring with context %}
 local_plugin:
   linux_network_netlink:
     plugin: netlink
@@ -45,3 +46,12 @@
     plugin: swap
     template: linux/files/collectd_swap.conf
     report_bytes: True
+  {%- set bond_ifaces = (monitoring.bond_status|default({})).interfaces|default([], true) %}{%- if bond_ifaces and bond_ifaces is iterable and bond_ifaces is not string and bond_ifaces is not mapping %}
+  linux_bond_status:  # emitted only when the pillar provides a non-empty list of bond interfaces
+    plugin: python
+    template: linux/files/collectd_bond_status.conf
+    interfaces:
+      {%- for interface in bond_ifaces %}
+      - "{{ interface }}"
+      {%- endfor %}
+  {%- endif %}
diff --git a/linux/meta/heka.yml b/linux/meta/heka.yml
index d45504d..312263e 100644
--- a/linux/meta/heka.yml
+++ b/linux/meta/heka.yml
@@ -1,3 +1,4 @@
+{%- from "linux/map.jinja" import monitoring with context %}
 metric_collector:
   trigger:
     linux_system_cpu_critical:
@@ -136,6 +137,18 @@
         window: 60
         periods: 0
         function: max
+    {%- set bond_ifaces = (monitoring.bond_status|default({})).interfaces|default([], true) %}{%- if bond_ifaces and bond_ifaces is iterable and bond_ifaces is not string and bond_ifaces is not mapping %}
+    linux_bond_status_critical:  # critical while the last bond_status_links_down value is above zero
+      description: Bond members are down.
+      rules:
+      - function: last
+        metric: bond_status_links_down
+        periods: 0
+        relational_operator: '>'
+        threshold: 0
+        window: 120
+      severity: critical
+    {%- endif %}
   alarm:
     linux_system_cpu:
       alerting: enabled
@@ -167,3 +180,9 @@
       alerting: enabled_with_notification
       triggers:
       - linux_system_hdd_errors_critical
+    {%- set bond_ifaces = (monitoring.bond_status|default({})).interfaces|default([], true) %}{%- if bond_ifaces and bond_ifaces is iterable and bond_ifaces is not string and bond_ifaces is not mapping %}
+    linux_bond_status:  # alarm driven by the linux_bond_status_critical trigger
+      alerting: enabled
+      triggers:
+      - linux_bond_status_critical
+    {%- endif %}