initial commit

commit: 03ff34ef2becc70bbeb47b209edf350cee769626 [log] [tgz]
author: Ondrej Smola <ondrej.smola@tcpcloud.eu> Thu Dec 01 01:30:33 2016 +0100
committer: Ondrej Smola <ondrej.smola@tcpcloud.eu> Thu Dec 01 01:30:33 2016 +0100
tree: 16e1c37a5677c1d55fa2d59f6445d4db2fa46dc1
diff --git a/system/heka/alarm/openstack_control.yml b/system/heka/alarm/openstack_control.yml
new file mode 100644
index 0000000..7dcb331
--- /dev/null
+++ b/system/heka/alarm/openstack_control.yml

@@ -0,0 +1,102 @@
+parameters:
+  heka:
+    metric_collector:
+      trigger:
+        # Override the linux_system_cpu_critical and linux_system_cpu_warning
+        # triggers to use specific rules on control nodes
+        linux_system_cpu_critical:
+          description: 'The CPU usage is too high (controller node)'
+          severity: critical
+          rules:  # NOTE(review): no operator joining the two rules is given - confirm the consumer's default
+          - metric: cpu_idle
+            relational_operator: '<='
+            threshold: 5  # stricter than the warning trigger's cpu_idle threshold of 15
+            window: 120  # presumably seconds - TODO confirm
+            periods: 0
+            function: avg
+          - metric: cpu_wait
+            relational_operator: '>='
+            threshold: 35  # stricter than the warning trigger's cpu_wait threshold of 25
+            window: 120
+            periods: 0
+            function: avg
+        linux_system_cpu_warning:
+          description: 'The CPU usage is high (controller node)'
+          severity: 'warning'  # quoted here, unquoted on the critical trigger; both are plain strings
+          enabled: 'true'  # NOTE(review): a quoted string, not a YAML boolean - confirm the consumer accepts it
+          rules:  # same metrics, window and function as the critical trigger; only thresholds differ
+          - metric: cpu_idle
+            relational_operator: '<='
+            threshold: 15
+            window: 120
+            periods: 0
+            function: avg
+          - metric: cpu_wait
+            relational_operator: '>='
+            threshold: 25
+            window: 120
+            periods: 0
+            function: avg
+      alarm:
+        # Tag all the system alarm metrics with "node_role: control" so that
+        # an alarm cluster can be created for control nodes.
+        linux_system_cpu:
+          alerting: enabled
+          triggers:  # both triggers are overridden in this file under metric_collector.trigger
+          - linux_system_cpu_critical
+          - linux_system_cpu_warning
+          dimension:
+            node_role: control
+        linux_system_swap:
+          alerting: enabled
+          triggers:  # not defined in this file; presumably declared by another class - verify
+          - linux_system_swap_usage_critical
+          - linux_system_swap_activity_warning
+          - linux_system_swap_usage_warning
+          dimension:
+            node_role: control
+        linux_system_root_fs:
+          alerting: enabled
+          triggers:  # not defined in this file; presumably declared by another class - verify
+          - linux_system_root_fs_critical
+          - linux_system_root_fs_warning
+          dimension:
+            node_role: control
+        linux_system_network_rx:
+          alerting: enabled
+          triggers:  # not defined in this file; presumably declared by another class - verify
+          - linux_system_network_critical_dropped_rx
+          - linux_system_network_warning_dropped_rx
+          dimension:
+            node_role: control
+        linux_system_network_tx:
+          alerting: enabled
+          triggers:  # not defined in this file; presumably declared by another class - verify
+          - linux_system_network_critical_dropped_tx
+          - linux_system_network_warning_dropped_tx
+          dimension:
+            node_role: control
+        linux_system_hdd_errors:
+          alerting: enabled_with_notification  # the other alarms in this block use plain 'enabled'
+          triggers:  # not defined in this file; presumably declared by another class - verify
+          - linux_system_hdd_errors_critical
+          dimension:
+            node_role: control
+    aggregator:
+      alarm_cluster:
+        control_nodes:
+          policy: majority_of_members
+          alerting: enabled_with_notification
+          group_by: hostname
+          match:  # same dimension the alarms under metric_collector.alarm are tagged with
+            node_role: control
+          members:  # the six alarm names declared under metric_collector.alarm
+          - linux_system_cpu
+          - linux_system_swap
+          - linux_system_root_fs
+          - linux_system_network_rx
+          - linux_system_network_tx
+          - linux_system_hdd_errors
+          dimension:
+            cluster_name: control
+            nagios_host: 01-node-clusters
commit	03ff34ef2becc70bbeb47b209edf350cee769626	[log] [tgz]
author	Ondrej Smola <ondrej.smola@tcpcloud.eu>	Thu Dec 01 01:30:33 2016 +0100
committer	Ondrej Smola <ondrej.smola@tcpcloud.eu>	Thu Dec 01 01:30:33 2016 +0100
tree	16e1c37a5677c1d55fa2d59f6445d4db2fa46dc1