Fix disk/memory alarms
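This change restores the correct order of the disk and memory triggers:
the critical trigger is now listed before the warning trigger. It also
sets 'group_by: hostname' on the rabbitmq_cluster entry that matches
those alarms, because an alarm on a single node has a direct impact on
the whole cluster.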
diff --git a/rabbitmq/meta/heka.yml b/rabbitmq/meta/heka.yml
index 8542b75..4a35b4d 100644
--- a/rabbitmq/meta/heka.yml
+++ b/rabbitmq/meta/heka.yml
@@ -89,15 +89,15 @@
rabbitmq_server_disk:
alerting: enabled
triggers:
- - rabbitmq_disk_limit_warning
- rabbitmq_disk_limit_critical
+ - rabbitmq_disk_limit_warning
dimension:
service: rabbitmq-cluster
rabbitmq_server_memory:
alerting: enabled
triggers:
- - rabbitmq_memory_limit_warning
- rabbitmq_memory_limit_critical
+ - rabbitmq_memory_limit_warning
dimension:
service: rabbitmq-cluster
rabbitmq_server_queue:
@@ -115,6 +115,9 @@
rabbitmq_cluster:
alerting: enabled
policy: highest_severity
+ # A 'hostname' group_by is required because an alarm on a single node has
+ # an impact on the whole cluster.
+ group_by: hostname
match:
service: rabbitmq-cluster
members: