Add alarms and alarm clusters
Change-Id: I5239286ac85c7ee5c2b96b564b45ceb9436b1822
diff --git a/heat/meta/heka.yml b/heat/meta/heka.yml
index d861a58..e5c2c8a 100644
--- a/heat/meta/heka.yml
+++ b/heat/meta/heka.yml
@@ -1,3 +1,4 @@
+{%- if pillar.heat is defined and pillar.heat.server is defined %}
log_collector:
decoder:
heat:
@@ -18,3 +19,159 @@
priority: ["^Seq"]
decoder: "heat_decoder"
splitter: "heat_splitter"
+metric_collector:  # triggers and alarms declared under the metric_collector key (nesting is flattened in this diff view)
+ trigger:
+ heat_logs_error:  # rule below: max(log_messages{service=heat, level=error}) > 0.1
+ description: 'Too many errors have been detected in Heat logs'
+ severity: warning
+ no_data_policy: okay  # NOTE(review): presumably missing data counts as OK for this trigger; confirm
+ rules:
+ - metric: log_messages
+ field:
+ service: heat
+ level: error
+ relational_operator: '>'
+ threshold: 0.1
+ window: 70  # NOTE(review): unit is not stated in this file (presumably seconds); confirm
+ periods: 0
+ function: max
+ heat_api_local_endpoint:  # rule below: last(openstack_check_local_api{service=heat-api}) == 0
+ description: 'Heat API is locally down'
+ severity: down
+ rules:
+ - metric: openstack_check_local_api
+ field:
+ service: heat-api
+ relational_operator: '=='
+ threshold: 0
+ window: 60
+ periods: 0
+ function: last
+ heat_cfn_api_local_endpoint:  # same rule as heat_api_local_endpoint, for service=heat-cfn-api
+ description: 'Heat CFN API is locally down'
+ severity: down
+ rules:
+ - metric: openstack_check_local_api
+ field:
+ service: heat-cfn-api
+ relational_operator: '=='
+ threshold: 0
+ window: 60
+ periods: 0
+ function: last
+ alarm:  # each alarm names one trigger from the trigger section above and sets a service dimension
+ heat_logs:
+ alerting: enabled
+ triggers:
+ - heat_logs_error
+ dimension:
+ service: heat-logs  # same value the heat_logs alarm cluster matches on
+ heat_api_endpoint:
+ alerting: enabled
+ triggers:
+ - heat_api_local_endpoint
+ dimension:
+ service: heat-api-endpoint  # same value the heat_api_endpoint alarm cluster matches on
+ heat_cfn_api_endpoint:
+ alerting: enabled
+ triggers:
+ - heat_cfn_api_local_endpoint
+ dimension:
+ service: heat-cfn-api-endpoint  # same value the heat_cfn_api_endpoint alarm cluster matches on
+remote_collector:  # triggers and alarms declared under the remote_collector key (nesting is flattened in this diff view)
+ trigger:
+ heat_api_check_failed:  # rule below: last(openstack_check_api{service=heat-api}) == 0
+ description: 'Endpoint check for heat-api is failed'
+ severity: down
+ rules:
+ - metric: openstack_check_api
+ field:
+ service: heat-api
+ relational_operator: '=='
+ threshold: 0
+ window: 60  # NOTE(review): unit is not stated in this file (presumably seconds); confirm
+ periods: 0
+ function: last
+ heat_cfn_api_check_failed:  # same rule as heat_api_check_failed, for service=heat-cfn-api
+ description: 'Endpoint check for heat-cfn-api is failed'
+ severity: down
+ rules:
+ - metric: openstack_check_api
+ field:
+ service: heat-cfn-api
+ relational_operator: '=='
+ threshold: 0
+ window: 60
+ periods: 0
+ function: last
+ alarm:  # each alarm names one trigger from the trigger section above and sets a service dimension
+ heat_api_check:
+ alerting: enabled  # string mode, the same value the metric_collector alarms in this file use
+ triggers:
+ - heat_api_check_failed
+ dimension:
+ service: heat-api-check  # same value the heat_api_check alarm cluster matches on
+ heat_cfn_api_check:
+ alerting: enabled  # string mode, the same value the metric_collector alarms in this file use
+ triggers:
+ - heat_cfn_api_check_failed
+ dimension:
+ service: heat-cfn-api-check  # same value the heat_cfn_api_check alarm cluster matches on
+aggregator:  # alarm clusters declared under the aggregator key (nesting is flattened in this diff view)
+ alarm_cluster:
+ heat_logs:  # matches service=heat-logs, groups by hostname, sets dimension service=heat
+ policy: highest_severity
+ group_by: hostname
+ match:
+ service: heat-logs
+ members:
+ - heat_logs
+ dimension:
+ service: heat
+ heat_api_endpoint:  # matches service=heat-api-endpoint, groups by hostname, sets dimension service=heat
+ policy: availability_of_members
+ group_by: hostname
+ match:
+ service: heat-api-endpoint
+ members:
+ - heat_api_endpoint
+ dimension:
+ service: heat
+ heat_cfn_api_endpoint:  # matches service=heat-cfn-api-endpoint, groups by hostname, sets dimension service=heat
+ policy: availability_of_members
+ group_by: hostname
+ match:
+ service: heat-cfn-api-endpoint
+ members:
+ - heat_cfn_api_endpoint
+ dimension:
+ service: heat
+ heat_api_check:  # no group_by here, unlike the logs/endpoint clusters above
+ policy: highest_severity
+ match:
+ service: heat-api-check
+ members:
+ - heat_api_check
+ dimension:
+ service: heat
+ heat_cfn_api_check:  # no group_by here, unlike the logs/endpoint clusters above
+ policy: highest_severity
+ match:
+ service: heat-cfn-api-check
+ members:
+ - heat_cfn_api_check
+ dimension:
+ service: heat
+ heat:  # matches service=heat, the dimension set by each of the five clusters above
+ policy: highest_severity
+ match:
+ service: heat
+ members:
+ - heat_logs
+ - heat_api_endpoint
+ - heat_cfn_api_endpoint
+ - heat_api_check
+ - heat_cfn_api_check
+ dimension:
+ cluster_name: heat
+{%- endif %}