initial commit
diff --git a/system/heka/aggregator/cluster.yml b/system/heka/aggregator/cluster.yml
new file mode 100644
index 0000000..e2ee129
--- /dev/null
+++ b/system/heka/aggregator/cluster.yml
@@ -0,0 +1,30 @@
+classes:
+- service.heka.aggregator.cluster
+parameters:
+ _param:
+ nagios_default_host_alarm_clusters: 00-clusters  # value consumed below as ${_param:nagios_default_host_alarm_clusters}
+ nagios_host_dimension_key: nagios_host  # value consumed below as ${_param:nagios_host_dimension_key}
+ heka:
+ aggregator:
+ influxdb_host: ${_param:heka_influxdb_host}  # heka_influxdb_host is not defined in this file; expected from the including model
+ influxdb_port: ${_param:influxdb_port}
+ influxdb_database: ${_param:influxdb_database}
+ influxdb_username: ${_param:influxdb_user}  # note: the parameter is named influxdb_user, not influxdb_username
+ influxdb_password: ${_param:influxdb_password}
+ nagios_host: ${_param:nagios_host}
+ nagios_username: ${_param:nagios_username}
+ nagios_password: ${_param:nagios_password}
+ nagios_port: ${_param:nagios_status_port}  # note: the parameter is named nagios_status_port, not nagios_port
+ nagios_default_host_alarm_clusters: ${_param:nagios_default_host_alarm_clusters}
+ nagios_host_dimension_key: ${_param:nagios_host_dimension_key}
+ keepalived:
+ cluster:
+ instance:
+ stacklight_monitor_vip:
+ notify_action:  # state hooks: start the aggregator on master, stop it on backup and on fault
+ master:
+ - service aggregator start
+ backup:
+ - service aggregator stop
+ fault:
+ - service aggregator stop
diff --git a/system/heka/aggregator/single.yml b/system/heka/aggregator/single.yml
new file mode 100644
index 0000000..c252bd6
--- /dev/null
+++ b/system/heka/aggregator/single.yml
@@ -0,0 +1,19 @@
+classes:
+- service.heka.aggregator.single
+parameters:
+ _param:
+ nagios_default_host_alarm_clusters: 00-clusters  # value consumed below as ${_param:nagios_default_host_alarm_clusters}
+ nagios_host_dimension_key: nagios_host  # value consumed below as ${_param:nagios_host_dimension_key}
+ heka:
+ aggregator:
+ influxdb_host: ${_param:heka_influxdb_host}  # heka_influxdb_host is not defined in this file; expected from the including model
+ influxdb_port: ${_param:influxdb_port}
+ influxdb_database: ${_param:influxdb_database}
+ influxdb_username: ${_param:influxdb_user}  # note: the parameter is named influxdb_user, not influxdb_username
+ influxdb_password: ${_param:influxdb_password}
+ nagios_host: ${_param:nagios_host}
+ nagios_username: ${_param:nagios_username}
+ nagios_password: ${_param:nagios_password}
+ nagios_port: ${_param:nagios_status_port}  # note: the parameter is named nagios_status_port, not nagios_port
+ nagios_default_host_alarm_clusters: ${_param:nagios_default_host_alarm_clusters}
+ nagios_host_dimension_key: ${_param:nagios_host_dimension_key}
diff --git a/system/heka/alarm/openstack_compute.yml b/system/heka/alarm/openstack_compute.yml
new file mode 100644
index 0000000..d4fda7d
--- /dev/null
+++ b/system/heka/alarm/openstack_compute.yml
@@ -0,0 +1,90 @@
+parameters:
+ heka:
+ metric_collector:
+ trigger:
+ # Override the linux_system_cpu_critical and linux_system_cpu_warning
+ # triggers to use specific rules on compute nodes
+ linux_system_cpu_critical:
+ description: 'The CPU usage is too high (compute node)'
+ severity: critical
+ rules:
+ - metric: cpu_wait  # single rule on cpu_wait; the control-node file additionally checks cpu_idle
+ relational_operator: '>='
+ threshold: 30
+ window: 120
+ periods: 0
+ function: avg
+ linux_system_cpu_warning:
+ description: 'The CPU usage is high (compute node)'
+ severity: 'warning'
+ enabled: 'true'  # quoted, so YAML loads the string 'true' rather than a boolean; the critical trigger sets no 'enabled' key
+ rules:
+ - metric: cpu_wait
+ relational_operator: '>='
+ threshold: 20  # lower than the critical trigger's threshold of 30
+ window: 120
+ periods: 0
+ function: avg
+ alarm:
+ # Tag all the system alarm metrics with "node_role: compute". This
+ # makes it possible to create an alarm cluster for compute nodes.
+ linux_system_cpu:
+ alerting: enabled
+ triggers:
+ - linux_system_cpu_critical  # both CPU triggers are overridden earlier in this file
+ - linux_system_cpu_warning
+ dimension:
+ node_role: compute  # same key/value the compute_nodes alarm cluster lists under 'match'
+ linux_system_swap:
+ alerting: enabled
+ triggers:
+ - linux_system_swap_usage_critical  # swap/fs/network/hdd triggers are not defined in this file
+ - linux_system_swap_activity_warning
+ - linux_system_swap_usage_warning
+ dimension:
+ node_role: compute
+ linux_system_root_fs:
+ alerting: enabled
+ triggers:
+ - linux_system_root_fs_critical
+ - linux_system_root_fs_warning
+ dimension:
+ node_role: compute
+ linux_system_network_rx:
+ alerting: enabled
+ triggers:
+ - linux_system_network_critical_dropped_rx
+ - linux_system_network_warning_dropped_rx
+ dimension:
+ node_role: compute
+ linux_system_network_tx:
+ alerting: enabled
+ triggers:
+ - linux_system_network_critical_dropped_tx
+ - linux_system_network_warning_dropped_tx
+ dimension:
+ node_role: compute
+ linux_system_hdd_errors:
+ alerting: enabled_with_notification  # the only alarm in this file not using plain 'enabled'
+ triggers:
+ - linux_system_hdd_errors_critical  # critical trigger only; no warning counterpart is listed
+ dimension:
+ node_role: compute
+ aggregator:
+ alarm_cluster:
+ compute_nodes:
+ policy: majority_of_members
+ alerting: enabled_with_notification
+ group_by: hostname
+ match:
+ node_role: compute  # same dimension the alarms earlier in this file are tagged with
+ members:  # the six alarm names declared under 'alarm' in this file
+ - linux_system_cpu
+ - linux_system_swap
+ - linux_system_root_fs
+ - linux_system_network_rx
+ - linux_system_network_tx
+ - linux_system_hdd_errors
+ dimension:
+ cluster_name: compute
+ nagios_host: 01-node-clusters  # key equals the nagios_host_dimension_key value set in the aggregator classes
diff --git a/system/heka/alarm/openstack_control.yml b/system/heka/alarm/openstack_control.yml
new file mode 100644
index 0000000..7dcb331
--- /dev/null
+++ b/system/heka/alarm/openstack_control.yml
@@ -0,0 +1,102 @@
+parameters:
+ heka:
+ metric_collector:
+ trigger:
+ # Override the linux_system_cpu_critical and linux_system_cpu_warning
+ # triggers to use specific rules on control nodes
+ linux_system_cpu_critical:
+ description: 'The CPU usage is too high (controller node)'
+ severity: critical
+ rules:
+ - metric: cpu_idle  # first of two rules; NOTE(review): how multiple rules combine is not visible here
+ relational_operator: '<='
+ threshold: 5
+ window: 120
+ periods: 0
+ function: avg
+ - metric: cpu_wait
+ relational_operator: '>='
+ threshold: 35  # the compute-node file uses 30 for the same metric
+ window: 120
+ periods: 0
+ function: avg
+ linux_system_cpu_warning:
+ description: 'The CPU usage is high (controller node)'
+ severity: 'warning'
+ enabled: 'true'  # quoted, so YAML loads the string 'true' rather than a boolean; the critical trigger sets no 'enabled' key
+ rules:
+ - metric: cpu_idle
+ relational_operator: '<='
+ threshold: 15
+ window: 120
+ periods: 0
+ function: avg
+ - metric: cpu_wait
+ relational_operator: '>='
+ threshold: 25  # the compute-node file uses 20 for the same metric
+ window: 120
+ periods: 0
+ function: avg
+ alarm:
+ # Tag all the system alarm metrics with "node_role: control". This
+ # makes it possible to create an alarm cluster for control nodes.
+ linux_system_cpu:
+ alerting: enabled
+ triggers:
+ - linux_system_cpu_critical  # both CPU triggers are overridden earlier in this file
+ - linux_system_cpu_warning
+ dimension:
+ node_role: control  # same key/value the control_nodes alarm cluster lists under 'match'
+ linux_system_swap:
+ alerting: enabled
+ triggers:
+ - linux_system_swap_usage_critical  # swap/fs/network/hdd triggers are not defined in this file
+ - linux_system_swap_activity_warning
+ - linux_system_swap_usage_warning
+ dimension:
+ node_role: control
+ linux_system_root_fs:
+ alerting: enabled
+ triggers:
+ - linux_system_root_fs_critical
+ - linux_system_root_fs_warning
+ dimension:
+ node_role: control
+ linux_system_network_rx:
+ alerting: enabled
+ triggers:
+ - linux_system_network_critical_dropped_rx
+ - linux_system_network_warning_dropped_rx
+ dimension:
+ node_role: control
+ linux_system_network_tx:
+ alerting: enabled
+ triggers:
+ - linux_system_network_critical_dropped_tx
+ - linux_system_network_warning_dropped_tx
+ dimension:
+ node_role: control
+ linux_system_hdd_errors:
+ alerting: enabled_with_notification  # the only alarm in this file not using plain 'enabled'
+ triggers:
+ - linux_system_hdd_errors_critical  # critical trigger only; no warning counterpart is listed
+ dimension:
+ node_role: control
+ aggregator:
+ alarm_cluster:
+ control_nodes:
+ policy: majority_of_members
+ alerting: enabled_with_notification
+ group_by: hostname
+ match:
+ node_role: control  # same dimension the alarms earlier in this file are tagged with
+ members:  # the six alarm names declared under 'alarm' in this file
+ - linux_system_cpu
+ - linux_system_swap
+ - linux_system_root_fs
+ - linux_system_network_rx
+ - linux_system_network_tx
+ - linux_system_hdd_errors
+ dimension:
+ cluster_name: control
+ nagios_host: 01-node-clusters  # same value as in the compute_nodes cluster; key equals the aggregator's nagios_host_dimension_key
diff --git a/system/heka/log_collector/single.yml b/system/heka/log_collector/single.yml
new file mode 100644
index 0000000..73463e4
--- /dev/null
+++ b/system/heka/log_collector/single.yml
@@ -0,0 +1,7 @@
+classes:
+- service.heka.log_collector.single
+parameters:
+ heka:
+ log_collector:
+ elasticsearch_host: ${_param:heka_elasticsearch_host}  # heka_elasticsearch_host is not defined in this file
+ elasticsearch_port: 9200  # literal here, whereas the host is read from _param
diff --git a/system/heka/metric_collector/single.yml b/system/heka/metric_collector/single.yml
new file mode 100644
index 0000000..960d271
--- /dev/null
+++ b/system/heka/metric_collector/single.yml
@@ -0,0 +1,17 @@
+classes:
+- service.heka.metric_collector.single
+parameters:
+ heka:
+ metric_collector:
+ aggregator_host: ${_param:stacklight_monitor_address}  # remote_collector reads ${_param:heka_aggregator_host} for this key instead
+ aggregator_port: ${_param:aggregator_port}
+ influxdb_database: lma  # literal; aggregator/remote_collector read ${_param:influxdb_database} — NOTE(review): confirm this is intentional
+ influxdb_host: ${_param:heka_influxdb_host}
+ influxdb_password: ${_param:influxdb_stacklight_password}  # a different parameter from the influxdb_password used by the aggregator
+ influxdb_port: 8086  # literal; aggregator/remote_collector read ${_param:influxdb_port}
+ influxdb_time_precision: ms
+ influxdb_username: lma  # literal; aggregator/remote_collector read ${_param:influxdb_user}
+ nagios_host: ${_param:nagios_host}
+ nagios_username: ${_param:nagios_username}
+ nagios_password: ${_param:nagios_password}
+ nagios_port: ${_param:nagios_status_port}  # note: the parameter is named nagios_status_port, not nagios_port
diff --git a/system/heka/remote_collector/cluster.yml b/system/heka/remote_collector/cluster.yml
new file mode 100644
index 0000000..f3344c4
--- /dev/null
+++ b/system/heka/remote_collector/cluster.yml
@@ -0,0 +1,23 @@
+classes:
+- service.heka.remote_collector.cluster
+parameters:
+ heka:
+ remote_collector:
+ influxdb_host: ${_param:heka_influxdb_host}  # none of the _param values referenced here are defined in this file
+ influxdb_port: ${_param:influxdb_port}
+ influxdb_database: ${_param:influxdb_database}
+ influxdb_username: ${_param:influxdb_user}  # note: the parameter is named influxdb_user, not influxdb_username
+ influxdb_password: ${_param:influxdb_password}
+ aggregator_host: ${_param:heka_aggregator_host}
+ aggregator_port: ${_param:aggregator_port}
+ keepalived:
+ cluster:
+ instance:
+ stacklight_monitor_vip:
+ notify_action:  # state hooks: start remote_collector on master, stop it on backup and on fault
+ master:
+ - service remote_collector start
+ backup:
+ - service remote_collector stop
+ fault:
+ - service remote_collector stop
diff --git a/system/heka/remote_collector/single.yml b/system/heka/remote_collector/single.yml
new file mode 100644
index 0000000..df33055
--- /dev/null
+++ b/system/heka/remote_collector/single.yml
@@ -0,0 +1,12 @@
+classes:
+- service.heka.remote_collector.single
+parameters:
+ heka:
+ remote_collector:
+ influxdb_host: ${_param:heka_influxdb_host}  # none of the _param values referenced here are defined in this file
+ influxdb_port: ${_param:influxdb_port}
+ influxdb_database: ${_param:influxdb_database}
+ influxdb_username: ${_param:influxdb_user}  # note: the parameter is named influxdb_user, not influxdb_username
+ influxdb_password: ${_param:influxdb_password}
+ aggregator_host: ${_param:heka_aggregator_host}  # metric_collector reads ${_param:stacklight_monitor_address} for this key instead
+ aggregator_port: ${_param:aggregator_port}
diff --git a/system/heka/router/single.yml b/system/heka/router/single.yml
new file mode 100644
index 0000000..8801e42
--- /dev/null
+++ b/system/heka/router/single.yml
@@ -0,0 +1,27 @@
+classes:
+- service.rabbitmq.server.single
+- service.heka.server.amqp.router
+parameters:
+ _param:
+ heka_router_input_exchange: heka  # same exchange name the shipper class sets as its output exchange
+ heka_router_input_host: ${_param:heka_amqp_host}  # heka_amqp_host is not defined in this file
+ heka_router_input_password: ${_param:heka_amqp_password}  # heka_amqp_password is not defined in this file
+ heka_router_input_vhost: log  # same name as the 'log' entry declared under rabbitmq.server.host below
+ heka_router_input_user: log
+ heka_router_output_host: ${_param:heka_elasticsearch_host}
+ heka_router_prefetch_count: 20
+ rabbitmq_secret_key: secret_key  # NOTE(review): placeholder-looking literal — confirm it is overridden per deployment
+ rabbitmq_admin_name: admin
+ rabbitmq_admin_password: workshoplearning42  # NOTE(review): plaintext credential committed to the model — override or source from a secret store
+ kibana_elasticsearch_host: localhost
+ heka:
+ shipper:
+ enabled: false  # switches the heka shipper off in this class
+ rabbitmq:
+ server:
+ host:
+ 'log':
+ enabled: true
+ user: log  # same user name as heka_router_input_user above
+ password: ${_param:heka_amqp_password}  # same parameter as heka_router_input_password above
+
diff --git a/system/heka/shipper/single.yml b/system/heka/shipper/single.yml
new file mode 100644
index 0000000..0aee02c
--- /dev/null
+++ b/system/heka/shipper/single.yml
@@ -0,0 +1,10 @@
+classes:
+- service.heka.server.amqp.shipper
+parameters:
+ _param:
+ heka_shipper_output_exchange: heka  # same exchange name the router class sets as its input exchange
+ heka_shipper_output_host: ${_param:heka_amqp_host}  # heka_amqp_host is not defined in this file
+ heka_shipper_output_password: ${_param:heka_amqp_password}  # heka_amqp_password is not defined in this file
+ heka_shipper_output_vhost: log  # same vhost the router class sets as its input vhost
+ heka_shipper_output_user: log  # same user the router class sets as its input user
+