============
Heka Formula
============

Heka is an open-source stream processing software system developed by Mozilla.
It is a "Swiss Army knife" type of tool for data processing.

Sample pillars
==============

Metric collector service
------------------------

Local alarm definition for the nova compute role, excerpt from `nova/meta/heka.yml`.

.. code-block:: yaml

    heka:
      metric_collector:
        trigger:
          nova_compute_filesystem_warning:
            enabled: True # implicit
            description: "The nova instance filesystem's root free space is low."
            severity: warning
            logical_operator: or # implicit
            rules:
              - metric: fs_space_percent_free
                relational_operator: '<'
                threshold: 10
                window: 60
                periods: 0
                function: min
                dimension:
                  fs: '/var/lib/nova'
          nova_compute_filesystem_critical:
            description: "The nova instance filesystem's root free space is low."
            severity: critical
            rules:
              - metric: fs_space_percent_free
                relational_operator: '<'
                threshold: 5
                window: 60
                periods: 0
                function: min
                dimension:
                  fs: '/var/lib/nova'
        alarm:
          nova_compute_filesystem:
            notifications: False
            alerting: True
            dimension:
              node_role: compute
            triggers:
              - nova_compute_filesystem_warning
              - nova_compute_filesystem_critical
      aggregator:
        alarm_cluster:
          nova_compute_service: # the service_role format
            policy: highest_severity
            group_by: member
            match:
              node_role: compute
            dimension:
              cluster: nova-compute-plane
            members:
              - nova_compute_logs
              - nova_compute_filesystem
              - nova_compute_instances
              - nova_compute_libvirt
              - nova_compute_free_cpu
              - nova_compute_free_mem
            hints:
              - neutron_compute # or contrail_vrouter for contrail nodes
          nova_compute_plane: # the service_role format
            engine: gse
            policy: highest_severity
            group_by: member
            match:
              cluster: nova-compute-plane

Default CPU usage alarms, excerpt from `linux/meta/heka.yml`.

.. code-block:: yaml

    metric_collector:
      trigger:
        linux_system_cpu_critical:
          description: 'The CPU usage is too high.'
          severity: critical
          rules:
            - metric: cpu_wait
              relational_operator: '>='
              threshold: 35
              window: 120
              periods: 0
              function: avg
            - metric: cpu_idle
              relational_operator: '<='
              threshold: 5
              window: 120
              function: avg
        linux_system_cpu_warning:
          description: 'The CPU wait times are high.'
          severity: warning
          rules:
            - metric: cpu_wait
              relational_operator: '>='
              threshold: 15
              window: 120
              periods: 0
              function: avg
      alarm:
        linux_system_cpu:
          notifications: False
          alerting: True
          triggers:
            - linux_system_cpu_warning # will not render if referenced trigger is disabled
            - linux_system_cpu_critical
          dimension:
            node_role: control


Remote collector service
------------------------

Remote API check example, excerpt from `nova/meta/heka.yml`.

.. code-block:: yaml

    heka:
      remote_collector:
        trigger:
          nova_control_api_fail:
            description: 'Endpoint check for nova-api failed.'
            severity: critical
            rules:
              - metric: openstack_check_api
                relational_operator: '=='
                threshold: 0
                window: 60
                periods: 0
                function: last
                dimension:
                  service: 'nova-api'
        alarm:
          nova_control_api:
            notifications: False
            alerting: True
            dimension:
              service: nova-control
            triggers:
              - nova_control_api_fail

Corresponding clusters and alarms, excerpt from `nova/meta/heka.yml`.

.. code-block:: yaml

    heka:
      aggregator:
        alarm_cluster:
          nova_control_service:
            policy: highest_severity
            group_by: member
            match:
              service: nova-control
            dimension:
              cluster: openstack-control-plane
            members:
              - nova_control_api
              - nova_control_endpoint
            hints:
              - neutron_control # or contrail_vrouter for contrail nodes
              - keystone_control
          openstack_control_plane:
            engine: gse
            policy: highest_severity
            group_by: member
            match:
              cluster: openstack-control-plane

Read more
=========

* https://hekad.readthedocs.org/en/latest/index.html