{#- Recovered from a web blame view of blob 1950e9b785f3a10eebf323cb48db415c3329a60c.
    Per-line author/date/line-number residue has been removed and the YAML nesting
    re-indented; expressions and strings are unchanged. -#}
{% from "nova/map.jinja" import controller, compute, monitoring with context %}

{#- Role flags: each one is true only when the matching imported mapping has a truthy
    `enabled` key; a missing key counts as disabled. #}
{%- set is_controller = controller.get('enabled', False) %}
{%- set is_compute = compute.get('enabled', False) %}

{#- Nothing below is rendered unless this node is a Nova controller or a Nova compute. #}
{%- if is_controller or is_compute %}
{#- Libvirt exporter settings are produced only on compute nodes whose compute_driver is
    libvirt.LibvirtDriver (also the fallback when the key is absent).
    NOTE(review): `exporters` is not imported in this file, so it has to be supplied by
    whatever renders or includes this template; confirm against the caller. #}
{%- if is_compute and exporters is defined and compute.get('compute_driver', 'libvirt.LibvirtDriver') == 'libvirt.LibvirtDriver' %}
{#- Package list falls back to the single package 'libvirt-exporter' when none is given. #}
{%- set packages = exporters.get('libvirt', {}).get('packages', ('libvirt-exporter', )) %}
  {%- load_yaml as new_exporters_cfg %}
exporters:
  libvirt:
    enabled: true
{%- if packages is defined %}
    packages:
    {% for pkg in packages %}
      - {{ pkg }}
    {% endfor %}
{%- endif %}
    services:
      qemu:
        enabled: true
        bind:
          address: 0.0.0.0
          port: 9177
  {%- endload %}
{{ new_exporters_cfg|yaml(False) }}
{%- endif %}

server:
  alert:
{%- if is_controller %}
{#- Controller-only rules start here. The thresholds below are later multiplied against a
    total count in the rule expressions and scaled by 100 for the alert text, so they are
    used as fractions of 1 even where the pillar key is named `..._percent`.
    NOTE(review): confirm the defaults in nova/map.jinja follow that convention. #}
{%- set minor_threshold = monitoring.services_failed_warning_threshold_percent|float %}
{%- set major_threshold = monitoring.services_failed_critical_threshold_percent|float %}
{%- set minor_compute_threshold = monitoring.computes_failed_warning_threshold_percent|float %}
{%- set major_compute_threshold = monitoring.computes_failed_critical_threshold_percent|float %}
{%- set major_endpoint_threshold = monitoring.endpoint_failed_major_threshold|float %}
{% raw %}
    # The rules in this raw section contain Prometheus-side template placeholders that
    # Jinja must pass through untouched.
    # Fires when the highest check status across all nova.* / placement checks is 0,
    # i.e. no matching check reports a non-zero status.
    NovaApiOutage:
      if: >-
        max(openstack_api_check_status{name=~"nova.*|placement"}) == 0
      labels:
        severity: critical
        service: nova
      annotations:
        summary: "Nova API outage"
        description: >-
          Nova API is not accessible for all available Nova endpoints in the OpenStack service catalog.
    # Fires once per individual nova.* / placement check whose status is 0.
    NovaApiDown:
      if: >-
        openstack_api_check_status{name=~"nova.*|placement"} == 0
      labels:
        severity: major
        service: nova
      annotations:
        summary: "{{ $labels.name }} endpoint is not accessible"
        description: >-
          Nova API is not accessible for the {{ $labels.name }} endpoint.
    # Fires per series when the nova-api HTTP check has reported 0 for two minutes.
    NovaApiEndpointDown:
      if: >-
        http_response_status{name=~"nova-api"} == 0
      for: 2m
      labels:
        severity: minor
        service: nova
      annotations:
        summary: "nova-api endpoint is not accessible"
        description: >-
          The nova-api endpoint on the {{ $labels.host }} node is not accessible for 2 minutes.
{%- endraw %}
{#- Fires when the number of nova-api HTTP checks reporting 0 reaches the configured share
    (major_endpoint_threshold) of all nova-api checks for two minutes. The description
    alternates raw and non-raw fragments so that the Prometheus placeholder survives while
    the threshold is filled in at render time; the raw section opened there stays open
    across the next two rules. #}
    NovaApiEndpointsDownMajor:
      if: >-
        count(http_response_status{name=~"nova-api"} == 0) >= count(http_response_status{name=~"nova-api"}) * {{ major_endpoint_threshold }}
      for: 2m
      labels:
        severity: major
        service: nova
      annotations:
        summary: "{{major_endpoint_threshold * 100}}% of nova-api endpoints are not accessible"
        description: >-
          {% raw %}{{ $value }} nova-api endpoints (>= {% endraw %} {{major_endpoint_threshold * 100}}{% raw %}%) are not accessible for 2 minutes.
    # Fires when every nova-api HTTP check reports 0 for two minutes.
    NovaApiEndpointsOutage:
      if: >-
        count(http_response_status{name=~"nova-api"} == 0) == count(http_response_status{name=~"nova-api"})
      for: 2m
      labels:
        severity: critical
        service: nova
      annotations:
        summary: "nova-api endpoints outage"
        description: >-
          All available nova-api endpoints are not accessible for 2 minutes.
    # Fires per series for any Nova service whose reported state is 0.
    NovaServiceDown:
      if: >-
        openstack_nova_service_state == 0
      labels:
        severity: minor
        service: nova
      annotations:
        summary: "{{ $labels.binary }} service is down"
        description: >-
          The {{ $labels.binary }} service on the {{ $labels.hostname }} node is down.
{%- endraw %}
{#- Share-based rules: the count of series in state 0 is compared with the total count
    multiplied by a threshold. Services other than nova-compute are counted per `binary`
    label; nova-compute has its own rules with its own thresholds. In each rule a raw
    section is opened inside `summary` and closed inside `description` so that only the
    Prometheus placeholders are protected from Jinja. #}
    NovaServicesDownMinor:
      if: >-
        count(openstack_nova_service_state{binary!~"nova-compute"} == 0) by (binary) >= on (binary) count(openstack_nova_service_state{binary!~"nova-compute"}) by (binary) * {{minor_threshold}}
      labels:
        severity: minor
        service: nova
      annotations:
        summary: "{{minor_threshold * 100}}%{%- raw %} of {{ $labels.binary }} services are down"
        description: >-
          {{ $value }} {{ $labels.binary }} services (>= {%- endraw %} {{minor_threshold * 100}}%) are down.
    NovaComputeServicesDownMinor:
      if: >-
        count(openstack_nova_service_state{binary="nova-compute"} == 0) >= count(openstack_nova_service_state{binary="nova-compute"}) * {{minor_compute_threshold}}
      labels:
        severity: minor
        service: nova
      annotations:
        summary: "{{minor_compute_threshold * 100}}%{%- raw %} of nova-compute services are down"
        description: >-
          {{ $value }} nova-compute services (>= {%- endraw %} {{minor_compute_threshold * 100}}%) are down.
    NovaServicesDownMajor:
      if: >-
        count(openstack_nova_service_state{binary!~"nova-compute"} == 0) by (binary) >= on (binary) count(openstack_nova_service_state{binary!~"nova-compute"}) by (binary) * {{major_threshold}}
      labels:
        severity: major
        service: nova
      annotations:
        summary: "{{major_threshold * 100}}%{%- raw %} of {{ $labels.binary }} services are down"
        description: >-
          {{ $value }} {{ $labels.binary }} services (>= {%- endraw %} {{major_threshold * 100}}%) are down.
    NovaComputeServicesDownMajor:
      if: >-
        count(openstack_nova_service_state{binary="nova-compute"} == 0) >= count(openstack_nova_service_state{binary="nova-compute"}) * {{major_compute_threshold}}
      labels:
        severity: major
        service: nova
      annotations:
        summary: "{{major_compute_threshold * 100}}%{%- raw %} of nova-compute services are down"
        description: >-
          {{ $value }} nova-compute services (>= {%- endraw %} {{major_compute_threshold * 100}}{%- raw %}%) are down.
    # Still inside the raw section reopened at the end of the previous description.
    # Fires per binary when the count of series in state 0 equals the count of all series
    # for that binary; nova-compute is not excluded here.
    NovaServiceOutage:
      if: >-
        count(openstack_nova_service_state == 0) by (binary) == on (binary) count(openstack_nova_service_state) by (binary)
      labels:
        severity: critical
        service: nova
      annotations:
        summary: "{{ $labels.binary }} service outage"
        description: >-
          All {{ $labels.binary }} services are down.
{%- endraw %}
{#- Capacity thresholds. Each one is multiplied against a capacity metric in the rule
    expressions and scaled by 100 for the alert text, so they are used as fractions of 1.
    NOTE(review): confirm the defaults in nova/map.jinja follow that convention. #}
{%- set cpu_minor_threshold = monitoring.cpu_minor_threshold|float %}
{%- set cpu_major_threshold = monitoring.cpu_major_threshold|float %}
{%- set ram_major_threshold = monitoring.ram_major_threshold|float %}
{%- set ram_critical_threshold = monitoring.ram_critical_threshold|float %}
{%- set disk_major_threshold = monitoring.disk_major_threshold|float %}
{%- set disk_critical_threshold = monitoring.disk_critical_threshold|float %}
{#- Per-hypervisor CPU rules. The measured quantity is system_load15, not a Nova
    allocation metric: label_replace copies its `host` label into `hostname` so that it can
    be matched one-to-one with openstack_nova_vcpus on `hostname`. #}
    NovaHypervisorVCPUsFullMinor:
      if: >-
        label_replace(system_load15, "hostname", "$1", "host", "(.*)") > on (hostname) openstack_nova_vcpus * {{ cpu_minor_threshold }}
      labels:
        severity: minor
        service: nova
      annotations:
        summary: "{{ cpu_minor_threshold * 100 }}% of hypervisor VCPUs are used"
        description: "{% raw %}{{ $value }} VCPUs on the {{ $labels.hostname }} node (> {% endraw %} {{ cpu_minor_threshold * 100 }}%) are used."
    NovaHypervisorVCPUsFullMajor:
      if: >-
        label_replace(system_load15, "hostname", "$1", "host", "(.*)") > on (hostname) openstack_nova_vcpus * {{ cpu_major_threshold }}
      labels:
        severity: major
        service: nova
      annotations:
        summary: "{{ cpu_major_threshold * 100 }}% of hypervisor VCPUs are used"
        description: "{% raw %}{{ $value }} VCPUs on the {{ $labels.hostname }} node (> {% endraw %} {{ cpu_major_threshold * 100 }}%) are used."
{#- Per-hypervisor RAM rules: used RAM compared with total RAM times the threshold. #}
    NovaHypervisorMemoryFullMajor:
      if: >-
        openstack_nova_used_ram > openstack_nova_ram * {{ ram_major_threshold }}
      labels:
        severity: major
        service: nova
      annotations:
        summary: "{{ ram_major_threshold * 100 }}% of hypervisor RAM is used"
        description: "{% raw %}{{ $value }}MB of RAM on the {{ $labels.hostname }} node (> {% endraw %} {{ ram_major_threshold * 100 }}%) is used."
    NovaHypervisorMemoryFullCritical:
      if: >-
        openstack_nova_used_ram > openstack_nova_ram * {{ ram_critical_threshold }}
      labels:
        severity: critical
        service: nova
      annotations:
        summary: "{{ ram_critical_threshold * 100 }}% of hypervisor RAM is used"
        description: "{% raw %}{{ $value }}MB of RAM on the {{ $labels.hostname }} node (> {% endraw %} {{ ram_critical_threshold * 100 }}%) is used."
{#- Per-hypervisor disk rules: used disk compared with total disk times the threshold. #}
    NovaHypervisorDiskFullMajor:
      if: >-
        openstack_nova_used_disk > openstack_nova_disk * {{ disk_major_threshold }}
      labels:
        severity: major
        service: nova
      annotations:
        summary: "{{ disk_major_threshold * 100 }}% of hypervisor disk space is used"
        description: "{% raw %}{{ $value }}GB of disk space on the {{ $labels.hostname }} node (> {% endraw %} {{ disk_major_threshold * 100 }}%) is used."
    NovaHypervisorDiskFullCritical:
      if: >-
        openstack_nova_used_disk > openstack_nova_disk * {{ disk_critical_threshold }}
      labels:
        severity: critical
        service: nova
      annotations:
        summary: "{{ disk_critical_threshold * 100 }}% of hypervisor disk space is used"
        description: "{% raw %}{{ $value }}GB of disk space on the {{ $labels.hostname }} node (> {% endraw %} {{ disk_critical_threshold * 100 }}%) is used."
{#- Per-aggregate RAM and disk rules. They have the same shape as the per-hypervisor rules
    and reuse the ram_* and disk_* thresholds set earlier in this template, but read the
    openstack_nova_aggregate_* metrics and name the aggregate in the alert text. #}
    NovaAggregateMemoryFullMajor:
      if: >-
        openstack_nova_aggregate_used_ram > openstack_nova_aggregate_ram * {{ ram_major_threshold }}
      labels:
        severity: major
        service: nova
      annotations:
        summary: "{{ ram_major_threshold * 100 }}% of aggregate RAM is used"
        description: "{% raw %}{{ $value }}MB of RAM on the {{ $labels.aggregate }} aggregate (> {% endraw %} {{ ram_major_threshold * 100 }}%) is used."
    NovaAggregateMemoryFullCritical:
      if: >-
        openstack_nova_aggregate_used_ram > openstack_nova_aggregate_ram * {{ ram_critical_threshold }}
      labels:
        severity: critical
        service: nova
      annotations:
        summary: "{{ ram_critical_threshold * 100 }}% of aggregate RAM is used"
        description: "{% raw %}{{ $value }}MB of RAM on the {{ $labels.aggregate }} aggregate (> {% endraw %} {{ ram_critical_threshold * 100 }}%) is used."
    NovaAggregateDiskFullMajor:
      if: >-
        openstack_nova_aggregate_used_disk > openstack_nova_aggregate_disk * {{ disk_major_threshold }}
      labels:
        severity: major
        service: nova
      annotations:
        summary: "{{ disk_major_threshold * 100 }}% of aggregate disk space is used"
        description: "{% raw %}{{ $value }}GB of disk space on the {{ $labels.aggregate }} aggregate (> {% endraw %} {{ disk_major_threshold * 100 }}%) is used."
    NovaAggregateDiskFullCritical:
      if: >-
        openstack_nova_aggregate_used_disk > openstack_nova_aggregate_disk * {{ disk_critical_threshold }}
      labels:
        severity: critical
        service: nova
      annotations:
        summary: "{{ disk_critical_threshold * 100 }}% of aggregate disk space is used"
        description: "{% raw %}{{ $value }}GB of disk space on the {{ $labels.aggregate }} aggregate (> {% endraw %} {{ disk_critical_threshold * 100 }}%) is used."
{#- Cloud-wide CPU rules. Left side: the sum of system_load15 over hosts that also have an
    openstack_nova_vcpus series (matched on `hostname` after relabelling `host`). Right
    side: openstack_nova_vcpus summed per `instance` label, then the largest of those sums,
    times the threshold.
    NOTE(review): taking max over per-instance sums presumably avoids double counting when
    several scrape targets export the same hypervisor figures; confirm. #}
    NovaTotalVCPUsFullMinor:
      if: >-
        sum(label_replace(system_load15, "hostname", "$1", "host", "(.*)") and on (hostname) openstack_nova_vcpus) > max(sum(openstack_nova_vcpus) by (instance)) * {{ cpu_minor_threshold }}
      labels:
        severity: minor
        service: nova
      annotations:
        summary: "{{ cpu_minor_threshold * 100 }}% of cloud VCPUs are used"
        description: "{% raw %}{{ $value }} VCPUs in the cloud (> {% endraw %} {{ cpu_minor_threshold * 100 }}%) are used."
    NovaTotalVCPUsFullMajor:
      if: >-
        sum(label_replace(system_load15, "hostname", "$1", "host", "(.*)") and on (hostname) openstack_nova_vcpus) > max(sum(openstack_nova_vcpus) by (instance)) * {{ cpu_major_threshold }}
      labels:
        severity: major
        service: nova
      annotations:
        summary: "{{ cpu_major_threshold * 100 }}% of cloud VCPUs are used"
        description: "{% raw %}{{ $value }} VCPUs in the cloud (> {% endraw %} {{ cpu_major_threshold * 100 }}%) are used."
{#- Cloud-wide RAM and disk rules read the openstack_nova_total_* metrics directly. #}
    NovaTotalMemoryFullMajor:
      if: >-
        openstack_nova_total_used_ram > openstack_nova_total_ram * {{ ram_major_threshold }}
      labels:
        severity: major
        service: nova
      annotations:
        summary: "{{ ram_major_threshold * 100 }}% of cloud RAM is used"
        description: "{% raw %}{{ $value }}MB of RAM in the cloud (> {% endraw %} {{ ram_major_threshold * 100 }}%) is used."
    NovaTotalMemoryFullCritical:
      if: >-
        openstack_nova_total_used_ram > openstack_nova_total_ram * {{ ram_critical_threshold }}
      labels:
        severity: critical
        service: nova
      annotations:
        summary: "{{ ram_critical_threshold * 100 }}% of cloud RAM is used"
        description: "{% raw %}{{ $value }}MB of RAM in the cloud (> {% endraw %} {{ ram_critical_threshold * 100 }}%) is used."
    NovaTotalDiskFullMajor:
      if: >-
        openstack_nova_total_used_disk > openstack_nova_total_disk * {{ disk_major_threshold }}
      labels:
        severity: major
        service: nova
      annotations:
        summary: "{{ disk_major_threshold * 100 }}% of cloud disk space is used"
        description: "{% raw %}{{ $value }}GB of disk space in the cloud (> {% endraw %} {{ disk_major_threshold * 100 }}%) is used."
    NovaTotalDiskFullCritical:
      if: >-
        openstack_nova_total_used_disk > openstack_nova_total_disk * {{ disk_critical_threshold }}
      labels:
        severity: critical
        service: nova
      annotations:
        summary: "{{ disk_critical_threshold * 100 }}% of cloud disk space is used"
        description: "{% raw %}{{ $value }}GB of disk space in the cloud (> {% endraw %} {{ disk_critical_threshold * 100 }}%) is used."
{#- End of the rules that are rendered only on controller nodes. #}
{%- endif %}
{#- Rendered for controller and compute nodes alike. Fires when the 5-minute per-second
    rate of Nova log messages at error, emergency or fatal level (case-insensitive), summed
    with the `level` label dropped, exceeds monitoring.error_log_rate.warn. #}
    NovaErrorLogsTooHigh:
      {%- set log_threshold = monitoring.error_log_rate.warn|float %}
      if: >-
        sum(rate(log_messages{service="nova",level=~"(?i:(error|emergency|fatal))"}[5m])) without (level) > {{ log_threshold }}
{%- raw %}
      labels:
        severity: warning
        service: nova
      annotations:
        summary: "High number of errors in Nova logs"
        description: "The average per-second rate of errors in Nova logs on the {{ $labels.host }} node is {{ $value }} (as measured over the last 5 minutes)."
{%- endraw %}
{#- Same condition as the one guarding the libvirt exporter settings near the top of this
    template: compute node, `exporters` available in the rendering context, libvirt driver. #}
{%- if is_compute and exporters is defined and compute.get('compute_driver', 'libvirt.LibvirtDriver') == 'libvirt.LibvirtDriver'%}
{%- raw %}
    # Fires when libvirt_up has been 0 for two minutes.
    LibvirtDown:
      if: >-
        libvirt_up == 0
      for: 2m
      labels:
        severity: critical
        service: libvirt
      annotations:
        summary: "Failure to gather Libvirt metrics"
        description: "The Libvirt metric exporter fails to gather metrics on the {{ $labels.host }} node for 2 minutes."
{%- endraw %}
{#- NOTE(review): the included template is outside this file; it presumably consumes the
    exporter settings built at the top of this template. Confirm in the prometheus formula. #}
{%- include "prometheus/_exporters_config.sls" %}
{%- endif %}
{#- Closes the outer controller-or-compute guard. #}
{%- endif %}