{#-
  Prometheus alert rules for Keepalived.
  Originating blob id: 53f3359f96a7dee4399c5acf7ff682e30cd6b576

  The rules are emitted only when the "cluster" mapping imported from
  keepalived/map.jinja has "enabled" set; otherwise nothing is rendered.

  Each rule wraps its labels/annotations in raw ... endraw so that the
  double-brace $labels placeholders reach the output literally instead of
  being evaluated by Jinja while this template is rendered.
#}
{%- from "keepalived/map.jinja" import cluster with context %}
{%- if cluster.get('enabled', False) %}
server:
  alert:
    # The procstat metric reports zero running keepalived processes on a host.
    # The condition has to hold for 2m before the alert is raised.
    KeepalivedProcessDown:
      if: >-
        procstat_running{process_name="keepalived"} == 0
      {% raw %}
      for: 2m
      labels:
        severity: major
        service: keepalived
      annotations:
        summary: "Keepalived process is down"
        description: "The Keepalived process on the {{ $labels.host }} node is down."
      {% endraw %}
    # keepalived_up equals 0. No "for" clause is set on this rule.
    KeepalivedProcessNotResponsive:
      if: >-
        keepalived_up == 0
      {% raw %}
      labels:
        severity: major
        service: keepalived
      annotations:
        summary: "Keepalived process is not responding"
        description: "The Keepalived process on the {{ $labels.host }} node is not responding."
      {% endraw %}
    # keepalived_state equals 0, which the annotations report as FAILED.
    KeepalivedFailedState:
      if: >-
        keepalived_state == 0
      {% raw %}
      labels:
        severity: minor
        service: keepalived
      annotations:
        summary: "Keepalived VRRP state is FAILED"
        description: "The Keepalived VRRP {{ $labels.name }} is in the FAILED state on the {{ $labels.host }} node."
      {% endraw %}
    # keepalived_state equals -1, which the annotations report as UNKNOWN.
    KeepalivedUnknownState:
      if: >-
        keepalived_state == -1
      {% raw %}
      labels:
        severity: minor
        service: keepalived
      annotations:
        summary: "Keepalived VRRP state is UNKNOWN"
        description: "The Keepalived VRRP {{ $labels.name }} is in the UNKNOWN state on the {{ $labels.host }} node."
      {% endraw %}
    # More than one ipcheck_assigned series exists for the same "ip" label
    # value, and this has been the case for 2m.
    KeepalivedMultipleIPAddr:
      if: >-
        count(ipcheck_assigned) by (ip) > 1
      {% raw %}
      for: 2m
      labels:
        severity: major
        service: keepalived
      annotations:
        summary: "Keepalived VIP is assigned more than once"
        description: "The Keepalived {{ $labels.ip }} virtual IP is assigned more than once."
      {% endraw %}
    # Derives a "cluster" label from the leading non-digit part of "host",
    # then compares, per cluster, the number of keepalived procstat series
    # with the number of those series whose value is 0. Equality for 2m means
    # every reporting host in that cluster shows no running process.
    KeepalivedServiceOutage:
      if: >-
        count(label_replace(procstat_running{process_name="keepalived"}, "cluster", "$1", "host", "([^0-9]+).+")) by (cluster) == count(label_replace(procstat_running{process_name="keepalived"} == 0, "cluster", "$1", "host", "([^0-9]+).+")) by (cluster)
      {% raw %}
      for: 2m
      labels:
        severity: critical
        service: keepalived
      annotations:
        summary: "Keepalived service outage"
        description: "All Keepalived processes within the {{ $labels.cluster }} cluster are down."
      {% endraw %}
{%- endif %}