# Source blob: cbacd5eed90b3e73140d9bbabc4fadcb76ba2290
{#-
  Keepalived alert rules. The whole document is emitted only when the
  `cluster` settings imported from keepalived/map.jinja have `enabled` set
  to a truthy value; otherwise the template renders nothing.

  Each alert keeps its labels/annotations inside a raw block so that Jinja
  passes the `$labels` placeholders through verbatim instead of trying to
  evaluate them itself.
#}
{%- from "keepalived/map.jinja" import cluster with context %}
{%- if cluster.get('enabled', False) %}
server:
  alert:
    # Matches any host whose procstat_running metric for the keepalived
    # process reads 0. No `for` delay is configured.
    KeepalivedProcessDown:
      if: >-
        procstat_running{process_name="keepalived"} == 0
      {% raw %}
      labels:
        severity: major
        service: keepalived
      annotations:
        summary: "Keepalived process is down"
        description: "The Keepalived process on the {{ $labels.host }} node is down."
      {% endraw %}
    # Matches when keepalived_state equals 0, which the annotations below
    # report as the VRRP FAILED state.
    KeepalivedFailedState:
      if: >-
        keepalived_state == 0
      {% raw %}
      labels:
        severity: minor
        service: keepalived
      annotations:
        summary: "Keepalived VRRP state is FAILED"
        description: "Keepalived VRRP is in the FAILED state on the {{ $labels.host }} node."
      {% endraw %}
    # Matches when more than one ipcheck_assigned series exists for the same
    # `ip` label value, sustained for 2 minutes.
    KeepalivedMultipleIPAddr:
      if: >-
        count(ipcheck_assigned) by (ip) > 1
      {% raw %}
      for: 2m
      labels:
        severity: major
        service: keepalived
      annotations:
        summary: "Keepalived VIP is assigned more than once"
        description: "The Keepalived {{ $labels.ip }} virtual IP is assigned more than once."
      {% endraw %}
    # Derives a `cluster` label from `host` via the label_replace capture
    # group, then compares, per cluster, the number of keepalived series with
    # the number of those series that read 0. Equality means every reporting
    # keepalived process in that cluster is down.
    # The expression is wrapped at existing spaces only; the folded scalar
    # joins the lines back with single spaces.
    KeepalivedServiceOutage:
      if: >-
        count(label_replace(procstat_running{process_name="keepalived"},
        "cluster", "$1", "host", "([^0-9]+).+")) by (cluster)
        ==
        count(label_replace(procstat_running{process_name="keepalived"} == 0,
        "cluster", "$1", "host", "([^0-9]+).+")) by (cluster)
      {% raw %}
      labels:
        severity: critical
        service: keepalived
      annotations:
        summary: "Keepalived service outage"
        description: "All Keepalived processes within the {{ $labels.cluster }} cluster are down."
      {% endraw %}
{%- endif %}