blob: 79f6e72aa0bb0d6fad11a9a1c94109a80effa843 [file] [log] [blame]
{%- if pillar.haproxy is defined and pillar.haproxy.proxy is defined %}
{%- from "haproxy/map.jinja" import proxy with context %}
{%- if proxy.enabled and proxy.listen is defined and proxy.listen|length > 0 %}
# HAProxy alert rules. This section is rendered only when the haproxy proxy
# pillar is defined, the proxy is enabled, and it has at least one listen entry.
server:
alert:
# Minor alert: matches every haproxy_up series whose value is not 1.
HaproxyServiceDown:
{% raw %}
  if: >-
    haproxy_up != 1
  labels:
    severity: minor
    service: haproxy
  annotations:
    summary: 'HAProxy service is down'
    description: 'The HAProxy service on the {{ $labels.host }} node is down.'
# Major alert: per cluster, the number of haproxy_up series that are not 1 is
# at least half of all haproxy_up series in that cluster. The cluster label is
# derived from the host label (its leading run of non-digit characters).
HaproxyServiceDownMajor:
  # The folded scalar joins the lines below with single spaces.
  if: >-
    count(label_replace(haproxy_up, "cluster", "$1", "host", "([^0-9]+).+") != 1) by (cluster)
    >= 0.5 * count(label_replace(haproxy_up, "cluster", "$1", "host", "([^0-9]+).+")) by (cluster)
  labels:
    severity: major
    service: haproxy
  annotations:
    summary: '50% of HAProxy services are down'
    description: '{{ $value }} HAProxy services within the {{ $labels.cluster }} cluster are down (at least 50%).'
# Critical alert: per cluster, the number of haproxy_up series that are not 1
# equals the total number of haproxy_up series, i.e. every instance is down.
# The cluster label is derived from the host label (its leading run of
# non-digit characters).
HaproxyServiceOutage:
  # The folded scalar joins the lines below with single spaces.
  if: >-
    count(label_replace(haproxy_up, "cluster", "$1", "host", "([^0-9]+).+") != 1) by (cluster)
    == count(label_replace(haproxy_up, "cluster", "$1", "host", "([^0-9]+).+")) by (cluster)
  labels:
    severity: critical
    service: haproxy
  annotations:
    summary: 'HAProxy service outage'
    # Wording matches HaproxyServiceDownMajor: the label value is followed by
    # the noun "cluster" so the sentence reads correctly.
    description: 'All HAProxy services within the {{ $labels.cluster }} cluster are down.'
# Warning alert: the per-second rate of haproxy_http_response_5xx, computed
# over a 2-minute window, is above 1.
# NOTE(review): the selector matches sv="FRONTEND" series while the annotation
# text says "back end" - confirm which wording is intended.
HaproxyHTTPResponse5xxTooHigh:
  if: >-
    rate(haproxy_http_response_5xx{sv="FRONTEND"}[2m]) > 1
  labels:
    severity: warning
    service: haproxy
  annotations:
    summary: 'HTTP 5xx responses on the {{ $labels.proxy }} back end'
    description: 'The average per-second rate of 5xx HTTP errors on the {{ $labels.host }} node for the {{ $labels.proxy }} back end is {{ $value }} (as measured over the last 2 minutes).'
# Minor alert: haproxy_chkdown for an sv="BACKEND" series increased during the
# last minute.
HaproxyBackendDown:
  if: >-
    increase(haproxy_chkdown{sv="BACKEND"}[1m]) > 0
  labels:
    severity: minor
    service: haproxy
  annotations:
    summary: '{{ $labels.proxy }} back end is down'
    description: 'The {{ $labels.proxy }} back end on the {{ $labels.host }} node is down.'
# Major alert: per proxy, the drop from the 12-hour maximum of
# haproxy_active_servers to its current minimum is at least half of that
# 12-hour maximum.
HaproxyBackendDownMajor:
  # The folded scalar joins the lines below with single spaces; the leading
  # "-" on the second line is the PromQL subtraction operator.
  if: >-
    max(max_over_time(haproxy_active_servers{sv="BACKEND"}[12h])) by (proxy)
    - min(haproxy_active_servers{sv="BACKEND"}) by (proxy)
    >= 0.5 * max(max_over_time(haproxy_active_servers{sv="BACKEND"}[12h])) by (proxy)
  labels:
    severity: major
    service: haproxy
  annotations:
    summary: '50% of {{ $labels.proxy }} back ends are down'
    description: '{{ $value }} {{ $labels.proxy }} back ends are down (at least 50%).'
# Critical alert: per proxy, the maximum haproxy_active_servers plus the
# maximum haproxy_backup_servers equals zero, i.e. no server is left.
HaproxyBackendOutage:
  if: >-
    max(haproxy_active_servers{sv="BACKEND"}) by (proxy)
    + max(haproxy_backup_servers{sv="BACKEND"}) by (proxy) == 0
  labels:
    severity: critical
    service: haproxy
  annotations:
    summary: '{{ $labels.proxy }} back-end outage'
    # Keyed as "description" (not a second "summary"): duplicate mapping keys
    # are invalid YAML and most parsers would silently drop the first value.
    description: 'All {{ $labels.proxy }} back ends are down.'
{% endraw %}
{%- endif %}
{%- endif %}