Add alert on Keystone response times
Change-Id: Ic6dec1b3f2356514753b3011373feceef6e20b09
diff --git a/keystone/map.jinja b/keystone/map.jinja
index 35a2613..01dc3f4 100644
--- a/keystone/map.jinja
+++ b/keystone/map.jinja
@@ -62,6 +62,7 @@
{% set monitoring = salt['grains.filter_by']({
'default': {
'error_log_rate': 0.2,
+ 'http_response_time_p90': 0.3,
'failed_auths': {
'percentage': 50,
'all_auths_rate': 0.1,
diff --git a/keystone/meta/prometheus.yml b/keystone/meta/prometheus.yml
index d584553..fc3568f 100644
--- a/keystone/meta/prometheus.yml
+++ b/keystone/meta/prometheus.yml
@@ -49,5 +49,17 @@
annotations:
summary: 'Too many failed authentications in Keystone'
description: 'The rate of failed authentications in Keystone over the last 5 minutes is too high (current value={{ $value }}, threshold={%- endraw %}{{ auth_threshold }}).'
+ # Warning-level alert on Keystone API latency: fires when, for any host, the
+ # highest openstack_keystone_http_response_times_upper_90 sample among GET and
+ # POST requests with http_status 2xx stays at or above the configured
+ # threshold for 2 minutes.
+ KeystoneAPITooSlow:
+ # Threshold comes from monitoring.http_response_time_p90, coerced to a float.
+ # The description text below labels it as seconds.
+ {%- set response_time_threshold = monitoring.http_response_time_p90|float %}
+ if: >-
+ max by(host) (openstack_keystone_http_response_times_upper_90{http_method=~"^(GET|POST)$",http_status="2xx"}) >= {{ response_time_threshold }}
+{%- raw %}
+ for: 2m
+ labels:
+ severity: warning
+ service: keystone
+ annotations:
+ summary: 'Keystone API too slow'
+ # The double-brace value/label placeholders in the description sit inside a
+ # Jinja raw section, so Jinja emits them unchanged (presumably for expansion
+ # by Prometheus at alert time). The raw section is closed just before the
+ # threshold so that this one value is rendered by Jinja.
+ description: 'The 90th percentile of the Keystone API response times for GET and POST requests is too high on node {{ $labels.host }} (current value={{ $value }}s, threshold={%- endraw %}{{ response_time_threshold }}s).'
{%- endif %}