{#- blob: 84ed391ec783d9e8b894168028c8573cd4a6e8f4 [file] [log] [blame] (source-browser header, not part of the template) #}
{%- from "keystone/map.jinja" import server with context %}
{%- if server.get('enabled', False) %}
# Log collection settings for Keystone: one decoder, one splitter and one
# input, each keyed by name under its own section.
log_collector:
  decoder:
    keystone:
      # Lua-based decoder; module_file is the script, module_dir lists the
      # ';'-separated directories given for additional Lua modules.
      engine: sandbox
      module_file: /usr/share/lma_collector/decoders/openstack_log.lua
      module_dir: /usr/share/lma_collector/common;/usr/share/heka/lua_modules
      adjust_timezone: true
  splitter:
    keystone:
      engine: token
      # Single-quoted, so YAML keeps this as the two characters '\' and 'n'
      # rather than a newline.
      # NOTE(review): presumably the consumer expands the escape -- confirm.
      delimiter: '\n'
  input:
    keystone_log:
      engine: logstreamer
      log_directory: "/var/log"
      # Matches keystone/<Service>.log, optionally followed by '.' and a
      # numeric suffix. The named groups 'Service' and 'Seq' are the ones
      # referenced by 'differentiator' and 'priority' below.
      file_match: 'keystone/(?P<Service>.+)\.log\.?(?P<Seq>\d*)$'
      differentiator: ['keystone', '_', 'Service']
      priority: ["^Seq"]
      # NOTE(review): these look like the 'keystone' decoder/splitter keys
      # above with a type suffix appended -- confirm against the consumer.
      decoder: "keystone_decoder"
      splitter: "keystone_splitter"
# Alarm definitions for the metric collector.
#   trigger: named conditions; each rule compares one metric against a
#            threshold using the given operator, window and function.
#   alarm:   named alarms; each lists the triggers it depends on and the
#            'service' dimension attached to it.
metric_collector:
  trigger:
    keystone_response_time_duration:
      description: 'Keystone API is too slow'
      severity: warning
      no_data_policy: okay
      rules:
        - metric: openstack_keystone_http_response_times
          # Field filters: GET or POST requests with a 2xx status.
          field:
            http_method: '== GET || == POST'
            http_status: '== 2xx'
          relational_operator: '>'
          # NOTE(review): threshold/window units are not stated here
          # (presumably seconds) -- confirm.
          threshold: 0.3
          window: 60
          periods: 0
          value: upper_90
          function: max
    keystone_logs_error:
      description: 'Too many errors have been detected in Keystone logs'
      severity: warning
      no_data_policy: okay
      rules:
        - metric: log_messages
          field:
            service: keystone
            level: error
          relational_operator: '>'
          threshold: 0.1
          # NOTE(review): 70 here versus 60 for the other triggers in this
          # section -- confirm this is intentional.
          window: 70
          periods: 0
          function: max
    keystone_public_api_local_endpoint:
      description: 'Keystone public API is locally down'
      severity: down
      rules:
        - metric: openstack_check_local_api
          field:
            service: keystone-public-api
          # Fires when the last value of the check metric equals 0.
          relational_operator: '=='
          threshold: 0
          window: 60
          periods: 0
          function: last
  alarm:
    # Each alarm references exactly one trigger defined above.
    keystone_response_time:
      alerting: enabled
      triggers:
        - keystone_response_time_duration
      dimension:
        service: keystone-response-time
    keystone_logs:
      alerting: enabled
      triggers:
        - keystone_logs_error
      dimension:
        service: keystone-logs
    keystone_public_api_endpoint:
      alerting: enabled
      triggers:
        - keystone_public_api_local_endpoint
      dimension:
        service: keystone-public-api-endpoint
# Alarm definitions for the remote collector, using the same trigger/alarm
# layout: triggers hold threshold rules, alarms reference triggers by name.
remote_collector:
  trigger:
    keystone_public_api_check_failed:
      description: 'Endpoint check for keystone-public-api is failed'
      severity: down
      rules:
        - metric: openstack_check_api
          field:
            service: keystone-public-api
          # Fires when the last value of the check metric equals 0.
          relational_operator: '=='
          threshold: 0
          window: 60
          periods: 0
          function: last
    keystone_failed_authentications_too_high:
      description: 'Too many failed authentications have been detected for Keystone'
      severity: warning
      no_data_policy: okay
      # Both rules below must match for the trigger to fire.
      logical_operator: and
      rules:
        - metric: authentications_percent
          value: failed
          relational_operator: '>'
          threshold: 80
          window: 120
          periods: 0
          function: avg
        # The second condition is to avoid triggering the alarm when the volume
        # of authentication requests is too low to be relevant
        - metric: authentications_rate
          value: all
          relational_operator: '>'
          threshold: 0.1
          window: 120
          periods: 0
          function: avg
  alarm:
    # Each alarm references exactly one trigger defined above.
    keystone_public_api_check:
      alerting: enabled
      triggers:
        - keystone_public_api_check_failed
      dimension:
        service: keystone-public-api-check
    keystone_failed_authentications:
      alerting: enabled
      triggers:
        - keystone_failed_authentications_too_high
      dimension:
        service: keystone-failed-authentications
# Alarm clusters for the aggregator. Each cluster selects inputs with
# 'match', lists its 'members', combines them with 'policy' and publishes
# the result under the given 'dimension'.
aggregator:
  alarm_cluster:
    # Clusters grouped per hostname. Their members carry the names of the
    # alarms declared under metric_collector in this file.
    keystone_response_time:
      policy: status_of_members
      alerting: enabled
      group_by: hostname
      match:
        service: keystone-response-time
      members:
        - keystone_response_time
      dimension:
        service: keystone
        nagios_host: 01-service-clusters
    keystone_logs:
      policy: status_of_members
      alerting: enabled
      group_by: hostname
      match:
        service: keystone-logs
      members:
        - keystone_logs
      dimension:
        service: keystone
        nagios_host: 01-service-clusters
    keystone_public_api_endpoint:
      policy: availability_of_members
      alerting: enabled
      group_by: hostname
      match:
        service: keystone-public-api-endpoint
      members:
        - keystone_public_api_endpoint
      dimension:
        service: keystone
        nagios_host: 01-service-clusters
    # Clusters without group_by. Their members carry the names of the
    # alarms declared under remote_collector in this file.
    keystone_public_api_check:
      policy: highest_severity
      alerting: enabled
      match:
        service: keystone-public-api-check
      members:
        - keystone_public_api_check
      dimension:
        service: keystone
        nagios_host: 01-service-clusters
    keystone_failed_authentications:
      policy: highest_severity
      alerting: enabled
      match:
        service: keystone-failed-authentications
      members:
        - keystone_failed_authentications
      dimension:
        service: keystone
        nagios_host: 01-service-clusters
    # Top-level cluster: matches the 'service: keystone' dimension emitted
    # by the five clusters above and lists all of them as members. It is
    # the only entry using 'enabled_with_notification'.
    keystone:
      policy: highest_severity
      alerting: enabled_with_notification
      match:
        service: keystone
      members:
        - keystone_response_time
        - keystone_logs
        - keystone_public_api_endpoint
        - keystone_public_api_check
        - keystone_failed_authentications
      dimension:
        cluster_name: keystone
        nagios_host: 00-top-clusters
{%- endif %}