# blob: 7186c509e54511be14eb8e62218f48af54a1685b [file] [log] [blame]
{%- from "cinder/map.jinja" import controller as _controller with context %}
{%- from "cinder/map.jinja" import volume as _volume with context %}
# Role flags taken from the 'enabled' key of the controller and volume maps
# imported above, defaulting to False when the key is absent. They gate the
# conditional sections in the rest of this file.
{%- set controller = _controller.get('enabled', False) %}
{%- set volume = _volume.get('enabled', False) %}
# Log collection settings for Cinder: one decoder, one splitter and one
# log input, each declared under its own sub-section.
log_collector:
  decoder:
    cinder:
      engine: sandbox
      module_file: /usr/share/lma_collector/decoders/openstack_log.lua
      # Two search directories separated by ';'.
      module_dir: /usr/share/lma_collector/common;/usr/share/heka/lua_modules
      adjust_timezone: true
  splitter:
    cinder:
      engine: token
      # Single quotes keep this as the two literal characters backslash
      # and 'n'; presumably the consumer expands the escape itself.
      # TODO confirm.
      delimiter: '\n'
  input:
    cinder_log:
      engine: logstreamer
      log_directory: "/var/log"
      # Matches cinder/<Service>.log below log_directory, optionally
      # followed by '.' and digits that are captured as Seq. Single quotes
      # keep the regex backslashes literal.
      file_match: 'cinder/(?P<Service>.+)\.log\.?(?P<Seq>\d*)$'
      differentiator: ['cinder', '_', 'Service']
      priority: ["^Seq"]
      # NOTE(review): these names carry a _decoder/_splitter suffix that the
      # "cinder" entries declared above do not; presumably the consumer
      # appends it when registering them. Confirm.
      decoder: "cinder_decoder"
      splitter: "cinder_splitter"
# Triggers and alarms registered under metric_collector: the Cinder log
# error rate and the local cinder-api check.
metric_collector:
  trigger:
    # cinder_logs_error is referenced by both the cinder_logs and the
    # cinder_logs_volume alarms, so it is rendered when either role is
    # enabled.
    {%- if controller or volume %}
    cinder_logs_error:
      description: 'Too many errors have been detected in Cinder logs'
      severity: warning
      no_data_policy: okay
      rules:
        - metric: log_messages
          field:
            service: cinder
            level: error
          relational_operator: '>'
          threshold: 0.1
          window: 70
          periods: 0
          function: max
    {%- endif %}
    {%- if controller %}
    cinder_api_local_endpoint:
      description: 'Cinder API is locally down'
      severity: down
      rules:
        - metric: openstack_check_local_api
          field:
            service: cinder-api
          relational_operator: '=='
          threshold: 0
          window: 60
          periods: 0
          function: last
    {%- endif %}
  alarm:
    # Each alarm lists the triggers it evaluates and the "service"
    # dimension it is published with.
    {%- if controller %}
    cinder_logs:
      alerting: enabled
      triggers:
        - cinder_logs_error
      dimension:
        service: cinder-logs
    cinder_api_endpoint:
      alerting: enabled
      triggers:
        - cinder_api_local_endpoint
      dimension:
        service: cinder-api-endpoint
    {%- endif %}
    {%- if volume %}
    # Same trigger as cinder_logs, published under a separate dimension for
    # the volume role.
    cinder_logs_volume:
      alerting: enabled
      triggers:
        - cinder_logs_error
      dimension:
        service: cinder-logs-volume
    {%- endif %}
# Triggers and alarms registered under remote_collector: the cinder-api
# endpoint check plus the scheduler and volume service-state alarms.
remote_collector:
  trigger:
    {%- if controller %}
    cinder_api_check_failed:
      description: 'Endpoint check for cinder-api is failed'
      severity: down
      rules:
        - metric: openstack_check_api
          field:
            service: cinder-api
          relational_operator: '=='
          threshold: 0
          window: 60
          periods: 0
          function: last
    # The three scheduler triggers below grade scheduler availability by
    # severity: warning, critical, down.
    cinder_scheduler_two_up:
      description: 'Some Cinder schedulers are down'
      severity: warning
      # Rules are combined with "and": at least two schedulers up while at
      # least one is down.
      logical_operator: and
      rules:
        - metric: openstack_cinder_services
          field:
            service: scheduler
            state: up
          relational_operator: '>='
          threshold: 2
          window: 60
          periods: 0
          function: last
        - metric: openstack_cinder_services
          field:
            service: scheduler
            state: down
          relational_operator: '>'
          threshold: 0
          window: 60
          periods: 0
          function: last
    cinder_scheduler_one_up:
      description: 'Only one Cinder scheduler is up'
      severity: critical
      # Rules are combined with "and": exactly one scheduler up while the
      # "up" percentage is below 100.
      logical_operator: and
      rules:
        - metric: openstack_cinder_services
          field:
            service: scheduler
            state: up
          relational_operator: '=='
          threshold: 1
          window: 60
          periods: 0
          function: last
        - metric: openstack_cinder_services_percent
          field:
            service: scheduler
            state: up
          relational_operator: '<'
          threshold: 100
          window: 60
          periods: 0
          function: last
    cinder_scheduler_zero_up:
      description: 'All Cinder schedulers are down or disabled'
      severity: down
      rules:
        - metric: openstack_cinder_services
          field:
            service: scheduler
            state: up
          relational_operator: '=='
          threshold: 0
          window: 60
          periods: 0
          function: last
    {%- endif %}
    {%- if volume %}
    # The "up" and "disabled" states are treated the same way: a disabled
    # service is not considered an error, so the volume triggers below only
    # look at the percentage of services in the "down" state.
    cinder_volume_some_down:
      description: 'Some Cinder volumes are down'
      severity: warning
      # NOTE(review): logical_operator is set although this trigger has a
      # single rule; the other single-rule triggers in this file omit it.
      # Confirm whether it can be dropped.
      logical_operator: and
      rules:
        - metric: openstack_cinder_services_percent
          field:
            service: volume
            state: down
          relational_operator: '>'
          threshold: 0
          window: 60
          periods: 0
          function: last
    cinder_volume_majority_down:
      description: 'Majority of Cinder volumes are down'
      severity: critical
      rules:
        - metric: openstack_cinder_services_percent
          field:
            service: volume
            state: down
          relational_operator: '>'
          threshold: 50
          window: 60
          periods: 0
          function: last
    cinder_volume_all_down:
      description: 'All Cinder volumes are down'
      severity: down
      rules:
        - metric: openstack_cinder_services_percent
          field:
            service: volume
            state: down
          relational_operator: '=='
          threshold: 100
          window: 60
          periods: 0
          function: last
    {%- endif %}
  alarm:
    {%- if controller %}
    cinder_api_check:
      alerting: enabled
      triggers:
        - cinder_api_check_failed
      dimension:
        service: cinder-api-check
    # Triggers are listed from the highest severity (down) to the lowest
    # (warning).
    cinder_scheduler:
      alerting: enabled
      triggers:
        - cinder_scheduler_zero_up
        - cinder_scheduler_one_up
        - cinder_scheduler_two_up
      dimension:
        service: cinder-scheduler
    {%- endif %}
    {%- if volume %}
    # Triggers are listed from the highest severity (down) to the lowest
    # (warning).
    cinder_volume:
      alerting: enabled
      triggers:
        - cinder_volume_all_down
        - cinder_volume_majority_down
        - cinder_volume_some_down
      dimension:
        service: cinder-volume
    {%- endif %}
# Alarm clusters for Cinder. Service-level clusters (nagios_host
# 01-service-clusters) are published with a "service" dimension of either
# cinder-data or cinder-control; the cinder_data and cinder_control
# clusters (nagios_host 00-top-clusters) match on that dimension.
aggregator:
  alarm_cluster:
    {%- if volume %}
    cinder_logs_volume:
      policy: majority_of_node_members
      alerting: enabled
      group_by: hostname
      match:
        service: cinder-logs-volume
      members:
        - cinder_logs_volume
      dimension:
        service: cinder-data
        nagios_host: 01-service-clusters
    cinder_volume:
      policy: highest_severity
      alerting: enabled
      match:
        service: cinder-volume
      members:
        - cinder_volume
      dimension:
        service: cinder-data
        nagios_host: 01-service-clusters
    # Top-level cluster for the volume role; its members are the two
    # clusters declared just above.
    cinder_data:
      policy: highest_severity
      alerting: enabled_with_notification
      match:
        service: cinder-data
      members:
        - cinder_logs_volume
        - cinder_volume
      dimension:
        cluster_name: cinder-data
        nagios_host: 00-top-clusters
    {%- endif %}
    {%- if controller %}
    cinder_logs:
      policy: status_of_members
      alerting: enabled
      group_by: hostname
      match:
        service: cinder-logs
      members:
        - cinder_logs
      dimension:
        service: cinder-control
        nagios_host: 01-service-clusters
    cinder_api_endpoint:
      policy: availability_of_members
      alerting: enabled
      group_by: hostname
      match:
        service: cinder-api-endpoint
      members:
        - cinder_api_endpoint
      dimension:
        service: cinder-control
        nagios_host: 01-service-clusters
    cinder_api_check:
      policy: highest_severity
      alerting: enabled
      match:
        service: cinder-api-check
      members:
        - cinder_api_check
      dimension:
        service: cinder-control
        nagios_host: 01-service-clusters
    cinder_scheduler:
      policy: highest_severity
      alerting: enabled
      match:
        service: cinder-scheduler
      members:
        - cinder_scheduler
      dimension:
        service: cinder-control
        nagios_host: 01-service-clusters
    # Top-level cluster for the controller role; its members are the four
    # controller clusters declared just above.
    cinder_control:
      policy: highest_severity
      alerting: enabled_with_notification
      match:
        service: cinder-control
      members:
        - cinder_logs
        - cinder_api_endpoint
        - cinder_api_check
        - cinder_scheduler
      dimension:
        cluster_name: cinder-control
        nagios_host: 00-top-clusters
    {%- endif %}