| {#- Alert rule definitions for Cinder, rendered from the controller, volume and monitoring maps imported below. #} |
| {%- from "cinder/map.jinja" import controller, volume, monitoring with context %} |
| {#- Each role counts as disabled when its map has no 'enabled' key. #} |
| {%- set is_controller = controller.get('enabled', False) %} |
| {%- set is_volume = volume.get('enabled', False) %} |
| |
| {#- Nothing is emitted unless at least one of the two roles is enabled. #} |
| {%- if is_controller or is_volume %} |
| server: |
| alert: |
| {#- The API and service-state alerts that follow are rendered for the controller role only. #} |
| {%- if is_controller %} |
| {#- Thresholds are coerced to float: they are used as multipliers in the rule expressions and multiplied by 100 for the percentages shown in messages. #} |
| {%- set minor_threshold = monitoring.services_failed_warning_threshold_percent|float %} |
| {%- set major_threshold = monitoring.services_failed_critical_threshold_percent|float %} |
| {%- set major_endpoint_threshold = monitoring.endpoint_failed_major_threshold|float %} |
| {#- From here to the matching endraw the text is passed through verbatim, so the $labels and $value placeholders in the rules are not evaluated by Jinja. #} |
| {%- raw %} |
| # Critical: fires when the maximum openstack_api_check_status over all series whose name matches cinder.* is 0, i.e. no matching series reports a non-zero status. |
| CinderAPIOutage: |
| if: >- |
| max(openstack_api_check_status{name=~"cinder.*"}) == 0 |
| labels: |
| severity: critical |
| service: cinder |
| annotations: |
| summary: "Cinder API outage" |
| description: >- |
| Cinder API is not accessible for all available Cinder endpoints in the OpenStack service catalog. |
| # Major: fires for each openstack_api_check_status series whose name matches cinder.* and whose value is 0; the name label of that series is used in the messages. |
| CinderAPIDown: |
| if: >- |
| openstack_api_check_status{name=~"cinder.*"} == 0 |
| labels: |
| severity: major |
| service: cinder |
| annotations: |
| summary: "{{ $labels.name }} endpoint is not accessible" |
| description: >- |
| Cinder API is not accessible for the {{ $labels.name }} endpoint. |
| # Minor: fires for each cinder-api http_response_status series that has been 0 for 2 minutes; the host label of that series is used in the description. |
| CinderAPIServiceDown: |
| if: >- |
| http_response_status{name=~"cinder-api"} == 0 |
| for: 2m |
| labels: |
| severity: minor |
| service: cinder |
| annotations: |
| summary: "Host cinder-api endpoint is not accessible" |
| description: >- |
| The host cinder-api endpoint on the {{ $labels.host }} node is not accessible for at least 2 minutes. |
| {%- endraw %} |
| # Major: fires when, for 2 minutes, the number of cinder-api http_response_status series equal to 0 is at least major_endpoint_threshold times the total number of those series. |
| # This rule starts outside raw mode so Jinja can fill in the threshold; the description switches raw mode on and off around it and leaves raw mode open for the rules that follow. |
| CinderAPIServiceDownMajor: |
| if: >- |
| count(http_response_status{name=~"cinder-api"} == 0) >= count(http_response_status{name=~"cinder-api"}) * {{ major_endpoint_threshold }} |
| for: 2m |
| labels: |
| severity: major |
| service: cinder |
| annotations: |
| summary: "{{major_endpoint_threshold * 100}}% of host cinder-api endpoints are not accessible" |
| description: >- |
| {% raw %}{{ $value }} host cinder-api endpoints are not accessible for at least 2 minutes (at least {% endraw %}{{major_endpoint_threshold * 100}}{% raw %}%). |
| # Critical: fires when, for 2 minutes, the number of cinder-api http_response_status series equal to 0 matches the total number of those series. |
| CinderAPIServiceOutage: |
| if: >- |
| count(http_response_status{name=~"cinder-api"} == 0) == count(http_response_status{name=~"cinder-api"}) |
| for: 2m |
| labels: |
| severity: critical |
| service: cinder |
| annotations: |
| summary: "Host cinder-api outage" |
| description: >- |
| All available host cinder-api endpoints are not accessible for at least 2 minutes. |
| # Minor: fires for each openstack_cinder_service_state series whose value is 0; the binary and hostname labels of that series are used in the messages. |
| CinderServiceDown: |
| if: >- |
| openstack_cinder_service_state == 0 |
| labels: |
| severity: minor |
| service: cinder |
| annotations: |
| summary: "{{ $labels.binary }} service is down" |
| description: >- |
| The {{ $labels.binary }} service on the {{ $labels.hostname }} node is down. |
| {%- endraw %} |
| # Minor: fires per binary when the number of openstack_cinder_service_state series equal to 0 is at least minor_threshold times the total number of series for that binary. |
| # Jinja fills in the threshold; raw mode is switched on for the parts that carry the alert's own label and value placeholders. |
| # The tag that closes raw mode in the description has no whitespace-control dash on purpose, so the space before the opening parenthesis survives rendering. |
| CinderServicesDownMinor: |
| if: >- |
| count(openstack_cinder_service_state == 0) by (binary) >= on (binary) count(openstack_cinder_service_state) by (binary) * {{minor_threshold}} |
| labels: |
| severity: minor |
| service: cinder |
| annotations: |
| summary: "{{minor_threshold * 100}}%{%- raw %} of {{ $labels.binary }} services are down" |
| description: >- |
| {{ $value }} {{ $labels.binary }} services are down {% endraw %}(at least {{minor_threshold * 100}}%). |
| # Major: fires per binary when the number of openstack_cinder_service_state series equal to 0 is at least major_threshold times the total number of series for that binary. |
| # Jinja fills in the threshold; raw mode is switched on for the parts that carry the alert's own label and value placeholders, and is left open at the end for the rule that follows. |
| # The tag that closes raw mode in the description has no whitespace-control dash on purpose, so the space before the opening parenthesis survives rendering. |
| CinderServicesDownMajor: |
| if: >- |
| count(openstack_cinder_service_state == 0) by (binary) >= on (binary) count(openstack_cinder_service_state) by (binary) * {{major_threshold}} |
| labels: |
| severity: major |
| service: cinder |
| annotations: |
| summary: "{{major_threshold * 100}}%{%- raw %} of {{ $labels.binary }} services are down" |
| description: >- |
| {{ $value }} {{ $labels.binary }} services are down {% endraw %}(at least {{major_threshold * 100}}%).{%- raw %} |
| # Critical: fires per binary when the number of openstack_cinder_service_state series equal to 0 matches the total number of series for that binary. |
| CinderServiceOutage: |
| if: >- |
| count(openstack_cinder_service_state == 0) by (binary) == on (binary) count(openstack_cinder_service_state) by (binary) |
| labels: |
| severity: critical |
| service: cinder |
| annotations: |
| summary: "{{ $labels.binary }} service outage" |
| description: >- |
| All {{ $labels.binary }} services are down. |
| {%- endraw %} |
| {%- endif %} |
| # Warning: fires when the 5-minute per-second rate of cinder log_messages at error, emergency or fatal level (matched case-insensitively), summed without the level label, exceeds the error_log_rate setting. |
| # This rule sits outside the controller-only section, so it is rendered whenever the controller or the volume role is enabled. |
| CinderErrorLogsTooHigh: |
| {%- set log_threshold = monitoring.error_log_rate|float %} |
| if: >- |
| sum(rate(log_messages{service="cinder",level=~"(?i:(error|emergency|fatal))"}[5m])) without (level) > {{ log_threshold }} |
| {%- raw %} |
| labels: |
| severity: warning |
| service: cinder |
| annotations: |
| summary: "High number of errors in Cinder logs" |
| description: "The average per-second rate of errors in Cinder logs on the {{ $labels.host }} node is {{ $value }} (as measured over the last 5 minutes)." |
| {%- endraw %} |
| {%- endif %} |