{%- set is_controller = controller.get('enabled', False) %}
{%- set is_volume = volume.get('enabled', False) %}
+{#- Thresholds are read once, before the role checks, because both the controller and the volume alert sections reference them. minor_threshold and major_threshold are used below as fractional multipliers of a process count and are multiplied by 100 for display. #}
+{%- set minor_threshold = monitoring.services_failed_warning_threshold_percent|float %}
+{%- set major_threshold = monitoring.services_failed_critical_threshold_percent|float %}
+{%- set major_endpoint_threshold = monitoring.endpoint_failed_major_threshold|float %}
{%- if is_controller or is_volume %}
server:
alert:
{%- if is_controller %}
-{%- set minor_threshold = monitoring.services_failed_warning_threshold_percent|float %}
-{%- set major_threshold = monitoring.services_failed_critical_threshold_percent|float %}
-{%- set major_endpoint_threshold = monitoring.endpoint_failed_major_threshold|float %}
{%- raw %}
CinderApiOutage:
if: >-
description: >-
All {{ $labels.binary }} services are down.
{%- endraw %}
+{%- endif %}
+{%- if is_volume %}
+ CinderVolumeProcessDown:
+ {#- Minor alert raised for every procstat_running series of the cinder-volume process whose value is 0; the description names the node through the host label. The raw region keeps the Prometheus placeholders from being evaluated by Jinja. #}
+ if: >-
+ procstat_running{process_name="cinder-volume"} == 0
+ {% raw %}
+ labels:
+ severity: minor
+ service: cinder
+ annotations:
+ summary: "Cinder-volume process is down"
+ description: "The cinder-volume process on the {{ $labels.host }} node is down."
+ {% endraw %}
+ CinderVolumeProcessesDownMinor:
+ {#- Minor alert raised when the number of cinder-volume series reporting 0 reaches the total number of cinder-volume series multiplied by minor_threshold. The displayed percentage is rounded to two decimals so float products such as 0.3 * 100 do not print as 30.000000000000004. The endraw tag in the description carries no whitespace-control dash, so the space after the comparison sign survives rendering. #}
+ if: >-
+ count(procstat_running{process_name="cinder-volume"} == 0) >= count(procstat_running{process_name="cinder-volume"}) * {{ minor_threshold }}
+ {% raw %}
+ labels:
+ severity: minor
+ service: cinder
+ annotations:
+ summary: "{%- endraw %}{{ (minor_threshold*100)|round(2) }}%{%- raw %} of cinder-volume processes are down"
+ description: "{{ $value }} cinder-volume processes (>= {% endraw %}{{ (minor_threshold*100)|round(2) }}%{%- raw %}) are down."
+ {% endraw %}
+ CinderVolumeProcessesDownMajor:
+ {#- Major alert raised when the number of cinder-volume series reporting 0 reaches the total number of cinder-volume series multiplied by major_threshold. The displayed percentage is rounded to two decimals so float products do not print with binary rounding noise. The endraw tag in the description carries no whitespace-control dash, so the space after the comparison sign survives rendering. #}
+ if: >-
+ count(procstat_running{process_name="cinder-volume"} == 0) >= count(procstat_running{process_name="cinder-volume"}) * {{ major_threshold }}
+ {% raw %}
+ labels:
+ severity: major
+ service: cinder
+ annotations:
+ summary: "{%- endraw %}{{ (major_threshold*100)|round(2) }}%{%- raw %} of cinder-volume processes are down"
+ description: "{{ $value }} cinder-volume processes (>= {% endraw %}{{ (major_threshold*100)|round(2) }}%{%- raw %}) are down."
+ {% endraw %}
+ CinderVolumeServiceOutage:
+ {#- Critical alert raised when the number of cinder-volume series reporting 0 equals the total number of cinder-volume series, i.e. every monitored cinder-volume process is down. #}
+ if: >-
+ count(procstat_running{process_name="cinder-volume"} == 0) == count(procstat_running{process_name="cinder-volume"})
+ {% raw %}
+ labels:
+ severity: critical
+ service: cinder
+ annotations:
+ summary: "Cinder-volume service outage"
+ description: "All cinder-volume processes are down."
+ {% endraw %}
{%- endif %}
CinderErrorLogsTooHigh:
{%- set log_threshold = monitoring.error_log_rate|float %}