Merge "Update cinder policy management"
diff --git a/cinder/meta/prometheus.yml b/cinder/meta/prometheus.yml
index 0748ba3..b9d548c 100644
--- a/cinder/meta/prometheus.yml
+++ b/cinder/meta/prometheus.yml
@@ -2,13 +2,13 @@
{%- set is_controller = controller.get('enabled', False) %}
{%- set is_volume = volume.get('enabled', False) %}
+{%- set minor_threshold = monitoring.services_failed_warning_threshold_percent|float %}
+{%- set major_threshold = monitoring.services_failed_critical_threshold_percent|float %}
+{%- set major_endpoint_threshold = monitoring.endpoint_failed_major_threshold|float %}
{%- if is_controller or is_volume %}
server:
alert:
{%- if is_controller %}
-{%- set minor_threshold = monitoring.services_failed_warning_threshold_percent|float %}
-{%- set major_threshold = monitoring.services_failed_critical_threshold_percent|float %}
-{%- set major_endpoint_threshold = monitoring.endpoint_failed_major_threshold|float %}
{%- raw %}
CinderApiOutage:
if: >-
@@ -107,6 +107,52 @@
All {{ $labels.binary }} services are down.
{%- endraw %}
{%- endif %}
+{%- if is_volume %}
+ CinderVolumeProcessDown:  # one alert per series whose procstat_running is 0
+ if: >-
+ procstat_running{process_name="cinder-volume"} == 0
+ {% raw %}
+ labels:
+ severity: minor
+ service: cinder
+ annotations:
+ summary: "Cinder-volume process is down"
+ description: "The cinder-volume process on the {{ $labels.host }} node is down."
+ {% endraw %}
+ CinderVolumeProcessesDownMinor:  # down count >= total count * minor_threshold
+ if: >-
+ count(procstat_running{process_name="cinder-volume"} == 0) >= count(procstat_running{process_name="cinder-volume"}) * {{ minor_threshold }}
+ {% raw %}
+ labels:
+ severity: minor
+ service: cinder
+ annotations:
+ summary: "{%- endraw %}{{ (minor_threshold * 100)|round(1) }}%{%- raw %} of cinder-volume processes are down"
+ description: "{{ $value }} cinder-volume processes (>= {% endraw %}{{ (minor_threshold * 100)|round(1) }}%{%- raw %}) are down."
+ {% endraw %}
+ CinderVolumeProcessesDownMajor:  # down count >= total count * major_threshold
+ if: >-
+ count(procstat_running{process_name="cinder-volume"} == 0) >= count(procstat_running{process_name="cinder-volume"}) * {{ major_threshold }}
+ {% raw %}
+ labels:
+ severity: major
+ service: cinder
+ annotations:
+ summary: "{%- endraw %}{{ (major_threshold * 100)|round(1) }}%{%- raw %} of cinder-volume processes are down"
+ description: "{{ $value }} cinder-volume processes (>= {% endraw %}{{ (major_threshold * 100)|round(1) }}%{%- raw %}) are down."
+ {% endraw %}
+ CinderVolumeServiceOutage:  # down count equals total count
+ if: >-
+ count(procstat_running{process_name="cinder-volume"} == 0) == count(procstat_running{process_name="cinder-volume"})
+ {% raw %}
+ labels:
+ severity: critical
+ service: cinder
+ annotations:
+ summary: "Cinder-volume service outage"
+ description: "All cinder-volume processes are down."
+ {% endraw %}
+{%- endif %}
CinderErrorLogsTooHigh:
{%- set log_threshold = monitoring.error_log_rate|float %}
if: >-
diff --git a/cinder/meta/telegraf.yml b/cinder/meta/telegraf.yml
index 0657c94..9a1dac6 100644
--- a/cinder/meta/telegraf.yml
+++ b/cinder/meta/telegraf.yml
@@ -1,10 +1,21 @@
-{%- from "cinder/map.jinja" import controller with context %}
-{%- if controller.get('enabled', False) and controller.get('osapi') %}
-{%- set protocol = controller.get('identity', {}).get('protocol', 'http') %}
+{%- from "cinder/map.jinja" import controller, volume with context %}
+{#- Decide once which telegraf inputs apply to this node. #}
+{%- set api_enabled = controller.get('enabled', False) and controller.get('osapi') %}
+{%- set volume_enabled = volume.get('enabled', False) %}
+{%- if api_enabled or volume_enabled %}
agent:
input:
+ {%- if api_enabled %}
+ {%- set protocol = controller.get('identity', {}).get('protocol', 'http') %}
http_response:
cinder-api:
address: "{{ protocol }}://{{ controller.osapi.host|replace('0.0.0.0', '127.0.0.1') }}:8776/"
expected_code: {% if controller.version in ('juno', 'kilo', 'liberty') %}200{% else %}300{% endif %}
+ {%- endif %}
+ {%- if volume_enabled %}
+ procstat:
+ process:
+ cinder-volume:
+ pattern: cinder-volume
+ {%- endif %}
{%- endif %}