Split out SystemLoad alerts for Nova Compute nodes.
Fixes PROD-34626
Change-Id: Iee4617067e1f491ffc46ddacbbb2c8d517fe3766
diff --git a/nova/meta/prometheus.yml b/nova/meta/prometheus.yml
index cc48363..04d3384 100644
--- a/nova/meta/prometheus.yml
+++ b/nova/meta/prometheus.yml
@@ -389,6 +389,30 @@
All {{ $labels.binary }} services are down.
{%- endraw %}
{%- endif %}
+{%- if is_compute %}
+{%- raw %}
+ NovaComputeSystemLoadTooHighWarning:  # per-CPU load (system_load15 / system_n_cpus) above 1.0 on hosts matching cmp<N>
+ if: >-
+ system_load15{host=~".*cmp[0-9]+"} / system_n_cpus > 1.0
+ for: 5m
+ labels:
+ severity: warning
+ service: system
+ annotations:
+ summary: "System load per CPU is above 1.0"
+ description: "The system load per CPU on the {{ $labels.host }} node is {{ $value }} for 5 minutes."
+ NovaComputeSystemLoadTooHighCritical:  # per-CPU load (system_load15 / system_n_cpus) above 2.0 on hosts matching cmp<N>
+ if: >-
+ system_load15{host=~".*cmp[0-9]+"} / system_n_cpus > 2.0
+ for: 5m
+ labels:
+ severity: critical
+ service: system
+ annotations:
+ summary: "System load per CPU is above 2.0"
+ description: "The system load per CPU on the {{ $labels.host }} node is {{ $value }} for 5 minutes."
+{%- endraw %}
+{%- endif %}
{%- raw %}
NovaErrorLogsTooHigh:
if: >-