Add CPU steal time alerts

- warning for cpu_usage_steal>5 for 5 minutes
- critical for cpu_usage_steal>10 for 5 minutes

Change-Id: I6fc5c7ed369655b88a5da8c9e2821f84cbc5b509
Related-bug: PROD-32803
diff --git a/linux/map.jinja b/linux/map.jinja
index 44cacc1..a969268 100644
--- a/linux/map.jinja
+++ b/linux/map.jinja
@@ -414,6 +414,10 @@
         'cpu_usage_percentage': {
               'warn': 90.0,
         },
+        'cpu_steal_percentage': {
+              'warn': 5.0,
+              'crit': 10.0,
+        },
         'memory_usage_percentage': {
             'warn': 90.0,
             'major': 95.0,
diff --git a/linux/meta/prometheus.yml b/linux/meta/prometheus.yml
index 03e0cca..2fe7036 100644
--- a/linux/meta/prometheus.yml
+++ b/linux/meta/prometheus.yml
@@ -1,7 +1,32 @@
 {%- from "linux/map.jinja" import monitoring, network with context %}
 server:
   alert:
+    {#- CPU steal time alerts; thresholds are read from monitoring.cpu_steal_percentage (linux/map.jinja). #}
+    {#- NOTE(review): these expressions carry no cpu="cpu-total" selector, unlike SystemCpuFullWarning; confirm this is intended. #}
+    {%- set cpu_steal_warn = monitoring.cpu_steal_percentage.warn|float %}
+    {%- set cpu_steal_crit = monitoring.cpu_steal_percentage.crit|float %}
+    SystemCpuStealTimeWarning:
+      if: >-
+        cpu_usage_steal > {{ cpu_steal_warn }}
+      for: 5m
+      labels:
+        severity: warning
+        service: system
+      annotations:
+        summary: "CPU steal time warning"
+        {#- Only the host label placeholder is wrapped in raw, so Jinja emits it verbatim; the raw block is closed on the same line. #}
+        description: "The CPU steal time was above {{ cpu_steal_warn }}% on the {%- raw %} {{ $labels.host }}{%- endraw %} node for 5 minutes."
+    SystemCpuStealTimeCritical:
+      if: >-
+        cpu_usage_steal > {{ cpu_steal_crit }}
+      for: 5m
+      labels:
+        severity: critical
+        service: system
+      annotations:
+        summary: "CPU steal time critical"
+        description: "The CPU steal time was above {{ cpu_steal_crit }}% on the {%- raw %} {{ $labels.host }}{%- endraw %} node for 5 minutes."
     SystemCpuFullWarning:
       {%- set cpu_usage_threshold = monitoring.cpu_usage_percentage.warn|float %}
       if: >-
         100 - avg_over_time(cpu_usage_idle{cpu="cpu-total"}[5m]) > {{ cpu_usage_threshold }}