Clean up Prometheus alerts
Change-Id: Ib42a891dcef58066aaf2b8097d128c414fb1719d
diff --git a/kubernetes/meta/prometheus.yml b/kubernetes/meta/prometheus.yml
index 8ab9a9f..1e9edd3 100644
--- a/kubernetes/meta/prometheus.yml
+++ b/kubernetes/meta/prometheus.yml
@@ -25,7 +25,6 @@
{%- endif %}
{%- endif %}
{%- endif %}
-{% raw %}
recording:
cluster_namespace_controller_pod_container:spec_memory_limit_bytes:
query: >-
@@ -149,50 +148,57 @@
if: >-
avg_over_time(kubelet_running_container_count[2m]) * 1.3 <
avg_over_time(kubelet_running_container_count[10m])
+ {% raw %}
labels:
severity: warning
service: kubernetes
annotations:
summary: 'Container count is low'
description: 'Container count from last 2m is lower than avarage from 10m'
+ {% endraw %}
AvgKubeletRunningPODCountLow:
if: >-
avg_over_time(kubelet_running_pod_count[2m]) * 1.3 <
avg_over_time(kubelet_running_pod_count[10m])
+ {% raw %}
labels:
severity: warning
service: kubernetes
annotations:
summary: 'POD count is low'
description: 'POD count from last 2m is lower than avarage from 10m'
+ {% endraw %}
ContainerScrapeError:
if: 'container_scrape_error != 0'
+ {% raw %}
labels:
severity: warning
service: kubernetes
annotations:
summary: 'Fail to scrape container'
description: 'Prometheus was not able to scrape metrics from container on {{ $labels.instance }}'
+ {% endraw %}
ProcstatRunningKubernetes:
if: >-
procstat_running{process_name=~"hypercube-.*"} == 0
+ {% raw %}
labels:
severity: warning
service: kubernetes
annotations:
summary: 'Kubernetes service {{ $labels.process_name }} is down'
description: 'Kubernetes service {{ $labels.process_name }} is down on node {{ $labels.host }}'
-{% endraw %}
+ {% endraw %}
{%- if network is defined and network.get('engine', None) == 'calico' %}
-{% raw %}
ProcstatRunningCalico:
if: >-
procstat_running{process_name=~"calico-felix|bird|bird6|confd"} == 0
+ {% raw %}
labels:
severity: warning
service: calico
annotations:
summary: 'Calico service {{ $labels.process_name }} is down'
description: 'Calico service {{ $labels.process_name }} is down on node {{ $labels.host }}'
-{% endraw %}
+ {% endraw %}
{% endif %}