Add Prometheus support metadata and collector for alerts and recording rules
Change-Id: If1927033922c350257999f59ba3031445689e11b
diff --git a/metadata/service/support.yml b/metadata/service/support.yml
index 76476fd..df2b7c3 100644
--- a/metadata/service/support.yml
+++ b/metadata/service/support.yml
@@ -1,5 +1,7 @@
parameters:
prometheus:
_support:
+ prometheus:
+ enabled: true
grafana:
enabled: true
diff --git a/prometheus/collector.sls b/prometheus/collector.sls
new file mode 100644
index 0000000..9b7946b
--- /dev/null
+++ b/prometheus/collector.sls
@@ -0,0 +1,40 @@
+{#-
+  Gather the Prometheus metadata shipped by every formula whose pillar sets
+  _support:prometheus:enabled and hand the merged result to the grain file
+  template below.
+#}
+{%- set service_grains = {'prometheus': {'server': {'alert': {}, 'recording': []}}} %}
+{%- for service_name, service in pillar.items() %}
+  {#- Non-mapping pillar values have no .get(), so skip them explicitly. #}
+  {%- if service is mapping and service.get('_support', {}).get('prometheus', {}).get('enabled', False) %}
+    {%- set grains_fragment_file = service_name+'/meta/prometheus.yml' %}
+    {#- The macro returns the rendered include as a string; a missing file is ignored. #}
+    {%- macro load_grains_file() %}{% include grains_fragment_file ignore missing %}{% endmacro %}
+    {%- set grains_yaml = load_grains_file()|load_yaml %}
+    {%- if grains_yaml is mapping %}
+      {#- A `set` here would only rebind a loop-local name and be lost after
+          the loop, so fold the merged result into the outer dict in place. #}
+      {%- do service_grains.update(salt['grains.filter_by']({'default': service_grains}, merge={'prometheus': grains_yaml})) %}
+    {%- endif %}
+  {%- endif %}
+{%- endfor %}
+
+# Directory that holds the grain file managed below.
+prometheus_grains_dir:
+  file.directory:
+    - name: /etc/salt/grains.d
+    - mode: 700
+    - makedirs: true
+    - user: root
+
+# Grain file rendered from files/prometheus.grain with the merged metadata.
+prometheus_grain:
+  file.managed:
+    - name: /etc/salt/grains.d/prometheus
+    - source: salt://prometheus/files/prometheus.grain
+    - template: jinja
+    - mode: 600
+    - defaults:
+        service_grains: {{ service_grains|yaml }}
+    - require:
+        - file: prometheus_grains_dir
diff --git a/prometheus/files/alerts.yml b/prometheus/files/alerts.yml
index 68c1c88..d9ebc7e 100644
--- a/prometheus/files/alerts.yml
+++ b/prometheus/files/alerts.yml
@@ -1,13 +1,44 @@
{%- from "prometheus/map.jinja" import server with context %}
-{%- if server.recording is defined %}
-{%- for recording_rule in server.recording %}
-{{ recording_rule.name }} = {{ recording_rule.query }}
-{%- endfor %}
-{%- endif %}
+{%- set alerts = {} %}
+{%- set recordings = {} %}
-{%- if server.alert is defined %}
-{%- for alertname, alert in server.alert.iteritems() %}
+
+{#- Pillar-defined recording rules are registered first, so they win over grains. #}
+{%- for recording_rule in server.get('recording', []) %}
+  {%- if recording_rule.name not in recordings %}
+    {%- do recordings.update({recording_rule.name: recording_rule.query}) %}
+  {%- endif %}
+{%- endfor %}
+
+{#- Same precedence for alerts: the pillar definition of a name is kept. #}
+{%- for alertname, alert in server.get('alert', {}).iteritems() %}
+  {%- if alertname not in alerts %}
+    {%- do alerts.update({alertname: alert}) %}
+  {%- endif %}
+{%- endfor %}
+
+{#- Query the mine once and reuse the same snapshot for both rule kinds. #}
+{%- set mine_grains = salt['mine.get']('*', 'grains.items') %}
+{%- for node_name, node_grains in mine_grains.iteritems() %}
+  {%- set server_grain = node_grains.get('prometheus', {}).get('server', {}) %}
+  {%- for recording_rule in server_grain.get('recording', []) %}
+    {%- if recording_rule.name not in recordings %}
+      {%- do recordings.update({recording_rule.name: recording_rule.query}) %}
+    {%- endif %}
+  {%- endfor %}
+  {%- for alertname, alert in server_grain.get('alert', {}).iteritems() %}
+    {%- if alertname not in alerts %}
+      {%- do alerts.update({alertname: alert}) %}
+    {%- endif %}
+  {%- endfor %}
+{%- endfor %}
+
+{%- for recording_name, query in recordings.iteritems() %}
+{{ recording_name }} = {{ query }}
+{%- endfor %}
+
+{%- for alertname, alert in alerts.iteritems() %}
ALERT {{ alertname }}
IF {{ alert.if }}
{%- if alert.for is defined %}FOR {{ alert.for }}{%- endif %}
@@ -26,4 +57,3 @@
}
{%- endif %}
{%- endfor %}
-{%- endif %}
diff --git a/prometheus/files/prometheus.grain b/prometheus/files/prometheus.grain
new file mode 100644
index 0000000..3e3b373
--- /dev/null
+++ b/prometheus/files/prometheus.grain
@@ -0,0 +1 @@
+{{ service_grains|yaml(False) }}
diff --git a/prometheus/meta/prometheus.yml b/prometheus/meta/prometheus.yml
new file mode 100644
index 0000000..b395c17
--- /dev/null
+++ b/prometheus/meta/prometheus.yml
@@ -0,0 +1,14 @@
+{% raw %}
+server:
+  alert:
+    PrometheusUP:
+      # `up` is 1 while the last scrape succeeded and 0 once it fails, so the
+      # alert must select the 0 samples to match the "is down" annotations.
+      if: 'up == 0'
+      labels:
+        severity: critical
+        service: prometheus
+      annotations:
+        summary: 'Prometheus endpoint {{ $labels.instance }} is down'
+        description: 'Prometheus endpoint {{ $labels.instance }} is down for job {{ $labels.job }}'
+{% endraw %}