Merge "Add sf-notifier monitoring:"
diff --git a/metadata/service/gainsight/elasticsearch_container.yml b/metadata/service/gainsight/elasticsearch_container.yml
new file mode 100644
index 0000000..0129de2
--- /dev/null
+++ b/metadata/service/gainsight/elasticsearch_container.yml
@@ -0,0 +1,9 @@
+applications:
+ - prometheus
+parameters:
+ prometheus:
+ gainsight_elasticsearch:
+ enabled: true
+ dir:
+ config: /srv/volumes/local/gainsight_elasticsearch/config
+ crontab: /srv/volumes/local/gainsight_elasticsearch/cron.d
diff --git a/metadata/service/sf_notifier/container.yml b/metadata/service/sf_notifier/container.yml
index 324c876..73223e3 100644
--- a/metadata/service/sf_notifier/container.yml
+++ b/metadata/service/sf_notifier/container.yml
@@ -8,5 +8,3 @@
logs: /srv/volumes/local/sf_notifier/logs
uwsgi:
bind_port: 5000
- workers: 4
- buffer_size: 32768
diff --git a/prometheus/files/gainsight/gainsight_elasticsearch_config.yml b/prometheus/files/gainsight/gainsight_elasticsearch_config.yml
new file mode 100644
index 0000000..0388409
--- /dev/null
+++ b/prometheus/files/gainsight/gainsight_elasticsearch_config.yml
@@ -0,0 +1,5 @@
+{% from "prometheus/map.jinja" import gainsight_elasticsearch with context %}{# Emits one "name=value" line under [Queries] per entry of gainsight_elasticsearch.queries; .items() is used because it exists on both Python 2 and Python 3 dicts. #}
+[Queries]
+{%- for query, value in gainsight_elasticsearch.queries.items() %}
+{{ query }}={{ value }}
+{%- endfor %}
diff --git a/prometheus/files/gainsight/gainsight_elasticsearch_crontab.yml b/prometheus/files/gainsight/gainsight_elasticsearch_crontab.yml
new file mode 100644
index 0000000..beaa8f2
--- /dev/null
+++ b/prometheus/files/gainsight/gainsight_elasticsearch_crontab.yml
@@ -0,0 +1,2 @@
+{% from "prometheus/map.jinja" import gainsight_elasticsearch with context %}
+{{ gainsight_elasticsearch.crontab.duration }} root . /opt/gainsight/vars && /opt/gainsight/entrypoint.py >> /var/log/cron.log 2>&1
diff --git a/prometheus/gainsight_elasticsearch.sls b/prometheus/gainsight_elasticsearch.sls
new file mode 100644
index 0000000..3ca5c39
--- /dev/null
+++ b/prometheus/gainsight_elasticsearch.sls
@@ -0,0 +1,29 @@
+{% from "prometheus/map.jinja" import gainsight_elasticsearch with context %}
+{%- if gainsight_elasticsearch.enabled %}
+
+{%- if pillar.docker is defined and pillar.docker.host is defined %}
+
+{{gainsight_elasticsearch.dir.config}}:
+ file.directory:
+ - makedirs: True
+
+{{gainsight_elasticsearch.dir.crontab}}:
+ file.directory:
+ - makedirs: True
+
+{{gainsight_elasticsearch.dir.config}}/config.ini:
+ file.managed:
+ - source: salt://prometheus/files/gainsight/gainsight_elasticsearch_config.yml
+ - template: jinja
+ - require:
+ - file: {{gainsight_elasticsearch.dir.config}}
+
+{{gainsight_elasticsearch.dir.crontab}}/crontab:
+ file.managed:
+ - source: salt://prometheus/files/gainsight/gainsight_elasticsearch_crontab.yml
+ - template: jinja
+ - require:
+ - file: {{gainsight_elasticsearch.dir.crontab}}
+
+{%- endif %}
+{%- endif %}
diff --git a/prometheus/map.jinja b/prometheus/map.jinja
index 506bc22..f75eabf 100644
--- a/prometheus/map.jinja
+++ b/prometheus/map.jinja
@@ -68,6 +68,16 @@
}
}, merge=salt['pillar.get']('prometheus:gainsight')) %}}
+{# Default schedule is 23:58 daily (cron fields: minute hour day-of-month month day-of-week); pillar prometheus:gainsight_elasticsearch is merged over these defaults. #}{% set gainsight_elasticsearch = salt['grains.filter_by']({
+  'default': {
+    'queries': {
+    },
+    'crontab': {
+      'duration': '58 23 * * *'
+    },
+  }
+}, merge=salt['pillar.get']('prometheus:gainsight_elasticsearch')) %}
+
{% set sf_notifier = salt['grains.filter_by']({
'default': {
},
diff --git a/prometheus/meta/prometheus.yml b/prometheus/meta/prometheus.yml
index f53d66d..ebbf94e 100644
--- a/prometheus/meta/prometheus.yml
+++ b/prometheus/meta/prometheus.yml
@@ -138,7 +138,7 @@
service: prometheus
annotations:
summary: "50% of Prometheus relay services are down"
- description: "{{ $value }} of Prometheus relay services (>= 50%) are down for 2 minutes."
+ description: "{{ $value }}% of Prometheus relay services (>= 50%) are down for 2 minutes."
PrometheusRelayServiceOutage:
if: >-
count(procstat_running{process_name="prometheus-relay"} == 0) == count(procstat_running{process_name="prometheus-relay"})
@@ -163,7 +163,7 @@
annotations:
summary: "Prometheus Long Term Storage service is down"
description: "The Prometheus Long Term Storage service on the {{$labels.host}} node is down for 2 minutes."
- PrometheusRelayServiceDownMajor:
+ PrometheusLTSServiceDownMajor:
if: >-
count(procstat_running{process_name="prometheus"} == 0) >= count(procstat_running{process_name="prometheus"}) * 0.5
for: 2m
@@ -172,8 +172,8 @@
service: prometheus
annotations:
summary: "50% of Prometheus Long Term Storage services are down"
- description: "{{ $value }} of Prometheus Long Term Storage services (>= 50%) are down for 2 minutes."
- PrometheusRelayServiceOutage:
+ description: "{{ $value }}% of Prometheus Long Term Storage services (>= 50%) are down for 2 minutes."
+ PrometheusLTSServiceOutage:
if: >-
count(procstat_running{process_name="prometheus"} == 0) == count(procstat_running{process_name="prometheus"})
for: 2m