Use 'influxdb_up' metric for alert
This change also removes the procstat monitoring of the InfluxDB process
since it is redundant with the InfluxDB input plugin.
Change-Id: Ib5e0acacdab562fe6b997a4d988527fe81d0d031
diff --git a/influxdb/meta/prometheus.yml b/influxdb/meta/prometheus.yml
index 995fde3..02573e6 100644
--- a/influxdb/meta/prometheus.yml
+++ b/influxdb/meta/prometheus.yml
@@ -4,18 +4,18 @@
{%- if server.get('enabled', False) %}
server:
alert:
- ProcstatRunningInfluxdb:
+{%- if server.get('http', {}).get('enabled', False) %}
+ InfluxdbQDown:
if: >-
- procstat_running{process_name="influxdb"} == 0
- {% raw %}
+ influxdb_up != 1
labels:
severity: warning
service: influxdb
annotations:
- summary: 'Influxdb service is down'
- description: 'Influxdb service is down on node {{ $labels.host }}'
+ summary: 'InfluxDB service down'
+ {% raw %}
+ description: 'InfluxDB service is down on node {{ $labels.host }}'
{% endraw %}
-{%- if server.get('http', {}).get('enabled', False) %}
InfluxdbHTTPClientError:
{%- set influx_http_client_error_threshold = monitoring.http_errors_percentage %}
if: >-
diff --git a/influxdb/meta/telegraf.yml b/influxdb/meta/telegraf.yml
index a3ccc5a..2ed4308 100644
--- a/influxdb/meta/telegraf.yml
+++ b/influxdb/meta/telegraf.yml
@@ -4,10 +4,6 @@
{%- if server.get('enabled', False) and server.get('http', {}).get('enabled', False) %}
agent:
input:
- procstat:
- process:
- influxdb:
- exe: influxd
influxdb:
servers:
- url: http://{{ server.http.bind.address|replace('0.0.0.0', '127.0.0.1') }}:{{ server.http.bind.port }}/debug/vars