From: Simon Pasquier Date: Tue, 25 Jul 2017 09:04:21 +0000 (+0200) Subject: Add alerts on the number of series X-Git-Url: https://gerrit.mcp.mirantis.com/gitweb?p=salt-formulas%2Finfluxdb.git;a=commitdiff_plain;h=363844afb06635c9194086497341ea490c5e77e2;hp=b0b30342033fcf26ec712397d88279add381b171 Add alerts on the number of series Change-Id: Iea33775b44390876aff5e00505b74aede07e0f20 --- diff --git a/influxdb/files/influxdb.conf b/influxdb/files/influxdb.conf index ecddad0..181ba43 100644 --- a/influxdb/files/influxdb.conf +++ b/influxdb/files/influxdb.conf @@ -60,7 +60,7 @@ reporting-disabled = {{ server.reporting_disabled | lower }} wal-dir = "{{ server.data.wal_dir }}" trace-logging-enabled = {{ server.data.trace_logging_enabled|default('false')|lower }} query-log-enabled = {{ server.data.query_log_enabled | lower }} - max-series-per-database = {{ server.data.max_series_per_database|default(1000000) }} + max-series-per-database = {{ server.data.max_series_per_database }} cache-max-memory-size = {{ server.data.cache_max_memory_size|default(1048576000) }} cache-snapshot-memory-size = {{ server.data.cache_snapshot_memory_size|default(26214400) }} cache-snapshot-write-cold-duration = "{{ server.data.cache_snapshot_write_cold_duration|default('10m') }}" diff --git a/influxdb/map.jinja b/influxdb/map.jinja index c059cf4..d2d4a89 100644 --- a/influxdb/map.jinja +++ b/influxdb/map.jinja @@ -27,6 +27,7 @@ default: dir: '/var/lib/influxdb/data' wal_dir: '/var/lib/influxdb/wal' query_log_enabled: false + max_series_per_database: 1000000 meta: enabled: true dir: '/var/lib/influxdb/meta' @@ -36,6 +37,7 @@ default: {%- set monitoring = salt['grains.filter_by']({ 'default': { + 'max_series_percentage': 95, 'http_errors_percentage': 5, 'failed_points_percentage': 5, 'dropped_points_percentage': 5, diff --git a/influxdb/meta/prometheus.yml b/influxdb/meta/prometheus.yml index 02573e6..54a8b13 100644 --- a/influxdb/meta/prometheus.yml +++ b/influxdb/meta/prometheus.yml @@ -5,18 +5,39 @@ server: alert: {%- if server.get('http', {}).get('enabled', False) %} - InfluxdbQDown: + InfluxdbDown: if: >- influxdb_up != 1 labels: severity: warning service: influxdb annotations: + {%- raw %} summary: 'InfluxDB service down' - {% raw %} description: 'InfluxDB service is down on node {{ $labels.host }}' - {% endraw %} - InfluxdbHTTPClientError: + {%- endraw %} + InfluxdbSeriesNumberHigh: + {%- set influx_max_series_threshold = monitoring.max_series_percentage * server.data.max_series_per_database / 100 %} + if: >- + influxdb_database_numSeries >= {{ influx_max_series_threshold }} + labels: + severity: warning + service: influxdb + annotations: + {% raw %} + summary: 'InfluxDB high number of series for {{ $labels.database }}' + description: 'The InfluxDB {{ $labels.database }} database is getting close to the maximum number of series (value={{ $value }}{%- endraw %},threshold={{ influx_max_series_threshold }}).' + InfluxdbSeriesNumberTooHigh: + if: >- + influxdb_database_numSeries >= {{ server.data.max_series_per_database }} + labels: + severity: critical + service: influxdb + annotations: + {% raw %} + summary: 'InfluxDB too many series for {{ $labels.database }}' + description: 'The InfluxDB {{ $labels.database }} database has exceeded the maximum number of series (value={{ $value }}{%- endraw %},threshold={{ server.data.max_series_per_database }}).' + InfluxdbHTTPClientErrors: {%- set influx_http_client_error_threshold = monitoring.http_errors_percentage %} if: >- rate(influxdb_httpd_clientError[2m]) / rate(influxdb_httpd_req[2m]) * 100 > {{ influx_http_client_error_threshold }}