Add InfluxDB Relay alerts

Change-Id: I9299d81a592fdfee33f3f4e1d5ca6300d18d94cc
diff --git a/influxdb/map.jinja b/influxdb/map.jinja
index 47a9c00..a2a181c 100644
--- a/influxdb/map.jinja
+++ b/influxdb/map.jinja
@@ -41,6 +41,8 @@
     'http_errors_percentage': 5,
     'failed_points_percentage': 5,
     'dropped_points_percentage': 5,
+    'max_relay_buffer_percentage': 70,
+    'relay_failed_requests_percentage': 5,
   },
 }, grain='os_family', merge=salt['pillar.get']('influxdb:monitoring')) %}
 
diff --git a/influxdb/meta/prometheus.yml b/influxdb/meta/prometheus.yml
index ea66b51..c266dfc 100644
--- a/influxdb/meta/prometheus.yml
+++ b/influxdb/meta/prometheus.yml
@@ -70,6 +70,40 @@
       annotations:
         summary: 'Influxdb too many dropped writes'
         description: '{{ printf `%.1f` $value }}% of written points have been dropped on {{ $labels.host }} (threshold={%- endraw %}{{ influx_http_points_written_dropped_threshold }}).'
+{%- if relay.get('enabled', False) and relay.telemetry is defined and relay.telemetry.get('enabled') %}
+    {#- The relay buffer alert is sized on the largest buffer_size_mb found across all listeners/backends. #}
+    {%- set buffer_sizes = [] %}
+    {%- for name, listen in relay.get('listen', {}).items()|sort %}
+      {%- for backend_name, backend in listen.get('output', {}).items()|sort %}
+        {%- do buffer_sizes.append(backend.get('buffer_size_mb', 0)|float) %}
+      {%- endfor %}
+    {%- endfor %}
+    {%- set buffer_size = ((buffer_sizes|sort)[-1] if buffer_sizes else 0) * 1024 * 1024 %}{#- MB to bytes; 0 (alert skipped) when no backend is configured #}
+    {%- if buffer_size > 0 %}
+    InfluxdbRelayBufferNearFull:
+      {%- set influx_relay_buffer_size_threshold = monitoring.max_relay_buffer_percentage %}
+      if: >-
+        influxdb_relay_backend_buffer_bytes > {{ buffer_size }} * {{ influx_relay_buffer_size_threshold }} / 100
+      {% raw %}
+      labels:
+        severity: warning
+        service: influxdb-relay
+      annotations:
+        summary: 'InfluxDB Relay buffer almost full'
+        description: 'The buffer size for the {{ $labels.instance }}/{{ $labels.backend }} backend is getting full (current value={{ $value }} bytes, threshold={%- endraw %}{{ buffer_size * influx_relay_buffer_size_threshold / 100 }}).'
+    {%- endif %}
+    InfluxdbRelayFailedRequests:
+      {%- set influx_relay_failed_requests_threshold = monitoring.relay_failed_requests_percentage %}{#- percentage of failed relay requests (5m rate) above which the alert fires #}
+      if: >-
+        rate(influxdb_relay_failed_requests_total[5m]) / rate(influxdb_relay_requests_total[5m]) * 100 > {{ influx_relay_failed_requests_threshold }}
+      {% raw %}
+      labels:
+        severity: warning
+        service: influxdb-relay
+      annotations:
+        summary: 'InfluxDB Relay too many failed requests'
+        description: '{{ printf `%.1f` $value }}% of requests have failed on {{ $labels.instance }} (threshold={%- endraw %}{{ influx_relay_failed_requests_threshold }}).'
+
 {%- endif %}
 
 {%- if relay.get('enabled') and relay.telemetry.get('enabled') %}
@@ -96,3 +130,4 @@
 
 {%- endif %}
 {%- endif %}
+{%- endif %}
\ No newline at end of file