Merge pull request #25 from Perceptyx/master
Ensure custom paths are created and have correct permissions
diff --git a/.kitchen.travis.yml b/.kitchen.travis.yml
new file mode 100644
index 0000000..6bcad13
--- /dev/null
+++ b/.kitchen.travis.yml
@@ -0,0 +1,6 @@
+suites:
+
+ - name: <%= ENV['SUITE'] %>
+ provisioner:
+ pillars-from-files:
+ influxdb.sls: tests/pillar/<%= ENV['SUITE'] %>.sls
diff --git a/.travis.yml b/.travis.yml
index 7a77247..2e36211 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -17,15 +17,19 @@
- bundle install
env:
- - PLATFORM=trevorj/salty-whales:trusty
- - PLATFORM=trevorj/salty-whales:xenial
+ - PLATFORM=trevorj/salty-whales:trusty SUITE=client
+ - PLATFORM=trevorj/salty-whales:xenial SUITE=client
+ - PLATFORM=trevorj/salty-whales:trusty SUITE=cluster
+ - PLATFORM=trevorj/salty-whales:xenial SUITE=cluster
+ - PLATFORM=trevorj/salty-whales:trusty SUITE=single
+ - PLATFORM=trevorj/salty-whales:xenial SUITE=single
before_script:
- set -o pipefail
- make test | tail
script:
- - test ! -e .kitchen.yml || bundle exec kitchen test -t tests/integration
+ - KITCHEN_LOCAL_YAML=.kitchen.travis.yml bundle exec kitchen test -t tests/integration
notifications:
webhooks:
diff --git a/README.rst b/README.rst
index 2b8349d..769bb2d 100644
--- a/README.rst
+++ b/README.rst
@@ -264,6 +264,34 @@
database: mydb1
privilege: all
+InfluxDB relay with HTTP outputs:
+
+.. code-block:: yaml
+
+ influxdb:
+ relay:
+ enabled: true
+ telemetry:
+ enabled: true
+ bind:
+ address: 127.0.0.1
+ port: 9196
+ listen:
+ http_backend:
+ type: http
+ bind:
+ address: 127.0.0.1
+ port: 9096
+ output:
+ server1:
+ location: http://server1:8086/write
+ timeout: 20s
+ buffer_size_mb: 512
+ max_batch_kb: 1024
+ max_delay_interval: 30s
+ server2:
+ location: http://server2:8086/write
+
Read more
=========
diff --git a/influxdb/files/grafana_dashboards/influxdb_prometheus.json b/influxdb/files/grafana_dashboards/influxdb_prometheus.json
index 9b5291f..9b3d0fa 100644
--- a/influxdb/files/grafana_dashboards/influxdb_prometheus.json
+++ b/influxdb/files/grafana_dashboards/influxdb_prometheus.json
@@ -13,6 +13,334 @@
"rows": [
{
"collapse": false,
+ "height": 250,
+ "panels": [
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "datasource": null,
+ "decimals": 0,
+ "format": "none",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "id": 7,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 3,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": true
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "count_scalar(influxdb_up)",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 60
+ }
+ ],
+ "thresholds": "",
+ "title": "Total instances",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "datasource": null,
+ "decimals": 0,
+ "format": "none",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "id": 8,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 3,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": true
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "count_scalar(influxdb_up == 1)",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 60
+ }
+ ],
+ "thresholds": "",
+ "title": "Running instances",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "datasource": null,
+ "decimals": 0,
+ "format": "none",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "id": 9,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 3,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": true
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "count_scalar(influxdb_up == 0)",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 60
+ }
+ ],
+ "thresholds": "",
+ "title": "Stopped instances",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "datasource": null,
+ "decimals": 0,
+ "format": "none",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "id": 10,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 3,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": true
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "min(haproxy_active_servers{proxy=~\"influxdb-backend\", sv=\"BACKEND\"})",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 60
+ }
+ ],
+ "thresholds": "",
+ "title": "InfluxDB backends",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "General",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
"height": "250px",
"panels": [
{
@@ -348,7 +676,7 @@
}
],
"thresholds": "",
- "title": "Go routines",
+ "title": "Goroutines",
"type": "singlestat",
"valueFontSize": "80%",
"valueMaps": [
@@ -562,10 +890,10 @@
"multi": false,
"name": "server",
"options": [],
- "query": "label_values(influxdb_httpd_authFail, host)",
+ "query": "label_values(influxdb_up, host)",
"refresh": 1,
"regex": "",
- "sort": 0,
+ "sort": 1,
"tagValuesQuery": "",
"tags": [],
"tagsQuery": "",
diff --git a/influxdb/files/grafana_dashboards/influxdb_relay_prometheus.json b/influxdb/files/grafana_dashboards/influxdb_relay_prometheus.json
new file mode 100644
index 0000000..f7f4e9b
--- /dev/null
+++ b/influxdb/files/grafana_dashboards/influxdb_relay_prometheus.json
@@ -0,0 +1,439 @@
+{% raw %}
+{
+ "annotations": {
+ "list": []
+ },
+ "editable": true,
+ "gnetId": null,
+ "graphTooltip": 0,
+ "hideControls": false,
+ "id": null,
+ "links": [],
+ "refresh": "1m",
+ "rows": [
+ {
+ "collapse": false,
+ "height": "250px",
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "prometheus",
+ "fill": 0,
+ "id": 1,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(influxdb_relay_requests_total{instance=~\"$instance\"}[1m])",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "total ({{instance}})",
+ "refId": "A",
+ "step": 10
+ },
+ {
+ "expr": "irate(influxdb_relay_failed_requests_total{instance=~\"$instance\"}[1m])",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "failed ({{instance}})",
+ "refId": "B",
+ "step": 10
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Requests",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "ops",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": null,
+ "fill": 0,
+ "id": 2,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(influxdb_relay_received_points_total{instance=~\"$instance\"}[1m])",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{instance}}",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Received points",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "wps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "Instance metrics",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": 250,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": null,
+ "fill": 0,
+ "id": 3,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(influxdb_relay_backend_sent_bytes_total{backend=~\"$backend\",instance=~\"$instance\"}[1m])",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "sent ({{instance}} -> {{backend}})",
+ "refId": "A",
+ "step": 10
+ },
+ {
+ "expr": "irate(influxdb_relay_backend_failed_bytes_total{backend=~\"$backend\",instance=~\"$instance\"}[1m])",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "failed ({{instance}} -> {{backend}})",
+ "refId": "B",
+ "step": 10
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "I/O",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "Bps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": null,
+ "fill": 0,
+ "id": 4,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "influxdb_relay_backend_buffer_bytes{backend=~\"$backend\",instance=~\"$instance\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{instance}} -> {{backend}}",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Buffer",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "decbytes",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "Backend metrics",
+ "titleSize": "h6"
+ }
+ ],
+ "schemaVersion": 14,
+ "style": "dark",
+ "tags": [],
+ "templating": {
+ "list": [
+ {
+ "allValue": null,
+ "current": {},
+ "datasource": "prometheus",
+ "hide": 0,
+ "includeAll": true,
+ "label": null,
+ "multi": true,
+ "name": "instance",
+ "options": [],
+ "query": "label_values(influxdb_relay_backend_buffer_bytes, instance)",
+ "refresh": 1,
+ "regex": "",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": null,
+ "current": {},
+ "datasource": "prometheus",
+ "hide": 0,
+ "includeAll": true,
+ "label": null,
+ "multi": true,
+ "name": "backend",
+ "options": [],
+ "query": "label_values(influxdb_relay_backend_buffer_bytes, backend)",
+ "refresh": 1,
+ "regex": "",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ }
+ ]
+ },
+ "time": {
+ "from": "now-1h",
+ "to": "now"
+ },
+ "timepicker": {
+ "refresh_intervals": [
+ "5s",
+ "10s",
+ "30s",
+ "1m",
+ "5m",
+ "15m",
+ "30m",
+ "1h",
+ "2h",
+ "1d"
+ ],
+ "time_options": [
+ "5m",
+ "15m",
+ "1h",
+ "6h",
+ "12h",
+ "24h",
+ "2d",
+ "7d",
+ "30d"
+ ]
+ },
+ "timezone": "browser",
+ "title": "InfluxDB Relay",
+ "version": 1
+}
+{% endraw %}
\ No newline at end of file
diff --git a/influxdb/files/influxdb-relay.conf b/influxdb/files/influxdb-relay.conf
new file mode 100644
index 0000000..06fd210
--- /dev/null
+++ b/influxdb/files/influxdb-relay.conf
@@ -0,0 +1,59 @@
+{%- from "influxdb/map.jinja" import relay with context -%}
+
+{%- if relay.get('enabled') %}
+
+{%- if relay.telemetry.get('enabled') %}
+[telemetry]
+bind-addr = "{{ relay.telemetry.bind.get('address', '') }}:{{ relay.telemetry.bind.port }}"
+{%- endif %}
+
+{%- for name, listen in relay.listen.iteritems()|sort %}
+
+{%- if listen.get('enabled', True) and listen.get('type', 'http') in ('http', 'udp') %}
+
+{%- set listen_type = listen.get('type', 'http') %}
+[[{{ listen_type }}]]
+name = "{{ name }}"
+bind-addr = "{{ listen.bind.get('address', '') }}:{{ listen.bind.port }}"
+{%- if listen_type == 'http' and listen.default_retention_policy is defined %}
+default-retention-policy = "{{ listen.default_retention_policy }}"
+{%- endif %}
+{%- if listen_type == 'udp' and listen.precision is defined %}
+precision = "{{ listen.precision }}"
+{%- endif %}
+{%- if listen_type == 'udp' and listen.read_buffer is defined %}
+read-buffer = {{ listen.read_buffer|int }}
+{%- endif %}
+
+{%- set outputs = [] %}
+{%- for output_name, output in listen.get('output', {}).iteritems()|sort %}
+ {%- set tmp = ['name = "{}"'.format(output_name), 'location = "{}"'.format( output.location)] %}
+ {%- if listen_type == 'http' and output.timeout is defined %}
+ {%- do tmp.append('timeout = "{}"'.format(output.timeout)) %}
+ {%- endif %}
+ {%- if listen_type == 'http' and output.buffer_size_mb is defined %}
+ {%- do tmp.append('buffer-size-mb = {}'.format(output.buffer_size_mb)) %}
+ {%- endif %}
+ {%- if listen_type == 'http' and output.max_batch_kb is defined %}
+ {%- do tmp.append('max-batch-kb = {}'.format(output.max_batch_kb)) %}
+ {%- endif %}
+ {%- if listen_type == 'http' and output.max_delay_interval is defined %}
+ {%- do tmp.append('max-delay-interval = "{}"'.format(output.max_delay_interval)) %}
+ {%- endif %}
+ {%- if listen_type == 'udp' and output.mtu is defined %}
+ {%- do tmp.append('mtu = {}'.format(output.mtu)) %}
+ {%- endif %}
+ {%- do outputs.append(tmp) %}
+{%- endfor %}
+
+output = [
+{%- for output in outputs %}
+ { {{ output|join(', ') }} },
+{%- endfor %}
+]
+
+{%- endif %}
+
+{%- endfor %}
+
+{%- endif %}
diff --git a/influxdb/init.sls b/influxdb/init.sls
index a878715..95c7731 100644
--- a/influxdb/init.sls
+++ b/influxdb/init.sls
@@ -6,4 +6,7 @@
{%- if pillar.influxdb.client is defined %}
- influxdb.client
{%- endif %}
+{%- if pillar.influxdb.relay is defined %}
+- influxdb.relay
+{%- endif %}
{%- endif %}
diff --git a/influxdb/map.jinja b/influxdb/map.jinja
index 2c61594..83047b6 100644
--- a/influxdb/map.jinja
+++ b/influxdb/map.jinja
@@ -43,6 +43,10 @@
'http_errors_percentage': 5,
'failed_points_percentage': 5,
'dropped_points_percentage': 5,
+ 'max_relay_buffer_percentage': 70,
+ 'relay_failed_requests_percentage': 5,
+ 'service_failed_warning_threshold_percent': 0.3,
+ 'service_failed_critical_threshold_percent': 0.6,
},
}, grain='os_family', merge=salt['pillar.get']('influxdb:monitoring')) %}
@@ -50,3 +54,12 @@
'default': {
},
}, merge=salt['pillar.get']('influxdb:client')) %}
+
+{%- set relay = salt['grains.filter_by']({
+ 'default': {
+ 'pkgs': ['influxdb-relay'],
+ 'service': 'influxdb-relay',
+ 'listen': {},
+ 'telemetry': {},
+ },
+}, merge=salt['pillar.get']('influxdb:relay')) %}
diff --git a/influxdb/meta/grafana.yml b/influxdb/meta/grafana.yml
index 74c3f9e..0dead01 100644
--- a/influxdb/meta/grafana.yml
+++ b/influxdb/meta/grafana.yml
@@ -7,3 +7,22 @@
datasource: influxdb
format: json
template: influxdb/files/grafana_dashboards/influxdb_influxdb.json
+ influxdb_relay_prometheus:
+ datasource: prometheus
+ format: json
+ template: influxdb/files/grafana_dashboards/influxdb_relay_prometheus.json
+ main_prometheus:
+ datasource: prometheus
+ row:
+ ost-middleware:
+ title: Middleware
+ panel:
+ influxdb:
+ title: InfluxDB
+ links:
+ - dashboard: InfluxDB
+ title: InfluxDB
+ type: dashboard
+ target:
+ cluster_status:
+ expr: avg(influxdb_up) by (name)
\ No newline at end of file
diff --git a/influxdb/meta/prometheus.yml b/influxdb/meta/prometheus.yml
index 54a8b13..affbd77 100644
--- a/influxdb/meta/prometheus.yml
+++ b/influxdb/meta/prometheus.yml
@@ -1,21 +1,48 @@
{%- if pillar.influxdb.server is defined %}
-{%- from "influxdb/map.jinja" import server, monitoring with context %}
+{%- from "influxdb/map.jinja" import server, relay, monitoring with context %}
-{%- if server.get('enabled', False) %}
+{%- if server.get('enabled', False) or relay.get('enabled') %}
server:
alert:
{%- if server.get('http', {}).get('enabled', False) %}
- InfluxdbDown:
+ InfluxdbInfo:
if: >-
- influxdb_up != 1
+ influxdb_up == 0
labels:
- severity: warning
+ severity: info
service: influxdb
annotations:
{%- raw %}
summary: 'InfluxDB service down'
description: 'InfluxDB service is down on node {{ $labels.host }}'
{%- endraw %}
+ InfluxdbWarning:
+ if: >-
+ count(influxdb_up == 0) >= count(influxdb_up) * {{ monitoring.service_failed_warning_threshold_percent }}
+ labels:
+ severity: warning
+ service: influxdb
+ annotations:
+ summary: 'More than {{monitoring.service_failed_warning_threshold_percent*100}}% of InfluxDB services are down'
+ description: 'More than {{monitoring.service_failed_warning_threshold_percent*100}}% of InfluxDB services are down'
+ InfluxdbCritical:
+ if: >-
+ count(influxdb_up == 0) >= count(influxdb_up) * {{ monitoring.service_failed_critical_threshold_percent }}
+ labels:
+ severity: critical
+ service: influxdb
+ annotations:
+ summary: 'More than {{monitoring.service_failed_critical_threshold_percent*100}}% of InfluxDB services are down'
+ description: 'More than {{monitoring.service_failed_critical_threshold_percent*100}}% of InfluxDB services are down'
+ InfluxdbDown:
+ if: >-
+ count(influxdb_up == 0) == count(influxdb_up)
+ labels:
+ severity: down
+ service: influxdb
+ annotations:
+ summary: 'All InfluxDB services are down'
+ description: 'All InfluxDB services are down'
InfluxdbSeriesNumberHigh:
{%- set influx_max_series_threshold = monitoring.max_series_percentage * server.data.max_series_per_database / 100 %}
if: >-
@@ -70,6 +97,64 @@
annotations:
summary: 'Influxdb too many dropped writes'
description: '{{ printf `%.1f` $value }}% of written points have been dropped on {{ $labels.host }} (threshold={%- endraw %}{{ influx_http_points_written_dropped_threshold }}).'
+{%- if relay.get('enabled', False) and relay.telemetry is defined and relay.telemetry.get('enabled') %}
+ {%- set buffer_sizes = [] %}
+ {%- for name, listen in relay.listen.iteritems()|sort %}
+ {%- for backend_name, backend in listen.get('output', {}).iteritems()|sort %}{#- a listener may define no outputs #}
+ {%- do buffer_sizes.append(backend.get('buffer_size_mb', 0)|float) %}
+ {%- endfor %}
+ {%- endfor %}
+ {%- set buffer_sizes = buffer_sizes|sort %}
+ {%- set buffer_size = (buffer_sizes[-1] if buffer_sizes else 0) * 1024 * 1024 %}{#- largest configured buffer in bytes; 0 when no outputs exist #}
+ {%- if buffer_size > 0 %}
+ InfluxdbRelayBufferNearFull:
+ {%- set influx_relay_buffer_size_threshold = monitoring.max_relay_buffer_percentage %}
+ if: >-
+ influxdb_relay_backend_buffer_bytes > {{ buffer_size }} * {{ influx_relay_buffer_size_threshold }} / 100
+ {% raw %}
+ labels:
+ severity: warning
+ service: influxdb-relay
+ annotations:
+ summary: 'InfluxDB Relay buffer almost full'
+ description: 'The buffer size for the {{ $labels.instance }}/{{ $labels.backend }} backend is getting full (current value={{ $value }} bytes, threshold={%- endraw %}{{ buffer_size * influx_relay_buffer_size_threshold / 100 }}).'
+ {%- endif %}
+ InfluxdbRelayFailedRequests:
+ {%- set influx_relay_failed_requests_threshold = monitoring.relay_failed_requests_percentage %}
+ if: >-
+ rate(influxdb_relay_failed_requests_total[5m]) / rate(influxdb_relay_requests_total[5m]) * 100 > {{ influx_relay_failed_requests_threshold }}
+ {% raw %}
+ labels:
+ severity: warning
+ service: influxdb-relay
+ annotations:
+ summary: 'InfluxDB Relay too many failed requests'
+ description: '{{ printf `%.1f` $value }}% of requests have been dropped on {{ $labels.instance }} (threshold={%- endraw %}{{ influx_relay_failed_requests_threshold }}).'
+
+{%- endif %}
+
+{%- if relay.get('enabled') and relay.telemetry.get('enabled') %}
+
+{%- set addresses = [] %}
+{%- if relay.telemetry.get('bind', {}).address is defined and not relay.telemetry.bind.address.startswith('127') and relay.telemetry.bind.address != '0.0.0.0' %}
+{%- do addresses.append(relay.telemetry.bind.address) %}
+{%- endif %}
+{%- for address in grains['fqdn_ip4'] %}
+{%- if not address.startswith('127') %}
+{%- do addresses.append(address) %}
+{%- endif %}
+{%- endfor %}
+
+ target:
+ static:
+ influxdb_relay:
+ enabled: true
+ endpoint:
+ - address: {{ addresses[0] }}
+ port: {{ relay.telemetry.bind.port }}
+
+{%- endif %}
+
{%- endif %}
{%- endif %}
-{%- endif %}
+{%- endif %}
\ No newline at end of file
diff --git a/influxdb/relay.sls b/influxdb/relay.sls
new file mode 100644
index 0000000..58f0413
--- /dev/null
+++ b/influxdb/relay.sls
@@ -0,0 +1,26 @@
+{%- from "influxdb/map.jinja" import relay with context %}
+{%- if relay.get('enabled') %}
+
+influxdb_relay_packages:
+ pkg.installed:
+ - names: {{ relay.pkgs }}
+
+influxdb_relay_config:
+ file.managed:
+ - name: /etc/influxdb-relay/influxdb-relay.conf
+ - source: salt://influxdb/files/influxdb-relay.conf
+ - template: jinja
+ - require:
+ - pkg: influxdb_relay_packages
+
+influxdb_relay_service:
+ service.running:
+ - enable: true
+ - name: {{ relay.service }}
+{%- if grains.get('noservices') %}
+ - onlyif: /bin/false
+{%- endif %}
+ - watch:
+ - file: influxdb_relay_config
+
+{%- endif %}
diff --git a/metadata/service/relay/cluster.yml b/metadata/service/relay/cluster.yml
new file mode 100644
index 0000000..0dbe9b4
--- /dev/null
+++ b/metadata/service/relay/cluster.yml
@@ -0,0 +1,43 @@
+applications:
+- influxdb
+classes:
+- service.influxdb.support
+parameters:
+ _param:
+ influxdb_relay_timeout: 10s
+ influxdb_relay_buffer_size_mb: 512
+ influxdb_relay_max_batch_kb: 512
+ influxdb_relay_max_delay_inteval: 10s
+ influxdb:
+ relay:
+ enabled: true
+ telemetry:
+ enabled: true
+ bind:
+ address: ${_param:cluster_local_address}
+ port: 9196
+ listen:
+ http:
+ type: http
+ bind:
+ address: ${_param:cluster_local_address}
+ port: 9096
+ output:
+ influxdb01:
+ location: http://${_param:cluster_node01_address}:8086/write
+ timeout: ${_param:influxdb_relay_timeout}
+ buffer_size_mb: ${_param:influxdb_relay_buffer_size_mb}
+ max_batch_kb: ${_param:influxdb_relay_max_batch_kb}
+ max_delay_interval: ${_param:influxdb_relay_max_delay_inteval}
+ influxdb02:
+ location: http://${_param:cluster_node02_address}:8086/write
+ timeout: ${_param:influxdb_relay_timeout}
+ buffer_size_mb: ${_param:influxdb_relay_buffer_size_mb}
+ max_batch_kb: ${_param:influxdb_relay_max_batch_kb}
+ max_delay_interval: ${_param:influxdb_relay_max_delay_inteval}
+ influxdb03:
+ location: http://${_param:cluster_node03_address}:8086/write
+ timeout: ${_param:influxdb_relay_timeout}
+ buffer_size_mb: ${_param:influxdb_relay_buffer_size_mb}
+ max_batch_kb: ${_param:influxdb_relay_max_batch_kb}
+ max_delay_interval: ${_param:influxdb_relay_max_delay_inteval}
diff --git a/tests/pillar/relay.sls b/tests/pillar/relay.sls
new file mode 100644
index 0000000..cf7e866
--- /dev/null
+++ b/tests/pillar/relay.sls
@@ -0,0 +1,34 @@
+influxdb:
+ relay:
+ enabled: true
+ telemetry:
+ enabled: true
+ bind:
+ address: 127.0.0.1
+ port: 9196
+ listen:
+ http_backend:
+ type: http
+ bind:
+ address: 127.0.0.1
+ port: 9096
+ output:
+ server1:
+ location: http://server1:8086/write
+ timeout: 20s
+ buffer_size_mb: 512
+ max_batch_kb: 1024
+ max_delay_interval: 30s
+ server2:
+ location: http://server2:8086/write
+ udp_backend:
+ type: udp
+ bind:
+ address: 127.0.0.1
+ port: 9196
+ output:
+ server1:
+ location: http://server1:8086/write
+ mtu: 1500
+ server2:
+ location: http://server2:8086/write
diff --git a/tests/run_tests.sh b/tests/run_tests.sh
index a4cac88..29fb975 100755
--- a/tests/run_tests.sh
+++ b/tests/run_tests.sh
@@ -6,11 +6,13 @@
CURDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
METADATA=${CURDIR}/../metadata.yml
FORMULA_NAME=$(cat $METADATA | python -c "import sys,yaml; print yaml.load(sys.stdin)['name']")
+FORMULA_META_DIR=${CURDIR}/../${FORMULA_NAME}/meta
## Overrideable parameters
PILLARDIR=${PILLARDIR:-${CURDIR}/pillar}
BUILDDIR=${BUILDDIR:-${CURDIR}/build}
VENV_DIR=${VENV_DIR:-${BUILDDIR}/virtualenv}
+MOCK_BIN_DIR=${MOCK_BIN_DIR:-${CURDIR}/mock_bin}
DEPSDIR=${BUILDDIR}/deps
SALT_FILE_DIR=${SALT_FILE_DIR:-${BUILDDIR}/file_root}
@@ -40,6 +42,15 @@
pip install salt${PIP_SALT_VERSION}
}
+setup_mock_bin() {
+ # If some state requires a binary, a lightweight replacement for
+ # such binary can be put into MOCK_BIN_DIR for test purposes
+ if [ -d "${MOCK_BIN_DIR}" ]; then
+ PATH="${MOCK_BIN_DIR}:$PATH"
+ export PATH
+ fi
+}
+
setup_pillar() {
[ ! -d ${SALT_PILLAR_DIR} ] && mkdir -p ${SALT_PILLAR_DIR}
echo "base:" > ${SALT_PILLAR_DIR}/top.sls
@@ -121,6 +132,7 @@
[ -d ${BUILDDIR} ] && mkdir -p ${BUILDDIR}
which salt-call || setup_virtualenv
+ setup_mock_bin
setup_pillar
setup_salt
install_dependencies
@@ -130,7 +142,26 @@
for pillar in ${PILLARDIR}/*.sls; do
grep ${FORMULA_NAME}: ${pillar} &>/dev/null || continue
state_name=$(basename ${pillar%.sls})
+ salt_run grains.set 'noservices' False force=True
+
+ echo "Checking state ${FORMULA_NAME}.${state_name} ..."
salt_run --id=${state_name} state.show_sls ${FORMULA_NAME} || (log_err "Execution of ${FORMULA_NAME}.${state_name} failed"; exit 1)
+
+ # Check that all files in 'meta' folder can be rendered using any valid pillar
+ for meta in `find ${FORMULA_META_DIR} -type f`; do
+ meta_name=$(basename ${meta})
+ echo "Checking meta ${meta_name} ..."
+ salt_run --out=quiet --id=${state_name} cp.get_template ${meta} ${SALT_CACHE_DIR}/${meta_name} \
+ || (log_err "Failed to render meta ${meta} using pillar ${FORMULA_NAME}.${state_name}"; exit 1)
+ cat ${SALT_CACHE_DIR}/${meta_name}
+ done
+ done
+}
+
+real_run() { # Apply ${FORMULA_NAME} via state.sls once per pillar file; abort the script on the first failure
+ for pillar in ${PILLARDIR}/*.sls; do
+ state_name=$(basename ${pillar%.sls})
+ salt_run --id=${state_name} state.sls ${FORMULA_NAME} || { log_err "Execution of ${FORMULA_NAME}.${state_name} failed"; exit 1; } # brace group: exit must run in the current shell, not a subshell
 done
}
@@ -159,6 +190,9 @@
run)
run
;;
+ real-run)
+ real_run
+ ;;
*)
prepare
run