Add Grafana dashboard with Prometheus datasource
Change-Id: I7ac9422882594f0cdca13a511722fc2556957b36
diff --git a/etcd/files/grafana_dashboards/etcd_prometheus.json b/etcd/files/grafana_dashboards/etcd_prometheus.json
new file mode 100644
index 0000000..2bdff24
--- /dev/null
+++ b/etcd/files/grafana_dashboards/etcd_prometheus.json
@@ -0,0 +1,795 @@
+{
+ "annotations": {
+ "list": []
+ },
+ "description": "etcd sample Grafana dashboard with Prometheus",
+ "editable": true,
+ "gnetId": null,
+ "graphTooltip": 0,
+ "hideControls": false,
+ "id": null,
+ "links": [],
+ "refresh": false,
+ "rows": [
+ {
+ "collapse": false,
+ "height": "250px",
+ "panels": [
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "editable": true,
+ "error": false,
+ "format": "none",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "id": 28,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 3,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "targets": [
+ {
+ "expr": "sum(etcd_server_has_leader)",
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "metric": "etcd_server_has_leader",
+ "refId": "A",
+ "step": 20
+ }
+ ],
+ "thresholds": "",
+ "title": "Up",
+ "type": "singlestat",
+ "valueFontSize": "200%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "avg"
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "editable": true,
+ "error": false,
+ "fill": 0,
+ "grid": {},
+ "id": 3,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 1,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "span": 4,
+ "stack": false,
+ "steppedLine": true,
+ "targets": [
+ {
+ "expr": "histogram_quantile(0.99, sum(rate(etcd_disk_wal_fsync_duration_seconds_bucket [5m])) by (instance, le))",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "{{instance}} WAL fsync",
+ "metric": "etcd_disk_wal_fsync_duration_seconds_bucket",
+ "refId": "A",
+ "step": 2
+ },
+ {
+ "expr": "histogram_quantile(0.99, sum(rate(etcd_disk_backend_commit_duration_seconds_bucket [5m])) by (instance, le))",
+ "intervalFactor": 2,
+ "legendFormat": "{{instance}} DB fsync",
+ "metric": "etcd_disk_backend_commit_duration_seconds_bucket",
+ "refId": "B",
+ "step": 2
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Disk Sync Duration",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 0,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "s",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "editable": true,
+ "error": false,
+ "fill": 0,
+ "id": 29,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "span": 5,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "process_resident_memory_bytes{job=\"etcd\"}",
+ "intervalFactor": 2,
+ "legendFormat": "{{instance}} Resident Memory",
+ "metric": "process_resident_memory_bytes",
+ "refId": "A",
+ "step": 2
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Memory",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Row",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": "250px",
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "editable": true,
+ "error": false,
+ "fill": 5,
+ "id": 22,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "span": 3,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "rate(etcd_network_client_grpc_received_bytes_total [1m])",
+ "intervalFactor": 2,
+ "legendFormat": "{{instance}} Client Traffic In",
+ "metric": "etcd_network_client_grpc_received_bytes_total",
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Client Traffic In",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "editable": true,
+ "error": false,
+ "fill": 5,
+ "id": 21,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "span": 3,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "rate(etcd_network_client_grpc_sent_bytes_total [1m])",
+ "intervalFactor": 2,
+ "legendFormat": "{{instance}} Client Traffic Out",
+ "metric": "etcd_network_client_grpc_sent_bytes_total",
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Client Traffic Out",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "Bps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "editable": true,
+ "error": false,
+ "fill": 0,
+ "id": 20,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "span": 3,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(rate(etcd_network_peer_received_bytes_total [1m])) by (instance)",
+ "intervalFactor": 2,
+ "legendFormat": "{{instance}} Peer Traffic In",
+ "metric": "etcd_network_peer_received_bytes_total",
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Peer Traffic In",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "Bps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "decimals": null,
+ "editable": true,
+ "error": false,
+ "fill": 0,
+ "grid": {},
+ "id": 16,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "span": 3,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(rate(etcd_network_peer_sent_bytes_total [1m])) by (instance)",
+ "hide": false,
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "{{instance}} Peer Traffic Out",
+ "metric": "etcd_network_peer_sent_bytes_total",
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Peer Traffic Out",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 0,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "Bps",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "New row",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": "250px",
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "editable": true,
+ "error": false,
+ "fill": 0,
+ "id": 40,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(rate(etcd_server_proposals_failed_total [1m]))",
+ "intervalFactor": 2,
+ "legendFormat": "Proposal Failure Rate",
+ "metric": "etcd_server_proposals_failed_total",
+ "refId": "A",
+ "step": 2
+ },
+ {
+ "expr": "sum(etcd_server_proposals_pending)",
+ "intervalFactor": 2,
+ "legendFormat": "Proposal Pending Total",
+ "metric": "etcd_server_proposals_pending",
+ "refId": "B",
+ "step": 2
+ },
+ {
+ "expr": "sum(rate(etcd_server_proposals_committed_total [1m]))",
+ "intervalFactor": 2,
+ "legendFormat": "Proposal Commit Rate",
+ "metric": "etcd_server_proposals_committed_total",
+ "refId": "C",
+ "step": 2
+ },
+ {
+ "expr": "sum(rate(etcd_server_proposals_applied_total [1m]))",
+ "intervalFactor": 2,
+ "legendFormat": "Proposal Apply Rate",
+ "refId": "D",
+ "step": 2
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Raft Proposals",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "decimals": 0,
+ "editable": true,
+ "error": false,
+ "fill": 0,
+ "id": 19,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "etcd_server_leader_changes_seen_total",
+ "intervalFactor": 2,
+ "legendFormat": "{{instance}} Leader Change Seen",
+ "metric": "etcd_server_leader_changes_seen_total",
+ "refId": "A",
+ "step": 2
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Rate Leader Elections",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "New row",
+ "titleSize": "h6"
+ }
+ ],
+ "schemaVersion": 14,
+ "style": "dark",
+ "tags": [],
+ "templating": {
+ "list": []
+ },
+ "time": {
+ "from": "now-15m",
+ "to": "now"
+ },
+ "timepicker": {
+ "now": true,
+ "refresh_intervals": [
+ "5s",
+ "10s",
+ "30s",
+ "1m",
+ "5m",
+ "15m",
+ "30m",
+ "1h",
+ "2h",
+ "1d"
+ ],
+ "time_options": [
+ "5m",
+ "15m",
+ "1h",
+ "6h",
+ "12h",
+ "24h",
+ "2d",
+ "7d",
+ "30d"
+ ]
+ },
+ "timezone": "browser",
+ "title": "Etcd Cluster",
+ "version": 0
+}
diff --git a/etcd/meta/grafana.yml b/etcd/meta/grafana.yml
index 810f5cf..f189fd7 100644
--- a/etcd/meta/grafana.yml
+++ b/etcd/meta/grafana.yml
@@ -1,4 +1,22 @@
+{%- from "etcd/map.jinja" import server with context %}
+
dashboard:
+{%- if server.monitoring is defined %}
+ {%- set backend = server.monitoring.get('backend') %}
+ {%- if backend is string %}
+ {%- set backend = [backend] %}
+ {%- endif %}
+{%- else %}
+{# legacy StackLight behavior #}
+{% set backend = ['influxdb'] %}
+{%- endif %}
+
+{%- if 'prometheus' in backend %}
+ etcd-cluster-prometheus:
+ format: json
+ template: etcd/files/grafana_dashboards/etcd_prometheus.json.json
+{%- endif %}
+{%- if 'influxdb' in backend %}
etcd:
format: json
template: etcd/files/grafana_dashboards/etcd_influxdb.json
@@ -17,3 +35,4 @@
cluster_status:
rawQuery: true
query: SELECT last(value) FROM cluster_status WHERE cluster_name = 'etcd' AND environment_label = '$environment' AND $timeFilter GROUP BY time($interval) fill(null)
+{%- endif %}