Add Grafana support
Change-Id: Ie04d826ffc427f86e7aed7c66e469b5abf91e8d7
diff --git a/kubernetes/files/grafana_dashboards/kubernetes_influxdb.json b/kubernetes/files/grafana_dashboards/kubernetes_influxdb.json
new file mode 100644
index 0000000..0f57f9a
--- /dev/null
+++ b/kubernetes/files/grafana_dashboards/kubernetes_influxdb.json
@@ -0,0 +1,2025 @@
+{
+ "annotations": {
+ "list": [
+ {
+ "datasource": "lma",
+ "enable": true,
+ "iconColor": "rgba(255, 96, 96, 1)",
+ "limit": 100,
+ "name": "status",
+ "query": "select title,tags,text from annotations where $timeFilter and cluster =~ /k8s/",
+ "tagsColumn": "tags",
+ "textColumn": "text",
+ "titleColumn": "title",
+ "type": "alert"
+ }
+ ]
+ },
+ "editable": true,
+ "gnetId": null,
+ "graphTooltip": 1,
+ "hideControls": false,
+ "id": null,
+ "links": [],
+ "refresh": "1m",
+ "rows": [
+ {
+ "collapse": false,
+ "height": 250,
+ "panels": [
+ {
+ "cacheTimeout": null,
+ "colorBackground": true,
+ "colorValue": false,
+ "colors": [
+ "rgba(71, 212, 59, 0.4)",
+ "rgba(241, 181, 37, 0.73)",
+ "rgba(225, 40, 40, 0.59)"
+ ],
+ "editable": true,
+ "error": false,
+ "format": "none",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "id": 2,
+ "interval": "> 60s",
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 2,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "targets": [
+ {
+ "column": "value",
+ "condition": "",
+ "dsType": "influxdb",
+ "fill": "",
+ "function": "last",
+ "groupBy": [],
+ "groupByTags": [],
+ "groupby_field": "",
+ "interval": "",
+ "measurement": "cluster_status",
+ "policy": "default",
+ "query": "SELECT last(\"value\") FROM \"nginx\" WHERE \"environment_label\" = '$environment' AND $timeFilter GROUP BY time($interval) fill(null)",
+ "rawQuery": false,
+ "refId": "A",
+ "resultFormat": "time_series",
+ "select": [
+ [
+ {
+ "params": [
+ "value"
+ ],
+ "type": "field"
+ },
+ {
+ "params": [],
+ "type": "last"
+ }
+ ]
+ ],
+ "tags": [
+ {
+ "key": "environment_label",
+ "operator": "=~",
+ "value": "/^$environment$/"
+ },
+ {
+ "condition": "AND",
+ "key": "cluster_name",
+ "operator": "=",
+ "value": "k8s-master"
+ }
+ ]
+ }
+ ],
+ "thresholds": "1,3",
+ "title": "k8s-master",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "no data",
+ "value": "null"
+ },
+ {
+ "op": "=",
+ "text": "OKAY",
+ "value": "0"
+ },
+ {
+ "op": "=",
+ "text": "WARN",
+ "value": "1"
+ },
+ {
+ "op": "=",
+ "text": "UNKW",
+ "value": "2"
+ },
+ {
+ "op": "=",
+ "text": "CRIT",
+ "value": "3"
+ },
+ {
+ "op": "=",
+ "text": "DOWN",
+ "value": "4"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": true,
+ "colorValue": false,
+ "colors": [
+ "rgba(71, 212, 59, 0.4)",
+ "rgba(241, 181, 37, 0.73)",
+ "rgba(225, 40, 40, 0.59)"
+ ],
+ "editable": true,
+ "error": false,
+ "format": "none",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "id": 3,
+ "interval": "> 60s",
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 2,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "targets": [
+ {
+ "column": "value",
+ "condition": "",
+ "dsType": "influxdb",
+ "fill": "",
+ "function": "last",
+ "groupBy": [],
+ "groupByTags": [],
+ "groupby_field": "",
+ "interval": "",
+ "measurement": "cluster_status",
+ "policy": "default",
+ "query": "SELECT last(\"value\") FROM \"nginx\" WHERE \"environment_label\" = '$environment' AND $timeFilter GROUP BY time($interval) fill(null)",
+ "rawQuery": false,
+ "refId": "A",
+ "resultFormat": "time_series",
+ "select": [
+ [
+ {
+ "params": [
+ "value"
+ ],
+ "type": "field"
+ },
+ {
+ "params": [],
+ "type": "last"
+ }
+ ]
+ ],
+ "tags": [
+ {
+ "key": "environment_label",
+ "operator": "=~",
+ "value": "/^$environment$/"
+ },
+ {
+ "condition": "AND",
+ "key": "cluster_name",
+ "operator": "=",
+ "value": "k8s-pool"
+ }
+ ]
+ }
+ ],
+ "thresholds": "1,3",
+ "title": "k8s-pool",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "no data",
+ "value": "null"
+ },
+ {
+ "op": "=",
+ "text": "OKAY",
+ "value": "0"
+ },
+ {
+ "op": "=",
+ "text": "WARN",
+ "value": "1"
+ },
+ {
+ "op": "=",
+ "text": "UNKW",
+ "value": "2"
+ },
+ {
+ "op": "=",
+ "text": "CRIT",
+ "value": "3"
+ },
+ {
+ "op": "=",
+ "text": "DOWN",
+ "value": "4"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "format": "none",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "id": 12,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 2,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "targets": [
+ {
+ "dsType": "influxdb",
+ "groupBy": [
+ {
+ "params": [
+ "$interval"
+ ],
+ "type": "time"
+ },
+ {
+ "params": [
+ "null"
+ ],
+ "type": "fill"
+ }
+ ],
+ "measurement": "k8s_nodes_total",
+ "policy": "default",
+ "refId": "A",
+ "resultFormat": "time_series",
+ "select": [
+ [
+ {
+ "params": [
+ "value"
+ ],
+ "type": "field"
+ },
+ {
+ "params": [],
+ "type": "last"
+ }
+ ]
+ ],
+ "tags": []
+ },
+ {
+ "dsType": "influxdb",
+ "groupBy": [
+ {
+ "params": [
+ "$interval"
+ ],
+ "type": "time"
+ },
+ {
+ "params": [
+ "null"
+ ],
+ "type": "fill"
+ }
+ ],
+ "hide": true,
+ "measurement": "k8s_nodes",
+ "policy": "default",
+ "refId": "B",
+ "resultFormat": "time_series",
+ "select": [
+ [
+ {
+ "params": [
+ "value"
+ ],
+ "type": "field"
+ },
+ {
+ "params": [],
+ "type": "last"
+ }
+ ]
+ ],
+ "tags": [
+ {
+ "key": "environment_label",
+ "operator": "=~",
+ "value": "/^$environment$/"
+ },
+ {
+ "condition": "AND",
+ "key": "status",
+ "operator": "=",
+ "value": "not_ready"
+ }
+ ]
+ }
+ ],
+ "thresholds": "",
+ "title": "Total compute nodes",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "avg"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "format": "percent",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "id": 13,
+ "interval": "30s",
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 2,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "targets": [
+ {
+ "dsType": "influxdb",
+ "groupBy": [],
+ "hide": false,
+ "measurement": "k8s_nodes_percent",
+ "policy": "default",
+ "query": "SELECT 100 - last(\"value\") FROM \"k8s_nodes_percent\" WHERE \"environment_label\" =~ /^$environment$/ AND \"status\" = 'not_ready' AND $timeFilter",
+ "rawQuery": true,
+ "refId": "B",
+ "resultFormat": "time_series",
+ "select": [
+ [
+ {
+ "params": [
+ "value"
+ ],
+ "type": "field"
+ },
+ {
+ "params": [],
+ "type": "last"
+ }
+ ]
+ ],
+ "tags": [
+ {
+ "key": "environment_label",
+ "operator": "=~",
+ "value": "/^$environment$/"
+ },
+ {
+ "condition": "AND",
+ "key": "status",
+ "operator": "=",
+ "value": "not_ready"
+ }
+ ]
+ }
+ ],
+ "thresholds": "0,100",
+ "title": "Compute - ready",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "avg"
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "Master",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": "250px",
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "editable": true,
+ "error": false,
+ "fill": 0,
+ "grid": {},
+ "id": 19,
+ "interval": "> 60s",
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": true,
+ "max": true,
+ "min": true,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "span": 2,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "alias": "user $tag_service",
+ "column": "value",
+ "dsType": "influxdb",
+ "function": "mean",
+ "groupBy": [
+ {
+ "params": [
+ "auto"
+ ],
+ "type": "time"
+ },
+ {
+ "params": [
+ "service"
+ ],
+ "type": "tag"
+ },
+ {
+ "params": [
+ "0"
+ ],
+ "type": "fill"
+ }
+ ],
+ "measurement": "process_threads",
+ "policy": "default",
+ "query": "SELECT mean(\"value\") FROM \"lma_components_memory_rss\" WHERE \"service\" = 'grafana-server' AND \"hostname\" =~ /$server$/ AND $timeFilter GROUP BY time($interval) fill(0)",
+ "refId": "A",
+ "resultFormat": "time_series",
+ "select": [
+ [
+ {
+ "params": [
+ "value"
+ ],
+ "type": "field"
+ },
+ {
+ "params": [],
+ "type": "last"
+ }
+ ]
+ ],
+ "tags": [
+ {
+ "key": "service",
+ "operator": "=~",
+ "value": "/^$process$/"
+ },
+ {
+ "condition": "AND",
+ "key": "environment_label",
+ "operator": "=~",
+ "value": "/^$environment$/"
+ },
+ {
+ "condition": "AND",
+ "key": "hostname",
+ "operator": "=~",
+ "value": "/^$master$/"
+ }
+ ]
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Threads",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 0,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "editable": true,
+ "error": false,
+ "fill": 0,
+ "grid": {},
+ "id": 6,
+ "interval": "> 60s",
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": true,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "span": 4,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "alias": "user $tag_service",
+ "column": "value",
+ "dsType": "influxdb",
+ "function": "mean",
+ "groupBy": [
+ {
+ "params": [
+ "auto"
+ ],
+ "type": "time"
+ },
+ {
+ "params": [
+ "service"
+ ],
+ "type": "tag"
+ },
+ {
+ "params": [
+ "0"
+ ],
+ "type": "fill"
+ }
+ ],
+ "measurement": "process_cputime_user",
+ "policy": "default",
+ "query": "SELECT mean(\"value\") FROM \"lma_components_memory_rss\" WHERE \"service\" = 'grafana-server' AND \"hostname\" =~ /$server$/ AND $timeFilter GROUP BY time($interval) fill(0)",
+ "refId": "A",
+ "resultFormat": "time_series",
+ "select": [
+ [
+ {
+ "params": [
+ "value"
+ ],
+ "type": "field"
+ },
+ {
+ "params": [],
+ "type": "mean"
+ }
+ ]
+ ],
+ "tags": [
+ {
+ "key": "service",
+ "operator": "=~",
+ "value": "/^$process$/"
+ },
+ {
+ "condition": "AND",
+ "key": "environment_label",
+ "operator": "=~",
+ "value": "/^$environment$/"
+ },
+ {
+ "condition": "AND",
+ "key": "hostname",
+ "operator": "=~",
+ "value": "/^$master$/"
+ }
+ ]
+ },
+ {
+ "alias": "sys $tag_service",
+ "column": "value",
+ "dsType": "influxdb",
+ "function": "mean",
+ "groupBy": [
+ {
+ "params": [
+ "auto"
+ ],
+ "type": "time"
+ },
+ {
+ "params": [
+ "service"
+ ],
+ "type": "tag"
+ },
+ {
+ "params": [
+ "0"
+ ],
+ "type": "fill"
+ }
+ ],
+ "measurement": "process_cputime_syst",
+ "policy": "default",
+ "query": "SELECT mean(\"value\") FROM \"lma_components_memory_rss\" WHERE \"service\" = 'grafana-server' AND \"hostname\" =~ /$server$/ AND $timeFilter GROUP BY time($interval) fill(0)",
+ "refId": "B",
+ "resultFormat": "time_series",
+ "select": [
+ [
+ {
+ "params": [
+ "value"
+ ],
+ "type": "field"
+ },
+ {
+ "params": [],
+ "type": "mean"
+ }
+ ]
+ ],
+ "tags": [
+ {
+ "key": "hostname",
+ "operator": "=~",
+ "value": "/^$master$/"
+ },
+ {
+ "condition": "AND",
+ "key": "environment_label",
+ "operator": "=~",
+ "value": "/^$environment$/"
+ },
+ {
+ "condition": "AND",
+ "key": "service",
+ "operator": "=~",
+ "value": "/^$process$/"
+ }
+ ]
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "CPU usage",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 0,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "percent",
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "editable": true,
+ "error": false,
+ "fill": 0,
+ "grid": {},
+ "id": 1,
+ "interval": "> 60s",
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": true,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "span": 3,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "alias": "$tag_service",
+ "dsType": "influxdb",
+ "groupBy": [
+ {
+ "params": [
+ "$interval"
+ ],
+ "type": "time"
+ },
+ {
+ "params": [
+ "service"
+ ],
+ "type": "tag"
+ },
+ {
+ "params": [
+ "null"
+ ],
+ "type": "fill"
+ }
+ ],
+ "measurement": "process_memory_rss",
+ "policy": "default",
+ "refId": "A",
+ "resultFormat": "time_series",
+ "select": [
+ [
+ {
+ "params": [
+ "value"
+ ],
+ "type": "field"
+ },
+ {
+ "params": [],
+ "type": "mean"
+ }
+ ]
+ ],
+ "tags": [
+ {
+ "key": "hostname",
+ "operator": "=~",
+ "value": "/^$master$/"
+ },
+ {
+ "condition": "AND",
+ "key": "environment_label",
+ "operator": "=~",
+ "value": "/^$environment$/"
+ },
+ {
+ "condition": "AND",
+ "key": "service",
+ "operator": "=~",
+ "value": "/^$process$/"
+ }
+ ]
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Resident Set Size",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 0,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "editable": true,
+ "error": false,
+ "fill": 0,
+ "grid": {},
+ "id": 7,
+ "interval": "> 60s",
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": true,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "span": 3,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "alias": "read $tag_service",
+ "column": "value",
+ "dsType": "influxdb",
+ "function": "mean",
+ "groupBy": [
+ {
+ "params": [
+ "auto"
+ ],
+ "type": "time"
+ },
+ {
+ "params": [
+ "service"
+ ],
+ "type": "tag"
+ },
+ {
+ "params": [
+ "0"
+ ],
+ "type": "fill"
+ }
+ ],
+ "measurement": "process_disk_ops_read",
+ "policy": "default",
+ "query": "SELECT mean(\"value\") FROM \"lma_components_memory_rss\" WHERE \"service\" = 'grafana-server' AND \"hostname\" =~ /$server$/ AND $timeFilter GROUP BY time($interval) fill(0)",
+ "refId": "A",
+ "resultFormat": "time_series",
+ "select": [
+ [
+ {
+ "params": [
+ "value"
+ ],
+ "type": "field"
+ },
+ {
+ "params": [],
+ "type": "mean"
+ }
+ ]
+ ],
+ "tags": [
+ {
+ "key": "service",
+ "operator": "=~",
+ "value": "/^$process$/"
+ },
+ {
+ "condition": "AND",
+ "key": "hostname",
+ "operator": "=~",
+ "value": "/^$master$/"
+ },
+ {
+ "condition": "AND",
+ "key": "environment_label",
+ "operator": "=~",
+ "value": "/^$environment$/"
+ }
+ ]
+ },
+ {
+ "alias": "write $tag_service",
+ "column": "value",
+ "dsType": "influxdb",
+ "function": "mean",
+ "groupBy": [
+ {
+ "params": [
+ "auto"
+ ],
+ "type": "time"
+ },
+ {
+ "params": [
+ "service"
+ ],
+ "type": "tag"
+ },
+ {
+ "params": [
+ "0"
+ ],
+ "type": "fill"
+ }
+ ],
+ "measurement": "process_disk_bytes_write",
+ "policy": "default",
+ "query": "SELECT mean(\"value\") FROM \"lma_components_memory_rss\" WHERE \"service\" = 'grafana-server' AND \"hostname\" =~ /$server$/ AND $timeFilter GROUP BY time($interval) fill(0)",
+ "refId": "B",
+ "resultFormat": "time_series",
+ "select": [
+ [
+ {
+ "params": [
+ "value"
+ ],
+ "type": "field"
+ },
+ {
+ "params": [],
+ "type": "mean"
+ }
+ ]
+ ],
+ "tags": [
+ {
+ "key": "service",
+ "operator": "=~",
+ "value": "/k8s/"
+ },
+ {
+ "condition": "AND",
+ "key": "hostname",
+ "operator": "=~",
+ "value": "/^$master$/"
+ },
+ {
+ "condition": "AND",
+ "key": "service",
+ "operator": "=~",
+ "value": "/^$process$/"
+ }
+ ]
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Disk IO",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 0,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "Bps",
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "Master $master $process",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": 250,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "editable": true,
+ "error": false,
+ "fill": 0,
+ "grid": {},
+ "id": 20,
+ "interval": "> 60s",
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": true,
+ "max": true,
+ "min": true,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "span": 2,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "alias": "user $tag_service",
+ "column": "value",
+ "dsType": "influxdb",
+ "function": "mean",
+ "groupBy": [
+ {
+ "params": [
+ "auto"
+ ],
+ "type": "time"
+ },
+ {
+ "params": [
+ "service"
+ ],
+ "type": "tag"
+ },
+ {
+ "params": [
+ "0"
+ ],
+ "type": "fill"
+ }
+ ],
+ "measurement": "process_threads",
+ "policy": "default",
+ "query": "SELECT mean(\"value\") FROM \"lma_components_memory_rss\" WHERE \"service\" = 'grafana-server' AND \"hostname\" =~ /$server$/ AND $timeFilter GROUP BY time($interval) fill(0)",
+ "refId": "A",
+ "resultFormat": "time_series",
+ "select": [
+ [
+ {
+ "params": [
+ "value"
+ ],
+ "type": "field"
+ },
+ {
+ "params": [],
+ "type": "last"
+ }
+ ]
+ ],
+ "tags": [
+ {
+ "key": "service",
+ "operator": "=~",
+ "value": "/^$process$/"
+ },
+ {
+ "condition": "AND",
+ "key": "environment_label",
+ "operator": "=~",
+ "value": "/^$environment$/"
+ },
+ {
+ "condition": "AND",
+ "key": "hostname",
+ "operator": "=~",
+ "value": "/^$pool$/"
+ }
+ ]
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Threads",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 0,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "editable": true,
+ "error": false,
+ "fill": 0,
+ "grid": {},
+ "id": 16,
+ "interval": "> 60s",
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": true,
+ "max": true,
+ "min": true,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "span": 4,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "alias": "user $tag_service",
+ "column": "value",
+ "dsType": "influxdb",
+ "function": "mean",
+ "groupBy": [
+ {
+ "params": [
+ "auto"
+ ],
+ "type": "time"
+ },
+ {
+ "params": [
+ "service"
+ ],
+ "type": "tag"
+ },
+ {
+ "params": [
+ "0"
+ ],
+ "type": "fill"
+ }
+ ],
+ "measurement": "process_cputime_user",
+ "policy": "default",
+ "query": "SELECT mean(\"value\") FROM \"lma_components_memory_rss\" WHERE \"service\" = 'grafana-server' AND \"hostname\" =~ /$server$/ AND $timeFilter GROUP BY time($interval) fill(0)",
+ "refId": "A",
+ "resultFormat": "time_series",
+ "select": [
+ [
+ {
+ "params": [
+ "value"
+ ],
+ "type": "field"
+ },
+ {
+ "params": [],
+ "type": "mean"
+ }
+ ]
+ ],
+ "tags": [
+ {
+ "key": "service",
+ "operator": "=~",
+ "value": "/^$process$/"
+ },
+ {
+ "condition": "AND",
+ "key": "environment_label",
+ "operator": "=~",
+ "value": "/^$environment$/"
+ },
+ {
+ "condition": "AND",
+ "key": "hostname",
+ "operator": "=~",
+ "value": "/^$pool$/"
+ }
+ ]
+ },
+ {
+ "alias": "sys $tag_service",
+ "column": "value",
+ "dsType": "influxdb",
+ "function": "mean",
+ "groupBy": [
+ {
+ "params": [
+ "auto"
+ ],
+ "type": "time"
+ },
+ {
+ "params": [
+ "service"
+ ],
+ "type": "tag"
+ },
+ {
+ "params": [
+ "0"
+ ],
+ "type": "fill"
+ }
+ ],
+ "measurement": "process_cputime_syst",
+ "policy": "default",
+ "query": "SELECT mean(\"value\") FROM \"lma_components_memory_rss\" WHERE \"service\" = 'grafana-server' AND \"hostname\" =~ /$server$/ AND $timeFilter GROUP BY time($interval) fill(0)",
+ "refId": "B",
+ "resultFormat": "time_series",
+ "select": [
+ [
+ {
+ "params": [
+ "value"
+ ],
+ "type": "field"
+ },
+ {
+ "params": [],
+ "type": "mean"
+ }
+ ]
+ ],
+ "tags": [
+ {
+ "key": "hostname",
+ "operator": "=~",
+ "value": "/^$pool$/"
+ },
+ {
+ "condition": "AND",
+ "key": "environment_label",
+ "operator": "=~",
+ "value": "/^$environment$/"
+ },
+ {
+ "condition": "AND",
+ "key": "service",
+ "operator": "=~",
+ "value": "/^$process$/"
+ }
+ ]
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "CPU usage",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 0,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "percent",
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "editable": true,
+ "error": false,
+ "fill": 0,
+ "grid": {},
+ "id": 17,
+ "interval": "> 60s",
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": true,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "span": 3,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "alias": "rss",
+ "dsType": "influxdb",
+ "groupBy": [
+ {
+ "params": [
+ "$interval"
+ ],
+ "type": "time"
+ },
+ {
+ "params": [
+ "service"
+ ],
+ "type": "tag"
+ },
+ {
+ "params": [
+ "null"
+ ],
+ "type": "fill"
+ }
+ ],
+ "measurement": "process_memory_rss",
+ "policy": "default",
+ "refId": "A",
+ "resultFormat": "time_series",
+ "select": [
+ [
+ {
+ "params": [
+ "value"
+ ],
+ "type": "field"
+ },
+ {
+ "params": [],
+ "type": "mean"
+ }
+ ]
+ ],
+ "tags": [
+ {
+ "key": "hostname",
+ "operator": "=~",
+ "value": "/^$pool$/"
+ },
+ {
+ "condition": "AND",
+ "key": "environment_label",
+ "operator": "=~",
+ "value": "/^$environment$/"
+ },
+ {
+ "condition": "AND",
+ "key": "service",
+ "operator": "=~",
+ "value": "/^$process$/"
+ }
+ ]
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Resident Set Size",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 0,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "editable": true,
+ "error": false,
+ "fill": 0,
+ "grid": {},
+ "id": 18,
+ "interval": "> 60s",
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": true,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "span": 3,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "alias": "read $tag_service",
+ "column": "value",
+ "dsType": "influxdb",
+ "function": "mean",
+ "groupBy": [
+ {
+ "params": [
+ "auto"
+ ],
+ "type": "time"
+ },
+ {
+ "params": [
+ "service"
+ ],
+ "type": "tag"
+ },
+ {
+ "params": [
+ "0"
+ ],
+ "type": "fill"
+ }
+ ],
+ "measurement": "process_disk_ops_read",
+ "policy": "default",
+ "query": "SELECT mean(\"value\") FROM \"lma_components_memory_rss\" WHERE \"service\" = 'grafana-server' AND \"hostname\" =~ /$server$/ AND $timeFilter GROUP BY time($interval) fill(0)",
+ "refId": "A",
+ "resultFormat": "time_series",
+ "select": [
+ [
+ {
+ "params": [
+ "value"
+ ],
+ "type": "field"
+ },
+ {
+ "params": [],
+ "type": "mean"
+ }
+ ]
+ ],
+ "tags": [
+ {
+ "key": "service",
+ "operator": "=~",
+ "value": "/^$process$/"
+ },
+ {
+ "condition": "AND",
+ "key": "hostname",
+ "operator": "=~",
+ "value": "/^$pool$/"
+ },
+ {
+ "condition": "AND",
+ "key": "environment_label",
+ "operator": "=~",
+ "value": "/^$environment$/"
+ }
+ ]
+ },
+ {
+ "alias": "write $tag_service",
+ "column": "value",
+ "dsType": "influxdb",
+ "function": "mean",
+ "groupBy": [
+ {
+ "params": [
+ "auto"
+ ],
+ "type": "time"
+ },
+ {
+ "params": [
+ "service"
+ ],
+ "type": "tag"
+ },
+ {
+ "params": [
+ "0"
+ ],
+ "type": "fill"
+ }
+ ],
+ "measurement": "process_disk_bytes_write",
+ "policy": "default",
+ "query": "SELECT mean(\"value\") FROM \"lma_components_memory_rss\" WHERE \"service\" = 'grafana-server' AND \"hostname\" =~ /$server$/ AND $timeFilter GROUP BY time($interval) fill(0)",
+ "refId": "B",
+ "resultFormat": "time_series",
+ "select": [
+ [
+ {
+ "params": [
+ "value"
+ ],
+ "type": "field"
+ },
+ {
+ "params": [],
+ "type": "mean"
+ }
+ ]
+ ],
+ "tags": [
+ {
+ "key": "service",
+ "operator": "=~",
+ "value": "/k8s/"
+ },
+ {
+ "condition": "AND",
+ "key": "hostname",
+ "operator": "=~",
+ "value": "/^$pool$/"
+ },
+ {
+ "condition": "AND",
+ "key": "service",
+ "operator": "=~",
+ "value": "/^$process$/"
+ }
+ ]
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Disk IO",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 0,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "Bps",
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "Pool $pool $process",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": 250,
+ "panels": [],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Dashboard Row",
+ "titleSize": "h6"
+ }
+ ],
+ "schemaVersion": 14,
+ "sharedCrosshair": true,
+ "style": "dark",
+ "tags": [],
+ "templating": {
+ "list": [
+ {
+ "allValue": null,
+ "current": {},
+ "hide": 0,
+ "includeAll": false,
+ "label": null,
+ "multi": false,
+ "name": "environment",
+ "options": [],
+ "query": "show tag values from cpu_idle with key = environment_label",
+ "refresh": 1,
+ "refresh_on_load": true,
+ "regex": "",
+ "sort": 0,
+ "tagValuesQuery": "",
+ "tags": [],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": null,
+ "current": {},
+ "hide": 0,
+ "includeAll": false,
+ "label": null,
+ "multi": false,
+ "name": "master",
+ "options": [],
+ "query": "show tag values from process_threads with key=\"hostname\" where service =~ /scheduler/ and environment_label =~ /^$environment$/ ",
+ "refresh": 1,
+ "refresh_on_load": true,
+ "regex": "",
+ "sort": 0,
+ "tagValuesQuery": "",
+ "tags": [],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": null,
+ "current": {},
+ "hide": 0,
+ "includeAll": false,
+ "label": null,
+ "multi": false,
+ "name": "pool",
+ "options": [],
+ "query": "show tag values from process_threads with key=\"hostname\" where service =~ /kubelet/ and environment_label =~ /^$environment$/ ",
+ "refresh": 1,
+ "refresh_on_load": true,
+ "regex": "",
+ "sort": 0,
+ "tagValuesQuery": "",
+ "tags": [],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": null,
+ "current": {},
+ "hide": 0,
+ "includeAll": true,
+ "label": null,
+ "multi": true,
+ "name": "process",
+ "options": [],
+ "query": "show tag values from process_threads with key=\"service\" where service =~ /k8s/",
+ "refresh": 1,
+ "refresh_on_load": true,
+ "regex": "",
+ "sort": 0,
+ "tagValuesQuery": "",
+ "tags": [],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ }
+ ]
+ },
+ "time": {
+ "from": "now-1h",
+ "to": "now"
+ },
+ "timepicker": {
+ "refresh_intervals": [
+ "5s",
+ "10s",
+ "30s",
+ "1m",
+ "5m",
+ "15m",
+ "30m",
+ "1h",
+ "2h",
+ "1d"
+ ],
+ "time_options": [
+ "5m",
+ "15m",
+ "1h",
+ "6h",
+ "12h",
+ "24h",
+ "2d",
+ "7d",
+ "30d"
+ ]
+ },
+ "timezone": "browser",
+ "title": "Kubernetes",
+ "version": 8
+}
diff --git a/kubernetes/meta/grafana.yml b/kubernetes/meta/grafana.yml
new file mode 100644
index 0000000..f4fd87b
--- /dev/null
+++ b/kubernetes/meta/grafana.yml
@@ -0,0 +1,33 @@
+dashboard:
+ kubernetes:
+ format: json
+ template: kubernetes/files/grafana_dashboards/kubernetes_influxdb.json
+ main:
+ row:
+ kubernetes-control-plane:
+ title: Kubernetes Control Plane
+ panel:
+ kubernetes:
+ title: Kubernetes
+ links:
+ - dashboard: Kubernetes
+ title: Kubernetes
+ type: dashboard
+ target:
+ cluster_status:
+ rawQuery: true
+ query: SELECT last(value) FROM cluster_status WHERE cluster_name = 'k8s_master' AND environment_label = '$environment' AND $timeFilter GROUP BY time($interval) fill(null)
+ kubernetes-data-plane:
+ title: Kubernetes Data Plane
+ panel:
+ kubernetes:
+ title: Kubernetes
+ links:
+ - dashboard: Kubernetes
+ title: Kubernetes
+ type: dashboard
+ target:
+ cluster_status:
+ rawQuery: true
+ query: SELECT last(value) FROM cluster_status WHERE cluster_name = 'k8s_pool' AND environment_label = '$environment' AND $timeFilter GROUP BY time($interval) fill(null)
+
diff --git a/metadata/service/support.yml b/metadata/service/support.yml
index 6f58f72..a9a0c0e 100644
--- a/metadata/service/support.yml
+++ b/metadata/service/support.yml
@@ -5,6 +5,8 @@
enabled: true
heka:
enabled: true
+ grafana:
+ enabled: true
sensu:
enabled: false
sphinx: