Support Grafana dashboards for Prometheus
Change-Id: I053e9d93f6feb2026e3fd63b4151d0907764cb92
diff --git a/kubernetes/files/grafana_dashboards/kubernetes_app_prometheus.json b/kubernetes/files/grafana_dashboards/kubernetes_app_prometheus.json
new file mode 100644
index 0000000..ffcea9c
--- /dev/null
+++ b/kubernetes/files/grafana_dashboards/kubernetes_app_prometheus.json
@@ -0,0 +1,1191 @@
+{% raw %}{
+ "annotations": {
+ "list": []
+ },
+ "description": "After selecting your namespace and container you get a wealth of metrics like request rate, error rate, response times, pod count, cpu and memory usage. You can view cpu and memory usage in a variety of ways, compared to the limit, compared to the request, per pod, average per pod, etc.",
+ "editable": true,
+ "gnetId": 1471,
+ "graphTooltip": 1,
+ "hideControls": false,
+ "id": null,
+ "links": [],
+ "refresh": "1m",
+ "rows": [
+ {
+ "collapse": false,
+ "height": "250px",
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "editable": true,
+ "error": false,
+ "fill": 1,
+ "grid": {},
+ "id": 3,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": false,
+ "max": true,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sort": "avg",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(irate(http_requests_total{app=\"$container\", handler!=\"prometheus\", kubernetes_namespace=\"$namespace\"}[30s])) by (kubernetes_namespace,app,code)",
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "native | {{code}}",
+ "refId": "A",
+ "step": 10
+ },
+ {
+ "expr": "sum(irate(nginx_http_requests_total{app=\"$container\", kubernetes_namespace=\"$namespace\"}[30s])) by (kubernetes_namespace,app,status)",
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "nginx | {{status}}",
+ "refId": "B",
+ "step": 10
+ },
+ {
+ "expr": "sum(irate(haproxy_backend_http_responses_total{app=\"$container\", kubernetes_namespace=\"$namespace\"}[30s])) by (app,kubernetes_namespace,code)",
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "haproxy | {{code}}",
+ "refId": "C",
+ "step": 10
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Request rate",
+ "tooltip": {
+ "msResolution": true,
+ "shared": false,
+ "sort": 0,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "ops",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "fill": 1,
+ "id": 15,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": false,
+ "max": true,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sort": "avg",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(irate(haproxy_backend_http_responses_total{app=\"$container\", kubernetes_namespace=\"$namespace\",code=\"5xx\"}[30s])) by (app,kubernetes_namespace) / sum(irate(haproxy_backend_http_responses_total{app=\"$container\", kubernetes_namespace=\"$namespace\"}[30s])) by (app,kubernetes_namespace)",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "haproxy",
+ "refId": "A",
+ "step": 20
+ },
+ {
+ "expr": "sum(irate(http_requests_total{app=\"$container\", handler!=\"prometheus\", kubernetes_namespace=\"$namespace\", code=~\"5[0-9]+\"}[30s])) by (kubernetes_namespace,app) / sum(irate(http_requests_total{app=\"$container\", handler!=\"prometheus\", kubernetes_namespace=\"$namespace\"}[30s])) by (kubernetes_namespace,app)",
+ "intervalFactor": 2,
+ "legendFormat": "native",
+ "refId": "B",
+ "step": 20
+ },
+ {
+ "expr": "sum(irate(nginx_http_requests_total{app=\"$container\", kubernetes_namespace=\"$namespace\", status=~\"5[0-9]+\"}[30s])) by (kubernetes_namespace,app) / sum(irate(nginx_http_requests_total{app=\"$container\", kubernetes_namespace=\"$namespace\"}[30s])) by (kubernetes_namespace,app)",
+ "intervalFactor": 2,
+ "legendFormat": "nginx",
+ "refId": "C",
+ "step": 20
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Error rate",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "percentunit",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Request rate",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": 224,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "editable": true,
+ "error": false,
+ "fill": 1,
+ "grid": {},
+ "id": 5,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": false,
+ "max": true,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sort": "max",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "span": 12,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "histogram_quantile(0.99, sum(rate(http_request_duration_microseconds{app=\"$container\", kubernetes_namespace=\"$namespace\"}[30s])) by (app,kubernetes_namespace,le))",
+ "intervalFactor": 1,
+ "legendFormat": "native | 0.99",
+ "metric": "http_requ",
+ "refId": "A",
+ "step": 5
+ },
+ {
+ "expr": "histogram_quantile(0.90, sum(rate(http_request_duration_seconds_bucket{app=\"$container\", kubernetes_namespace=\"$namespace\"}[30s])) by (app,kubernetes_namespace,le))",
+ "intervalFactor": 1,
+ "legendFormat": "native | 0.90",
+ "refId": "B",
+ "step": 5
+ },
+ {
+ "expr": "histogram_quantile(0.5, sum(rate(http_request_duration_seconds_bucket{app=\"$container\", kubernetes_namespace=\"$namespace\"}[30s])) by (app,kubernetes_namespace,le))",
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "native | 0.50",
+ "refId": "C",
+ "step": 5
+ },
+ {
+ "expr": "histogram_quantile(0.99, sum(rate(nginx_http_request_duration_seconds_bucket{app=\"$container\", kubernetes_namespace=\"$namespace\"}[30s])) by (app,kubernetes_namespace,le))",
+ "intervalFactor": 1,
+ "legendFormat": "nginx | 0.99",
+ "refId": "D",
+ "step": 5
+ },
+ {
+ "expr": "histogram_quantile(0.9, sum(rate(nginx_http_request_duration_seconds_bucket{app=\"$container\", kubernetes_namespace=\"$namespace\"}[30s])) by (app,kubernetes_namespace,le))",
+ "intervalFactor": 1,
+ "legendFormat": "nginx | 0.90",
+ "refId": "E",
+ "step": 5
+ },
+ {
+ "expr": "histogram_quantile(0.5, sum(rate(nginx_http_request_duration_seconds_bucket{app=\"$container\", kubernetes_namespace=\"$namespace\"}[30s])) by (app,kubernetes_namespace,le))",
+ "intervalFactor": 1,
+ "legendFormat": "nginx | 0.50",
+ "refId": "F",
+ "step": 5
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Response time percentiles",
+ "tooltip": {
+ "msResolution": true,
+ "shared": true,
+ "sort": 0,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "s",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Response time",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": 250,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "editable": true,
+ "error": false,
+ "fill": 1,
+ "id": 7,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": false,
+ "max": true,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sort": "avg",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "span": 12,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "count(count(container_memory_usage_bytes{container_name=\"$container\", namespace=\"$namespace\"}) by (pod_name))",
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "pods",
+ "refId": "A",
+ "step": 5
+ },
+ {
+ "expr": "count(count(container_memory_usage_bytes{container_name=\"$container\", namespace=\"$namespace\"}) by (kubernetes_io_hostname))",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "hosts",
+ "refId": "B",
+ "step": 10
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Number of pods",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Pod count",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": 250,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "editable": true,
+ "error": false,
+ "fill": 1,
+ "id": 12,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": false,
+ "max": true,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sort": "avg",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "elasticsearch-logging-data-20170207a (logging) - system",
+ "color": "#BF1B00"
+ },
+ {
+ "alias": "elasticsearch-logging-data-20170207a (logging) - user",
+ "color": "#508642"
+ }
+ ],
+ "span": 12,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(irate(container_cpu_system_seconds_total{container_name=\"$container\", namespace=\"$namespace\"}[30s])) by (namespace,container_name) / sum(container_spec_cpu_shares{container_name=\"$container\", namespace=\"$namespace\"} / 1024) by (namespace,container_name)",
+ "intervalFactor": 2,
+ "legendFormat": "system",
+ "refId": "C",
+ "step": 10
+ },
+ {
+ "expr": "sum(irate(container_cpu_user_seconds_total{container_name=\"$container\", namespace=\"$namespace\"}[30s])) by (namespace,container_name) / sum(container_spec_cpu_shares{container_name=\"$container\", namespace=\"$namespace\"} / 1024) by (namespace,container_name)",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "user",
+ "refId": "B",
+ "step": 10
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Cpu usage (relative to request)",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "percentunit",
+ "label": "",
+ "logBase": 1,
+ "max": "1",
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Usage relative to request",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": 250,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "fill": 1,
+ "id": 13,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": false,
+ "max": true,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sort": "avg",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(irate(container_cpu_usage_seconds_total{container_name=\"$container\", namespace=\"$namespace\"}[30s])) by (id,pod_name)",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "{{pod_name}}",
+ "refId": "A",
+ "step": 20
+ },
+ {
+ "expr": "sum(container_spec_cpu_quota{container_name=\"$container\", namespace=\"$namespace\"} / container_spec_cpu_period{container_name=\"$container\", namespace=\"$namespace\"}) by (namespace,container_name) / count(container_memory_usage_bytes{container_name=\"$container\", namespace=\"$namespace\"}) by (namespace,container_name) ",
+ "intervalFactor": 2,
+ "legendFormat": "limit",
+ "refId": "B",
+ "step": 20
+ },
+ {
+ "expr": "sum(container_spec_cpu_shares{container_name=\"$container\", namespace=\"$namespace\"} / 1024) by (namespace,container_name) / count(container_spec_cpu_shares{container_name=\"$container\", namespace=\"$namespace\"}) by (namespace,container_name) ",
+ "intervalFactor": 2,
+ "legendFormat": "request",
+ "refId": "C",
+ "step": 20
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Cpu usage (per pod)",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "cores",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "fill": 1,
+ "id": 14,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": false,
+ "max": true,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sort": "avg",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(container_memory_usage_bytes{container_name=\"$container\", namespace=\"$namespace\"}) by (id,pod_name)",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "{{pod_name}}",
+ "refId": "A",
+ "step": 20
+ },
+ {
+ "expr": "sum(container_spec_memory_limit_bytes{container_name=\"$container\", namespace=\"$namespace\"}) by (namespace,container_name) / count(container_memory_usage_bytes{container_name=\"$container\", namespace=\"$namespace\"}) by (container_name,namespace)",
+ "intervalFactor": 2,
+ "legendFormat": "limit",
+ "refId": "B",
+ "step": 20
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Memory usage (per pod)",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Usage per pod",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": 250,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "editable": true,
+ "error": false,
+ "fill": 1,
+ "id": 8,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": false,
+ "max": true,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sort": "avg",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(irate(container_cpu_usage_seconds_total{container_name=\"$container\", namespace=\"$namespace\"}[30s])) by (namespace,container_name) / count(container_memory_usage_bytes{container_name=\"$container\", namespace=\"$namespace\"}) by (namespace,container_name) ",
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "actual",
+ "refId": "A",
+ "step": 10
+ },
+ {
+ "expr": "sum(container_spec_cpu_quota{container_name=\"$container\", namespace=\"$namespace\"} / container_spec_cpu_period{container_name=\"$container\", namespace=\"$namespace\"}) by (namespace,container_name) / count(container_memory_usage_bytes{container_name=\"$container\", namespace=\"$namespace\"}) by (namespace,container_name) ",
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "limit",
+ "refId": "B",
+ "step": 10
+ },
+ {
+ "expr": "sum(container_spec_cpu_shares{container_name=\"$container\", namespace=\"$namespace\"} / 1024) by (namespace,container_name) / count(container_spec_cpu_shares{container_name=\"$container\", namespace=\"$namespace\"}) by (namespace,container_name) ",
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "request",
+ "refId": "C",
+ "step": 10
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Cpu usage (avg per pod)",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "none",
+ "label": "cores",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "editable": true,
+ "error": false,
+ "fill": 1,
+ "id": 9,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": false,
+ "max": true,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sort": "avg",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(container_memory_usage_bytes{container_name=\"$container\", namespace=\"$namespace\"}) by (namespace,container_name) / count(container_memory_usage_bytes{container_name=\"$container\", namespace=\"$namespace\"}) by (namespace,container_name) ",
+ "intervalFactor": 1,
+ "legendFormat": "actual",
+ "metric": "",
+ "refId": "A",
+ "step": 10
+ },
+ {
+ "expr": "sum(container_spec_memory_limit_bytes{container_name=\"$container\", namespace=\"$namespace\"}) by (namespace,container_name) / count(container_memory_usage_bytes{container_name=\"$container\", namespace=\"$namespace\"}) by (namespace,container_name) ",
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "limit",
+ "refId": "B",
+ "step": 10
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Memory usage (avg per pod)",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Usage per pod (average)",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": 259.4375,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "editable": true,
+ "error": false,
+ "fill": 1,
+ "grid": {},
+ "id": 1,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": false,
+ "max": true,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sort": "avg",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(irate(container_cpu_usage_seconds_total{container_name=\"$container\", namespace=\"$namespace\"}[30s])) by (namespace,container_name)",
+ "hide": false,
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "actual",
+ "metric": "",
+ "refId": "A",
+ "step": 10
+ },
+ {
+ "expr": "sum(container_spec_cpu_quota{container_name=\"$container\", namespace=\"$namespace\"} / container_spec_cpu_period{container_name=\"$container\", namespace=\"$namespace\"}) by (namespace,container_name)",
+ "intervalFactor": 1,
+ "legendFormat": "limit",
+ "refId": "B",
+ "step": 10
+ },
+ {
+ "expr": "sum(container_spec_cpu_shares{container_name=\"$container\", namespace=\"$namespace\"} / 1024) by (namespace,container_name) ",
+ "intervalFactor": 1,
+ "legendFormat": "request",
+ "refId": "C",
+ "step": 10
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Cpu usage (total)",
+ "tooltip": {
+ "msResolution": true,
+ "shared": false,
+ "sort": 0,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "none",
+ "label": "cores",
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "editable": true,
+ "error": false,
+ "fill": 1,
+ "grid": {},
+ "id": 2,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": false,
+ "max": true,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sort": "avg",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(container_memory_usage_bytes{container_name=\"$container\", namespace=\"$namespace\"}) by (namespace,container_name)",
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "actual",
+ "refId": "A",
+ "step": 10
+ },
+ {
+ "expr": "sum(container_spec_memory_limit_bytes{container_name=\"$container\", namespace=\"$namespace\"}) by (namespace,container_name)",
+ "intervalFactor": 1,
+ "legendFormat": "limit",
+ "refId": "B",
+ "step": 10
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Memory usage (total)",
+ "tooltip": {
+ "msResolution": true,
+ "shared": false,
+ "sort": 0,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Usage total",
+ "titleSize": "h6"
+ }
+ ],
+ "schemaVersion": 14,
+ "sharedCrosshair": true,
+ "style": "dark",
+ "tags": [],
+ "templating": {
+ "list": [
+ {
+ "allValue": ".+",
+ "current": {},
+ "hide": 0,
+ "includeAll": false,
+ "label": null,
+ "multi": false,
+ "name": "namespace",
+ "options": [],
+ "query": "label_values(container_memory_usage_bytes{namespace=~\".+\",container_name!=\"POD\"},namespace)",
+ "refresh": 1,
+ "refresh_on_load": true,
+ "regex": "",
+ "sort": 1,
+ "tagValuesQuery": null,
+ "tags": [],
+ "tagsQuery": null,
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": ".+",
+ "current": {},
+ "hide": 0,
+ "includeAll": false,
+ "label": null,
+ "multi": false,
+ "name": "container",
+ "options": [],
+ "query": "label_values(container_memory_usage_bytes{namespace=~\"$namespace\",container_name!=\"POD\"},container_name)",
+ "refresh": 1,
+ "refresh_on_load": true,
+ "regex": "",
+ "sort": 1,
+ "tagValuesQuery": null,
+ "tags": [],
+ "tagsQuery": null,
+ "type": "query",
+ "useTags": false
+ }
+ ]
+ },
+ "time": {
+ "from": "now-1h",
+ "to": "now"
+ },
+ "timepicker": {
+ "refresh_intervals": [
+ "5s",
+ "10s",
+ "30s",
+ "1m",
+ "5m",
+ "15m",
+ "30m",
+ "1h",
+ "2h",
+ "1d"
+ ],
+ "time_options": [
+ "5m",
+ "15m",
+ "1h",
+ "6h",
+ "12h",
+ "24h",
+ "2d",
+ "7d",
+ "30d"
+ ]
+ },
+ "timezone": "browser",
+ "title": "Kubernetes App Metrics",
+ "version": 3
+}
+{% endraw %}
diff --git a/kubernetes/meta/grafana.yml b/kubernetes/meta/grafana.yml
index 867ff99..61f49bd 100644
--- a/kubernetes/meta/grafana.yml
+++ b/kubernetes/meta/grafana.yml
@@ -1,4 +1,22 @@
+{%- from "kubernetes/map.jinja" import common with context %}
+
dashboard:
+{%- if common.monitoring is defined %}
+ {%- set backend = common.monitoring.get('backend') %}
+ {%- if backend is string %}
+ {%- set backend = [backend] %}
+ {%- endif %}
+{%- else %}
+{# legacy StackLight behavior #}
+{% set backend = ['influxdb'] %}
+{%- endif %}
+
+{%- if 'prometheus' in backend %}
+ kubernetes-application-prometheus:
+ format: json
+ template: kubernetes/files/grafana_dashboards/kubernetes_app_prometheus.json
+{%- endif %}
+{%- if 'influxdb' in backend %}
kubernetes:
format: json
template: kubernetes/files/grafana_dashboards/kubernetes_influxdb.json
@@ -33,4 +51,4 @@
cluster_status:
rawQuery: true
query: SELECT last(value) FROM cluster_status WHERE cluster_name = 'k8s-pool' AND environment_label = '$environment' AND $timeFilter GROUP BY time($interval) fill(null)
-
+{%- endif %}
diff --git a/metadata/service/common.yml b/metadata/service/common.yml
index b02880d..7cb5766 100644
--- a/metadata/service/common.yml
+++ b/metadata/service/common.yml
@@ -1,3 +1,6 @@
+classes:
+- service.kubernetes.logging
+- service.kubernetes.monitoring
parameters:
kubernetes:
common:
diff --git a/metadata/service/control/cluster.yml b/metadata/service/control/cluster.yml
index 0965a11..944de40 100644
--- a/metadata/service/control/cluster.yml
+++ b/metadata/service/control/cluster.yml
@@ -1,8 +1,6 @@
applications:
- kubernetes
-classes:
-- service.kubernetes.logging
parameters:
kubernetes:
control:
- enabled: true
\ No newline at end of file
+ enabled: true
diff --git a/metadata/service/monitoring.yml b/metadata/service/monitoring.yml
new file mode 100644
index 0000000..cbf56a3
--- /dev/null
+++ b/metadata/service/monitoring.yml
@@ -0,0 +1,6 @@
+parameters:
+ kubernetes:
+ common:
+ monitoring:
+ backend: prometheus
+
diff --git a/metadata/service/pool/cluster.yml b/metadata/service/pool/cluster.yml
index cdf145e..7c42e12 100644
--- a/metadata/service/pool/cluster.yml
+++ b/metadata/service/pool/cluster.yml
@@ -1,7 +1,6 @@
applications:
- kubernetes
classes:
-- service.kubernetes.logging
- service.kubernetes.support
- service.kubernetes.common
parameters:
diff --git a/metadata/service/pool/single.yml b/metadata/service/pool/single.yml
index a06e46b..b01a719 100644
--- a/metadata/service/pool/single.yml
+++ b/metadata/service/pool/single.yml
@@ -1,7 +1,6 @@
applications:
- kubernetes
classes:
-- service.kubernetes.logging
- service.kubernetes.support
- service.kubernetes.common
parameters: