Keep legacy monitoring if Ceph is older than Luminous
Change-Id: Ifb85e4beeb2f5b62521399cf2b22aea3b0938dd8
Related-Prod: PROD-23021
diff --git a/ceph/files/grafana_dashboards/legacy/ceph_cluster_prometheus.json b/ceph/files/grafana_dashboards/legacy/ceph_cluster_prometheus.json
new file mode 100644
index 0000000..eb1fe5c
--- /dev/null
+++ b/ceph/files/grafana_dashboards/legacy/ceph_cluster_prometheus.json
@@ -0,0 +1,2125 @@
+{%- raw %}
+{
+ "annotations": {
+ "list": []
+ },
+ "description": "Ceph cluster overview.",
+ "editable": true,
+ "gnetId": null,
+ "graphTooltip": 0,
+ "hideControls": false,
+ "id": null,
+ "links": [],
+ "refresh": "1m",
+ "rows": [
+ {
+ "collapse": false,
+ "height": 101,
+ "panels": [
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": true,
+ "colors": [
+ "rgba(50, 172, 45, 0.97)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(245, 54, 54, 0.9)"
+ ],
+ "datasource": "prometheus",
+ "editable": true,
+ "error": false,
+ "format": "none",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "id": 21,
+ "interval": "1m",
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 2,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "min(ceph_overall_health) without (instance)",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 60
+ }
+ ],
+ "thresholds": "2,3",
+ "title": "Health Status",
+ "transparent": false,
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ },
+ {
+ "op": "=",
+ "text": "HEALTHY",
+ "value": "1"
+ },
+ {
+ "op": "=",
+ "text": "CRITICAL",
+ "value": "3"
+ },
+ {
+ "op": "=",
+ "text": "WARNING",
+ "value": "2"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": true,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "datasource": "prometheus",
+ "editable": true,
+ "error": false,
+ "format": "none",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "id": 14,
+ "interval": "1m",
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 1,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "avg(ceph_num_mon_quorum)",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 60
+ }
+ ],
+ "thresholds": "2,3",
+ "title": "Monitors",
+ "transparent": false,
+ "type": "singlestat",
+ "valueFontSize": "100%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "datasource": "prometheus",
+ "editable": true,
+ "error": false,
+ "format": "none",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "id": 26,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 1,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "min(ceph_osdmap_num_in_osds) without (instance)",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 30
+ }
+ ],
+ "thresholds": "",
+ "title": "OSDs IN",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": true,
+ "colorValue": false,
+ "colors": [
+ "rgba(50, 172, 45, 0.97)",
+ "rgba(237, 40, 40, 0.89)",
+ "rgba(245, 54, 54, 0.9)"
+ ],
+ "datasource": "prometheus",
+ "editable": true,
+ "error": false,
+ "format": "none",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "id": 27,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 1,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "min(ceph_osdmap_num_osds - ceph_osdmap_num_in_osds) without (instance)",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 30
+ }
+ ],
+ "thresholds": "1,1",
+ "title": "OSDs OUT",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "datasource": "prometheus",
+ "editable": true,
+ "error": false,
+ "format": "none",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "id": 28,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 1,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "min(ceph_osdmap_num_up_osds) without (instance)",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 30
+ }
+ ],
+ "thresholds": "",
+ "title": "OSDs UP",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": true,
+ "colorValue": false,
+ "colors": [
+ "rgba(50, 172, 45, 0.97)",
+ "rgba(237, 40, 40, 0.89)",
+ "rgba(245, 54, 54, 0.9)"
+ ],
+ "datasource": "prometheus",
+ "editable": true,
+ "error": false,
+ "format": "none",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "id": 29,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 2,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "min(ceph_osdmap_num_osds - ceph_osdmap_num_up_osds) without (instance)",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 30
+ }
+ ],
+ "thresholds": "1,1",
+ "title": "OSDs DOWN",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "rgba(249, 164, 30, 0.97)",
+ "rgba(96, 197, 32, 0.89)",
+ "rgba(245, 163, 54, 0.9)"
+ ],
+ "datasource": "prometheus",
+ "decimals": null,
+ "description": "Average number of PGs per OSD.",
+ "editable": true,
+ "error": false,
+ "format": "none",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "id": 36,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 2,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "min(ceph_pgmap_num_pgs/ceph_osdmap_num_in_osds) without (instance)",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "ceph_pgmap_num_pgs",
+ "refId": "A",
+ "step": 30
+ }
+ ],
+ "thresholds": "",
+ "title": "Avg PGs per OSD",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "rgba(249, 164, 30, 0.97)",
+ "rgba(96, 197, 32, 0.89)",
+ "rgba(245, 163, 54, 0.9)"
+ ],
+ "datasource": "prometheus",
+ "decimals": null,
+ "description": "Total number of PGs.",
+ "editable": true,
+ "error": false,
+ "format": "none",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "id": 37,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 2,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "min(ceph_pgmap_num_pgs) without (instance)",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "ceph_pgmap_num_pgs",
+ "refId": "A",
+ "step": 30
+ }
+ ],
+ "thresholds": "",
+ "title": "PGs total",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Status",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": 134,
+ "panels": [
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "datasource": "prometheus",
+ "editable": true,
+ "error": false,
+ "format": "none",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "id": 22,
+ "interval": "1m",
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 1,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": true,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": true
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "max(count(ceph_pool_usage_objects) by (instance))",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 60
+ }
+ ],
+ "thresholds": "",
+ "title": "Pools",
+ "transparent": false,
+ "type": "singlestat",
+ "valueFontSize": "100%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "0",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "rgba(50, 172, 45, 0.97)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(245, 54, 54, 0.9)"
+ ],
+ "datasource": "prometheus",
+ "editable": true,
+ "error": false,
+ "format": "bytes",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "id": 33,
+ "interval": "1m",
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 3,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": true,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": true
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "min(ceph_usage_total_bytes) without (instance)",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 60
+ }
+ ],
+ "thresholds": "0.025,0.1",
+ "title": "Cluster Capacity",
+ "transparent": false,
+ "type": "singlestat",
+ "valueFontSize": "100%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "rgba(50, 172, 45, 0.97)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(245, 54, 54, 0.9)"
+ ],
+ "datasource": "prometheus",
+ "editable": true,
+ "error": false,
+ "format": "bytes",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "id": 34,
+ "interval": "1m",
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 3,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": true,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": true
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "min(ceph_usage_total_used_bytes) without (instance)",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 60
+ }
+ ],
+ "thresholds": "0.025,0.1",
+ "title": "Used Capacity",
+ "transparent": false,
+ "type": "singlestat",
+ "valueFontSize": "100%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "datasource": "prometheus",
+ "editable": true,
+ "error": false,
+ "format": "percentunit",
+ "gauge": {
+ "maxValue": 1,
+ "minValue": 0,
+ "show": true,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "id": 23,
+ "interval": "1m",
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 5,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": true,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "min(ceph_usage_total_avail_bytes/ceph_usage_total_bytes) without (instance)",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 60
+ }
+ ],
+ "thresholds": "0.2,0.3",
+ "title": "Available Capacity",
+ "transparent": false,
+ "type": "singlestat",
+ "valueFontSize": "100%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "Capacity",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": 139,
+ "panels": [
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": true,
+ "colors": [
+ "rgba(50, 172, 45, 0.97)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(245, 54, 54, 0.9)"
+ ],
+ "datasource": "prometheus",
+ "editable": true,
+ "error": false,
+ "format": "ms",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "id": 35,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 3.5,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": true,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": true
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "avg(ceph_op_latency_sum) / avg(ceph_op_latency_avgcount)",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "ceph_op_latency_avgcount",
+ "refId": "A",
+ "step": 30
+ }
+ ],
+ "thresholds": "0.01,0.05",
+ "title": "Average OSD Op Latency RW",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "avg"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": true,
+ "colors": [
+ "rgba(50, 172, 45, 0.97)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(245, 54, 54, 0.9)"
+ ],
+ "datasource": "prometheus",
+ "editable": true,
+ "error": false,
+ "format": "ms",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "id": 31,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 4,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": true,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": true
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "(avg(ceph_apply_latency_sum) / avg(ceph_apply_latency_avgcount)) or (avg(ceph_commit_latency_sum) / avg(ceph_commit_latency_avgcount))",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 30
+ }
+ ],
+ "thresholds": "0.01,0.05",
+ "title": "Avg OSD Apply Latency",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "avg"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": true,
+ "colors": [
+ "rgba(50, 172, 45, 0.97)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(245, 54, 54, 0.9)"
+ ],
+ "datasource": "prometheus",
+ "editable": true,
+ "error": false,
+ "format": "ms",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "id": 32,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "repeat": null,
+ "span": 4,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": true,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": true
+ },
+ "tableColumn": "Value",
+ "targets": [
+ {
+ "expr": "avg(ceph_commit_latency_sum) / avg(ceph_commit_latency_avgcount)",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 30
+ }
+ ],
+ "thresholds": "0.01,0.05",
+ "timeFrom": null,
+ "title": "Avg OSD Commit Latency",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "avg"
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "Latency",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": 259,
+ "panels": [
+ {
+ "aliasColors": {
+ "Total Capacity": "#7EB26D",
+ "Used": "#BF1B00",
+ "total_avail": "#6ED0E0",
+ "total_space": "#7EB26D",
+ "total_used": "#890F02"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "prometheus",
+ "decimals": 0,
+ "editable": true,
+ "error": false,
+ "fill": 1,
+ "grid": {},
+ "height": "300",
+ "id": 3,
+ "interval": "$interval",
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "minSpan": null,
+ "nullPointMode": "null as zero",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "ceph_pgmap_write_op_per_sec or absent(ceph_pgmap_write_op_per_sec) - 1",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "Write",
+ "refId": "A",
+ "step": 60
+ },
+ {
+ "expr": "ceph_pgmap_read_op_per_sec or absent(ceph_pgmap_read_op_per_sec) - 1",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "Read",
+ "refId": "B",
+ "step": 60
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "IOPS",
+ "tooltip": {
+ "msResolution": true,
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "none",
+ "label": "",
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "prometheus",
+ "editable": true,
+ "error": false,
+ "fill": 1,
+ "grid": {},
+ "height": "300",
+ "id": 7,
+ "interval": "$interval",
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "null as zero",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "ceph_pgmap_write_bytes_sec or absent(ceph_pgmap_write_bytes_sec) - 1",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "Write",
+ "refId": "A",
+ "step": 60
+ },
+ {
+ "expr": "ceph_pgmap_read_bytes_sec or absent(ceph_pgmap_read_bytes_sec) - 1",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "Read",
+ "refId": "B",
+ "step": 60
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Throughput",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "Bps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "Performance",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": 295,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "prometheus",
+ "editable": true,
+ "error": false,
+ "fill": 1,
+ "grid": {},
+ "id": 18,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "null as zero",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "/^Total.*$/",
+ "stack": false
+ }
+ ],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "avg(ceph_num_object)",
+ "format": "time_series",
+ "interval": "$interval",
+ "intervalFactor": 1,
+ "legendFormat": "Total",
+ "refId": "A",
+ "step": 60
+ },
+ {
+ "expr": "avg(ceph_num_object_degraded)",
+ "format": "time_series",
+ "interval": "$interval",
+ "intervalFactor": 1,
+ "legendFormat": "Degraded",
+ "refId": "B",
+ "step": 60
+ },
+ {
+ "expr": "avg(ceph_num_object_misplaced)",
+ "format": "time_series",
+ "interval": "$interval",
+ "intervalFactor": 1,
+ "legendFormat": "Misplaced",
+ "refId": "C",
+ "step": 60
+ },
+ {
+ "expr": "avg(ceph_num_object_unfound)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Unfound",
+ "refId": "D",
+ "step": 10
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Objects in the Cluster",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 1,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {
+ "Available": "#EAB839",
+ "Total Capacity": "#447EBC",
+ "Used": "#BF1B00",
+ "total_avail": "#6ED0E0",
+ "total_space": "#7EB26D",
+ "total_used": "#890F02"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "prometheus",
+ "editable": true,
+ "error": false,
+ "fill": 4,
+ "grid": {},
+ "height": "300",
+ "id": 1,
+ "interval": "$interval",
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 0,
+ "links": [],
+ "minSpan": null,
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "Total Capacity",
+ "fill": 0,
+ "linewidth": 3,
+ "stack": false
+ }
+ ],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "ceph_usage_total_bytes - ceph_usage_total_used_bytes",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "Available",
+ "refId": "A",
+ "step": 60
+ },
+ {
+ "expr": "ceph_usage_total_used_bytes or absent(ceph_usage_total_used_bytes) - 1",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "Used",
+ "refId": "B",
+ "step": 60
+ },
+ {
+ "expr": "ceph_usage_total_bytes or absent(ceph_usage_total_bytes) - 1",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "Total Capacity",
+ "refId": "C",
+ "step": 60
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Capacity",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "prometheus",
+ "editable": true,
+ "error": false,
+ "fill": 1,
+ "grid": {},
+ "id": 19,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "null as zero",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "/^Total.*$/",
+ "stack": false
+ }
+ ],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "ceph_pgmap_num_pgs",
+ "format": "time_series",
+ "hide": false,
+ "interval": "$interval",
+ "intervalFactor": 1,
+ "legendFormat": "Total",
+ "refId": "A",
+ "step": 60
+ },
+ {
+ "expr": "ceph_pgmap_state_creating_peering",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "creating+peering",
+ "refId": "C",
+ "step": 4
+ },
+ {
+ "expr": "ceph_pgmap_state_stale_peering",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "stale+peering",
+ "refId": "D",
+ "step": 4
+ },
+ {
+ "expr": "ceph_pgmap_state_active_clean",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "active+clean",
+ "refId": "E",
+ "step": 4
+ },
+ {
+ "expr": "ceph_pgmap_state_active_undersized_degraded",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "active+undersized+degraded",
+ "refId": "F",
+ "step": 4
+ },
+ {
+ "expr": "ceph_pgmap_state_stale_active_clean",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "stale+active+clean",
+ "refId": "G",
+ "step": 4
+ },
+ {
+ "expr": "ceph_pgmap_state_undersized_degraded_peered",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "undersized+degraded+peered",
+ "refId": "H",
+ "step": 4
+ },
+ {
+ "expr": "ceph_pgmap_state_stale_remapped_peering",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "stale+remapped+peering",
+ "refId": "I",
+ "step": 4
+ },
+ {
+ "expr": "ceph_pgmap_state_creating",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "creating",
+ "refId": "J",
+ "step": 4
+ },
+ {
+ "expr": "ceph_pgmap_state_stale_active_undersized_degraded",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "stale+active+undersized+degraded",
+ "refId": "K",
+ "step": 4
+ },
+ {
+ "expr": "ceph_pgmap_state_active_recovering_degraded",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "active+recovering+degraded",
+ "refId": "B",
+ "step": 4
+ },
+ {
+ "expr": "ceph_pgmap_state_active_recovery_wait_degraded",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "active+recovery_wait+degraded",
+ "metric": "ceph_pgmap_state_active_recovery_wait_degraded",
+ "refId": "L",
+ "step": 4
+ },
+ {
+ "expr": "ceph_pgmap_state_active_remapped",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "active+remapped",
+ "metric": "ceph_pgmap_state",
+ "refId": "N",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "PGs",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 1,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "Capacity",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": "150px",
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "prometheus",
+ "editable": true,
+ "error": false,
+ "fill": 1,
+ "grid": {},
+ "id": 15,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "null as zero",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "rate(ceph_recovery_ops[2m])",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "Ops {{ host }}/{{ id }}",
+ "refId": "A",
+ "step": 5
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Ops",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 0,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "Recovery",
+ "titleSize": "h6"
+ }
+ ],
+ "schemaVersion": 14,
+ "style": "dark",
+ "tags": [
+ "ceph",
+ "cluster"
+ ],
+ "templating": {
+ "list": [
+ {
+ "auto": false,
+ "auto_count": 30,
+ "auto_min": "10s",
+ "current": {
+ "text": "1m",
+ "value": "1m"
+ },
+ "hide": 2,
+ "label": null,
+ "name": "interval",
+ "options": [
+ {
+ "selected": true,
+ "text": "1m",
+ "value": "1m"
+ },
+ {
+ "selected": false,
+ "text": "10m",
+ "value": "10m"
+ },
+ {
+ "selected": false,
+ "text": "30m",
+ "value": "30m"
+ },
+ {
+ "selected": false,
+ "text": "1h",
+ "value": "1h"
+ },
+ {
+ "selected": false,
+ "text": "6h",
+ "value": "6h"
+ },
+ {
+ "selected": false,
+ "text": "12h",
+ "value": "12h"
+ },
+ {
+ "selected": false,
+ "text": "1d",
+ "value": "1d"
+ },
+ {
+ "selected": false,
+ "text": "7d",
+ "value": "7d"
+ },
+ {
+ "selected": false,
+ "text": "14d",
+ "value": "14d"
+ },
+ {
+ "selected": false,
+ "text": "30d",
+ "value": "30d"
+ }
+ ],
+ "query": "1m,10m,30m,1h,6h,12h,1d,7d,14d,30d",
+ "refresh": 2,
+ "type": "interval"
+ }
+ ]
+ },
+ "time": {
+ "from": "now-1h",
+ "to": "now"
+ },
+ "timepicker": {
+ "refresh_intervals": [
+ "5s",
+ "10s",
+ "30s",
+ "1m",
+ "5m",
+ "15m",
+ "30m",
+ "1h",
+ "2h",
+ "1d"
+ ],
+ "time_options": [
+ "5m",
+ "15m",
+ "1h",
+ "6h",
+ "12h",
+ "24h",
+ "2d",
+ "7d",
+ "30d"
+ ]
+ },
+ "timezone": "browser",
+ "title": "Ceph - Cluster",
+ "version": 61
+}
+{%- endraw %}
diff --git a/ceph/files/grafana_dashboards/legacy/ceph_osd_prometheus.json b/ceph/files/grafana_dashboards/legacy/ceph_osd_prometheus.json
new file mode 100644
index 0000000..13789df
--- /dev/null
+++ b/ceph/files/grafana_dashboards/legacy/ceph_osd_prometheus.json
@@ -0,0 +1,967 @@
+{%- raw %}
+{
+ "annotations": {
+ "list": []
+ },
+ "description": "CEPH OSD Status.",
+ "editable": true,
+ "gnetId": null,
+ "graphTooltip": 0,
+ "hideControls": false,
+ "id": null,
+ "links": [],
+ "refresh": "1m",
+ "rows": [
+ {
+ "collapse": false,
+ "height": "100px",
+ "panels": [
+ {
+ "cacheTimeout": null,
+ "colorBackground": true,
+ "colorValue": false,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 40, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "datasource": "prometheus",
+ "editable": true,
+ "error": false,
+ "format": "none",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "id": 6,
+ "interval": null,
+ "links": [],
+ "mappingType": 2,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ },
+ {
+ "from": "0",
+ "text": "DOWN",
+ "to": "0.99"
+ },
+ {
+ "from": "0.99",
+ "text": "UP",
+ "to": "1"
+ }
+ ],
+ "span": 1,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "min(ceph_osdmap_num_up_osds) without (instance)",
+ "format": "time_series",
+ "interval": "$interval",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 60
+ }
+ ],
+ "thresholds": "0,1",
+ "timeFrom": null,
+ "title": "Status",
+ "transparent": false,
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "DOWN",
+ "value": "0"
+ },
+ {
+ "op": "=",
+ "text": "UP",
+ "value": "1"
+ },
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": true,
+ "colorValue": false,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 40, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "datasource": "prometheus",
+ "editable": true,
+ "error": false,
+ "format": "none",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "id": 8,
+ "interval": null,
+ "links": [],
+ "mappingType": 2,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ },
+ {
+ "from": "0",
+ "text": "OUT",
+ "to": "0.99"
+ },
+ {
+ "from": "0.99",
+ "text": "IN",
+ "to": "1"
+ }
+ ],
+ "span": 1,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "min(ceph_osdmap_num_in_osds) without (instance)",
+ "format": "time_series",
+ "interval": "$interval",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 60
+ }
+ ],
+ "thresholds": "0,1",
+ "timeFrom": null,
+ "title": "Available",
+ "transparent": false,
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "OUT",
+ "value": "0"
+ },
+ {
+ "op": "=",
+ "text": "IN",
+ "value": "1"
+ },
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "datasource": "prometheus",
+ "editable": true,
+ "error": false,
+ "format": "none",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "id": 10,
+ "interval": null,
+ "links": [],
+ "mappingType": 2,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 1,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "min(ceph_osdmap_num_osds) without (instance)",
+ "format": "time_series",
+ "interval": "$interval",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 60
+ }
+ ],
+ "thresholds": "0,1",
+ "timeFrom": null,
+ "title": "Total OSDs",
+ "transparent": false,
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "DOWN",
+ "value": "0"
+ },
+ {
+ "op": "=",
+ "text": "UP",
+ "value": "1"
+ },
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "New row",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": 248,
+ "panels": [
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": true,
+ "colors": [
+ "rgba(50, 172, 45, 0.97)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(245, 54, 54, 0.9)"
+ ],
+ "datasource": "prometheus",
+ "editable": true,
+ "error": false,
+ "format": "percentunit",
+ "gauge": {
+ "maxValue": 1,
+ "minValue": 0,
+ "show": true,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "id": 7,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 2,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "avg(ceph_osd_bytes_used)/avg(ceph_osd_bytes)",
+ "format": "time_series",
+ "interval": "$interval",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 60
+ }
+ ],
+ "thresholds": "0.6,0.8",
+ "timeFrom": null,
+ "title": "Utilization",
+ "transparent": false,
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "prometheus",
+ "decimals": 2,
+ "editable": true,
+ "error": false,
+ "fill": 1,
+ "grid": {},
+ "id": 5,
+ "interval": "$interval",
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "/^Average.*/",
+ "fill": 0,
+ "stack": false
+ }
+ ],
+ "spaceLength": 10,
+ "span": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "ceph_num_pg",
+ "format": "time_series",
+ "interval": "$interval",
+ "intervalFactor": 1,
+ "legendFormat": "Number of PGs - {{ host }}",
+ "refId": "A",
+ "step": 60
+ },
+ {
+ "expr": "avg(ceph_num_pg)",
+ "format": "time_series",
+ "interval": "$interval",
+ "intervalFactor": 1,
+ "legendFormat": "Average Number of PGs in the Cluster",
+ "refId": "B",
+ "step": 60
+ }
+ ],
+ "thresholds": [
+ {
+ "colorMode": "custom",
+ "line": true,
+ "lineColor": "rgba(216, 200, 27, 0.27)",
+ "op": "gt",
+ "value": 250
+ },
+ {
+ "colorMode": "custom",
+ "line": true,
+ "lineColor": "rgba(234, 112, 112, 0.22)",
+ "op": "gt",
+ "value": 300
+ }
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "PGs",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "OSD",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": 255,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "prometheus",
+ "decimals": 2,
+ "editable": true,
+ "error": false,
+ "fill": 1,
+ "grid": {},
+ "id": 4,
+ "interval": "$interval",
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 2,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "avg(rate(ceph_apply_latency_avgcount[5m]))",
+ "format": "time_series",
+ "hide": false,
+ "interval": "$interval",
+ "intervalFactor": 1,
+ "legendFormat": "Apply Latency",
+ "metric": "ceph_apply_latency_avgcount",
+ "refId": "A",
+ "step": 60
+ },
+ {
+ "expr": "avg(rate(ceph_commit_latency_avgcount[5m]))",
+ "format": "time_series",
+ "hide": false,
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "Commit Latency",
+ "refId": "B",
+ "step": 60
+ },
+ {
+ "expr": "avg(rate(ceph_apply_latency_avgcount[5m])) + avg(rate(ceph_commit_latency_avgcount[5m]))",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "Write Latency",
+ "refId": "C",
+ "step": 120
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Latency",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "µs",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "ms",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": false
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "prometheus",
+ "decimals": 2,
+ "editable": true,
+ "error": false,
+ "fill": 1,
+ "grid": {},
+ "id": 2,
+ "interval": "$interval",
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "avg(ceph_osd_bytes_avail)",
+ "format": "time_series",
+ "hide": false,
+ "interval": "$interval",
+ "intervalFactor": 1,
+ "legendFormat": "Available",
+ "metric": "ceph_osd_avail_bytes",
+ "refId": "A",
+ "step": 60
+ },
+ {
+ "expr": "avg(ceph_osd_bytes_used)",
+ "format": "time_series",
+ "interval": "$interval",
+ "intervalFactor": 1,
+ "legendFormat": "Used",
+ "metric": "ceph_osd_avail_bytes",
+ "refId": "B",
+ "step": 60
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "OSD Capacity",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "New row",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": 250,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "prometheus",
+ "fill": 1,
+ "id": 11,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "avg(ceph_osdop_read)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "read",
+ "refId": "A",
+ "step": 4
+ },
+ {
+ "expr": "avg(ceph_osdop_write)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "write",
+ "refId": "B",
+ "step": 4
+ },
+ {
+ "expr": "avg(ceph_osdop_append)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "append",
+ "refId": "C",
+ "step": 4
+ },
+ {
+ "expr": "avg(ceph_osdop_delete)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "delete",
+ "refId": "D",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "OSD ops",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "ops",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Dashboard Row",
+ "titleSize": "h6"
+ }
+ ],
+ "schemaVersion": 14,
+ "style": "dark",
+ "tags": [
+ "ceph",
+ "osd"
+ ],
+ "templating": {
+ "list": [
+ {
+ "auto": false,
+ "auto_count": 30,
+ "auto_min": "10s",
+ "current": {
+ "text": "1m",
+ "value": "1m"
+ },
+ "hide": 2,
+ "label": null,
+ "name": "interval",
+ "options": [
+ {
+ "selected": true,
+ "text": "1m",
+ "value": "1m"
+ },
+ {
+ "selected": false,
+ "text": "10m",
+ "value": "10m"
+ },
+ {
+ "selected": false,
+ "text": "30m",
+ "value": "30m"
+ },
+ {
+ "selected": false,
+ "text": "1h",
+ "value": "1h"
+ },
+ {
+ "selected": false,
+ "text": "6h",
+ "value": "6h"
+ },
+ {
+ "selected": false,
+ "text": "12h",
+ "value": "12h"
+ },
+ {
+ "selected": false,
+ "text": "1d",
+ "value": "1d"
+ },
+ {
+ "selected": false,
+ "text": "7d",
+ "value": "7d"
+ },
+ {
+ "selected": false,
+ "text": "14d",
+ "value": "14d"
+ },
+ {
+ "selected": false,
+ "text": "30d",
+ "value": "30d"
+ }
+ ],
+ "query": "1m,10m,30m,1h,6h,12h,1d,7d,14d,30d",
+ "refresh": 2,
+ "type": "interval"
+ }
+ ]
+ },
+ "time": {
+ "from": "now-1h",
+ "to": "now"
+ },
+ "timepicker": {
+ "refresh_intervals": [
+ "5s",
+ "10s",
+ "30s",
+ "1m",
+ "5m",
+ "15m",
+ "30m",
+ "1h",
+ "2h",
+ "1d"
+ ],
+ "time_options": [
+ "5m",
+ "15m",
+ "1h",
+ "6h",
+ "12h",
+ "24h",
+ "2d",
+ "7d",
+ "30d"
+ ]
+ },
+ "timezone": "browser",
+ "title": "Ceph - OSD",
+ "version": 33
+}
+{%- endraw %}
diff --git a/ceph/files/grafana_dashboards/legacy/ceph_pools_prometheus.json b/ceph/files/grafana_dashboards/legacy/ceph_pools_prometheus.json
new file mode 100644
index 0000000..61e2780
--- /dev/null
+++ b/ceph/files/grafana_dashboards/legacy/ceph_pools_prometheus.json
@@ -0,0 +1,895 @@
+{%- raw %}
+{
+ "annotations": {
+ "list": []
+ },
+ "description": "Ceph Pools dashboard.",
+ "editable": true,
+ "gnetId": null,
+ "graphTooltip": 0,
+ "hideControls": false,
+ "id": null,
+ "links": [],
+ "refresh": "1m",
+ "rows": [
+ {
+ "collapse": false,
+ "height": 250,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "prometheus",
+ "fill": 1,
+ "id": 13,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "ceph_pool_usage_bytes_used",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Bytes used - {{name}}",
+ "metric": "ceph_pool_usage_bytes_used",
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Capacity Used Per Pool",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "decbytes",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Dashboard Row",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": "250px",
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "prometheus",
+ "decimals": 2,
+ "editable": true,
+ "error": false,
+ "fill": 4,
+ "grid": {},
+ "height": "",
+ "id": 2,
+ "interval": "$interval",
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "/^Total.*$/",
+ "fill": 0,
+ "linewidth": 4,
+ "stack": false
+ },
+ {
+ "alias": "/^Raw.*$/",
+ "color": "#BF1B00",
+ "fill": 0,
+ "linewidth": 4
+ }
+ ],
+ "spaceLength": 10,
+ "span": 10,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "avg(ceph_pool_usage_max_avail{name=\"$pool\"})",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "Available - $pool",
+ "metric": "ceph_pool_available_bytes",
+ "refId": "A",
+ "step": 60
+ },
+ {
+ "expr": "avg(ceph_pool_usage_bytes_used{name=\"$pool\"})",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "Used - $pool",
+ "metric": "ceph_pool",
+ "refId": "B",
+ "step": 60
+ },
+ {
+ "expr": "avg(ceph_pool_usage_max_avail{name=\"$pool\"}) + avg(ceph_pool_usage_bytes_used{name=\"$pool\"})",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "Total - $pool",
+ "metric": "ceph_pool",
+ "refId": "C",
+ "step": 60
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Pool Storage",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "datasource": "prometheus",
+ "decimals": 2,
+ "editable": true,
+ "error": false,
+ "format": "percentunit",
+ "gauge": {
+ "maxValue": 1,
+ "minValue": 0,
+ "show": true,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "id": 10,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 2,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "avg(ceph_pool_usage_bytes_used{name=\"$pool\"}) / (avg(ceph_pool_usage_max_avail{name=\"$pool\"}) + avg(ceph_pool_usage_bytes_used{name=\"$pool\"}))",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 1,
+ "refId": "A",
+ "step": 30
+ }
+ ],
+ "thresholds": "",
+ "title": "Usage",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "Pool: $pool",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": "250px",
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "prometheus",
+ "editable": true,
+ "error": false,
+ "fill": 1,
+ "grid": {},
+ "height": "",
+ "id": 7,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "avg(ceph_pool_usage_objects{name=\"$pool\"})",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "Objects - $pool",
+ "refId": "A",
+ "step": 5
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Objects in Pool",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 0,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "prometheus",
+ "decimals": 2,
+ "editable": true,
+ "error": false,
+ "fill": 1,
+ "grid": {},
+ "id": 4,
+ "interval": "$interval",
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "null as zero",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "avg(irate(ceph_pool_stats_read_op_per_sec{name=\"$pool\"}[3m]))",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "Read - $pool",
+ "refId": "B",
+ "step": 60
+ },
+ {
+ "expr": "avg(irate(ceph_pool_stats_write_op_per_sec{name=\"$pool\"}[3m]))",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "Write - $pool",
+ "refId": "A",
+ "step": 60
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "IOPS",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "none",
+ "label": "IOPS",
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": "IOPS",
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": false
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "prometheus",
+ "decimals": 2,
+ "editable": true,
+ "error": false,
+ "fill": 1,
+ "grid": {},
+ "id": 5,
+ "interval": "$interval",
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "null as zero",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "avg(irate(ceph_pool_stats_read_bytes_sec{name=\"$pool\"}[3m]))",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "Read Bytes - $pool",
+ "refId": "A",
+ "step": 60
+ },
+ {
+ "expr": "avg(irate(ceph_pool_stats_write_bytes_sec{name=\"$pool\"}[3m]))",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "Written Bytes - $pool",
+ "refId": "B",
+ "step": 60
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Throughput",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "Bps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "Bps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "New row",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": 250,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "prometheus",
+ "description": "This is the capacity available in this pool, if all other pools stay at their current capacity. Depends on data protection method and available cluster capacity.",
+ "fill": 1,
+ "id": 11,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "avg(ceph_pool_usage_max_avail{name=\"$pool\"})\n",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Max available - $pool",
+ "metric": "ceph_pool_usage_max_avail",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Max Pool Capacity Available",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "decbytes",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "prometheus",
+ "description": "Size of current content of pool (usable).",
+ "fill": 1,
+ "id": 12,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "avg(ceph_pool_usage_bytes_used{name=\"$pool\"})",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Bytes used - $pool",
+ "metric": "ceph_pool_usage_bytes_used",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Pool capacity used",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "decbytes",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Dashboard Row",
+ "titleSize": "h6"
+ }
+ ],
+ "schemaVersion": 14,
+ "style": "dark",
+ "tags": [
+ "ceph",
+ "pools"
+ ],
+ "templating": {
+ "list": [
+ {
+ "allValue": null,
+ "current": {},
+ "datasource": "prometheus",
+ "hide": 0,
+ "includeAll": false,
+ "label": "Pool",
+ "multi": false,
+ "name": "pool",
+ "options": [],
+ "query": "label_values(ceph_pool_usage_kb_used, name)",
+ "refresh": 1,
+ "regex": "",
+ "sort": 0,
+ "tagValuesQuery": "",
+ "tags": [],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "auto": false,
+ "auto_count": 30,
+ "auto_min": "10s",
+ "current": {
+ "text": "1m",
+ "value": "1m"
+ },
+ "hide": 2,
+ "label": null,
+ "name": "interval",
+ "options": [
+ {
+ "selected": true,
+ "text": "1m",
+ "value": "1m"
+ },
+ {
+ "selected": false,
+ "text": "10m",
+ "value": "10m"
+ },
+ {
+ "selected": false,
+ "text": "30m",
+ "value": "30m"
+ },
+ {
+ "selected": false,
+ "text": "1h",
+ "value": "1h"
+ },
+ {
+ "selected": false,
+ "text": "6h",
+ "value": "6h"
+ },
+ {
+ "selected": false,
+ "text": "12h",
+ "value": "12h"
+ },
+ {
+ "selected": false,
+ "text": "1d",
+ "value": "1d"
+ },
+ {
+ "selected": false,
+ "text": "7d",
+ "value": "7d"
+ },
+ {
+ "selected": false,
+ "text": "14d",
+ "value": "14d"
+ },
+ {
+ "selected": false,
+ "text": "30d",
+ "value": "30d"
+ }
+ ],
+ "query": "1m,10m,30m,1h,6h,12h,1d,7d,14d,30d",
+ "refresh": 2,
+ "type": "interval"
+ }
+ ]
+ },
+ "time": {
+ "from": "now-1h",
+ "to": "now"
+ },
+ "timepicker": {
+ "refresh_intervals": [
+ "5s",
+ "10s",
+ "30s",
+ "1m",
+ "5m",
+ "15m",
+ "30m",
+ "1h",
+ "2h",
+ "1d"
+ ],
+ "time_options": [
+ "5m",
+ "15m",
+ "1h",
+ "6h",
+ "12h",
+ "24h",
+ "2d",
+ "7d",
+ "30d"
+ ]
+ },
+ "timezone": "browser",
+ "title": "Ceph - Pools",
+ "version": 29
+}
+{%- endraw %}
diff --git a/ceph/meta/grafana.yml b/ceph/meta/grafana.yml
index a82c81a..f43431b 100644
--- a/ceph/meta/grafana.yml
+++ b/ceph/meta/grafana.yml
@@ -1,4 +1,21 @@
+{%- from "ceph/map.jinja" import common with context -%}
+
+{%- if common.version is defined %}
dashboard:
+ {%- if common.version in ['kraken', 'jewel'] %}
+ ceph_cluster_prometheus:
+ datasource: prometheus
+ format: json
+ template: ceph/files/grafana_dashboards/legacy/ceph_cluster_prometheus.json
+ ceph_osd_prometheus:
+ datasource: prometheus
+ format: json
+ template: ceph/files/grafana_dashboards/legacy/ceph_osd_prometheus.json
+ ceph_pools_prometheus:
+ datasource: prometheus
+ format: json
+ template: ceph/files/grafana_dashboards/legacy/ceph_pools_prometheus.json
+ {%- else %}
ceph_cluster_prometheus:
datasource: prometheus
format: json
@@ -27,3 +44,5 @@
datasource: prometheus
format: json
template: ceph/files/grafana_dashboards/radosgw-detail_prometheus.json
+ {%- endif %}
+{%- endif %}
diff --git a/ceph/meta/prometheus.yml b/ceph/meta/prometheus.yml
index f91a4f2..86c8a47 100644
--- a/ceph/meta/prometheus.yml
+++ b/ceph/meta/prometheus.yml
@@ -1,7 +1,162 @@
-{%- from "ceph/map.jinja" import thresholds, mon, monitoring, setup, osd with context %}
+{%- from "ceph/map.jinja" import common, mon, monitoring, setup with context -%}
-{%- if (mon is defined and mon.get('enabled')) or (monitoring.cluster_stats.get('enabled') and monitoring.cluster_stats.ceph_user is defined) %}
-{% raw %}
+{%- if common.version is defined %}
+ {%- if common.version in ['kraken', 'jewel'] -%}
+
+ {%- if (mon is defined and mon.get('enabled')) or (monitoring.cluster_stats.get('enabled') and monitoring.cluster_stats.ceph_user is defined) %}
+ {%- raw %}
+server:
+ alert:
+ CephClusterHealthMinor:
+ if: >-
+ ceph_overall_health == 2
+ for: 3m
+ labels:
+ severity: minor
+ service: ceph
+ annotations:
+ summary: "Ceph cluster health is WARNING"
+ description: "The Ceph cluster is in the WARNING state. For details, run 'ceph -s'."
+ CephClusterHealthCritical:
+ if: >-
+ ceph_overall_health == 3
+ for: 3m
+ labels:
+ severity: critical
+ service: ceph
+ annotations:
+ summary: "Ceph cluster health is CRITICAL"
+ description: "The Ceph cluster is in the CRITICAL state. For details, run 'ceph -s'."
+ CephMonitorDownMinor:
+ if: >-
+ 100 * (1 - ceph_num_mon_quorum / ceph_num_mon) > 0
+ for: 3m
+ labels:
+ severity: minor
+ service: ceph
+ annotations:
+ summary: "Ceph Monitors are down"
+ description: "{{ $value }}% of Ceph Monitors are down. For details, run 'ceph -s'."
+ CephOsdDownMinor:
+ if: >-
+ 100 * (1 - ceph_osdmap_num_up_osds / ceph_osdmap_num_osds) > 0
+ for: 3m
+ labels:
+ severity: minor
+ service: ceph
+ annotations:
+ summary: "Ceph OSDs are down"
+ description: "{{ $value }}% of Ceph OSDs are down. For details, run 'ceph osd tree'."
+ CephOsdSpaceUsageWarning:
+ {%- endraw %}
+ {%- set threshold = monitoring.space_used_warning_threshold|default('0.75')|float %}
+ if: >-
+ ceph_osd_bytes_used > ceph_osd_bytes * {{threshold}}
+ {%- raw %}
+ for: 3m
+ labels:
+ severity: warning
+ service: ceph
+ annotations:
+ summary: "{% endraw %}{{100*threshold}}{% raw %}% of Ceph space is used"
+ description: "{{ $value }} bytes of Ceph OSD space (>={% endraw %}{{100*threshold}}{% raw %}%) is used for 3 minutes. For details, run 'ceph df'."
+ CephOsdSpaceUsageMajor:
+ {%- endraw %}
+ {%- set threshold = monitoring.space_used_critical_threshold|default('0.85')|float %}
+ if: >-
+ ceph_osd_bytes_used > ceph_osd_bytes * {{threshold}}
+ {%- raw %}
+ for: 3m
+ labels:
+ severity: major
+ service: ceph
+ annotations:
+ summary: "{% endraw %}{{100*threshold}}{% raw %}% of Ceph space is used"
+ description: "{{ $value }} bytes of Ceph OSD space (>={% endraw %}{{100*threshold}}{% raw %}%) is used for 3 minutes. For details, run 'ceph df'."
+ {%- endraw %}
+ {%- if setup.pool is defined %}
+ {%- for pool_name, pool in setup.pool.iteritems() %}
+ {%- if monitoring.pool is defined and monitoring.pool[pool_name] is defined %}
+ {%- set monitoring_pool = monitoring.pool[pool_name] %}
+ {%- else %}
+ {%- set monitoring_pool = monitoring %}
+ {%- endif %}
+ CephPool{{pool_name|replace(".", "")|replace("-", "")}}SpaceUsageWarning:
+ {%- set threshold = monitoring_pool.pool_space_used_utilization_warning_threshold|default('0.75')|float %}
+ if: >-
+ ceph_pool_usage_bytes_used{name="{{pool_name}}"} / (ceph_pool_usage_max_avail{name="{{pool_name}}"} + ceph_pool_usage_bytes_used{name="{{pool_name}}"}) > {{threshold}}
+ for: 3m
+ labels:
+ severity: warning
+ service: ceph
+ annotations:
+ summary: "{{100*threshold}}% of Ceph pool space is used"
+ description: "The Ceph {{pool_name}} pool uses {{100*threshold}}% of available space for 3 minutes. For details, run 'ceph df'."
+ CephPool{{pool_name|replace(".", "")|replace("-", "")}}SpaceUsageCritical:
+ {%- set threshold = monitoring_pool.pool_space_used_critical_threshold|default('0.85')|float %}
+ if: >-
+ ceph_pool_usage_bytes_used{name="{{pool_name}}"} / (ceph_pool_usage_max_avail{name="{{pool_name}}"} + ceph_pool_usage_bytes_used{name="{{pool_name}}"}) > {{threshold}}
+ for: 3m
+ labels:
+ severity: minor
+ service: ceph
+ annotations:
+ summary: "{{100*threshold}}% of Ceph pool space is used"
+ description: "The Ceph {{pool_name}} pool uses {{100*threshold}}% of available space for 3 minutes. For details, run 'ceph df'."
+ {%- if monitoring.cluster_stats.extra_alerts is defined and monitoring.cluster_stats.extra_alerts.get("enabled", False) %}
+ CephPool{{pool_name|replace(".", "")|replace("-", "")}}WriteOpsTooHigh:
+ {%- set threshold = monitoring_pool.pool_write_ops_threshold|default('200')|float %}
+ if: >-
+ ceph_pool_stats_write_op_per_sec{name="{{pool_name}}"} > {{threshold}}
+ for: 3m
+ labels:
+ severity: warning
+ service: ceph
+ annotations:
+ summary: "{{threshold}} Ceph pool write operations per second"
+ description: "The number of Ceph {{pool_name}} pool write operations per second is {{threshold}} for 3 minutes."
+ CephPool{{pool_name|replace(".", "")|replace("-", "")}}WriteBytesTooHigh:
+ {%- set threshold = monitoring_pool.pool_write_bytes_threshold|default('70000000')|float %}
+ if: >-
+ ceph_pool_stats_write_bytes_sec{name="{{pool_name}}"} > {{threshold}}
+ for: 3m
+ labels:
+ severity: warning
+ service: ceph
+ annotations:
+ summary: "{{threshold}} Ceph pool write bytes per second"
+ description: "The number of Ceph {{pool_name}} pool write bytes per second is {{threshold}} for 3 minutes."
+ CephPool{{pool_name|replace(".", "")|replace("-", "")}}ReadOpsTooHigh:
+ {%- set threshold = monitoring_pool.pool_read_ops_threshold|default('1000')|float %}
+ if: >-
+ ceph_pool_stats_read_op_per_sec{name="{{pool_name}}"} > {{threshold}}
+ for: 3m
+ labels:
+ severity: warning
+ service: ceph
+ annotations:
+ summary: "{{threshold}} Ceph pool read operations per second"
+ description: "The number of Ceph {{pool_name}} pool read operations per second is {{threshold}} for 3 minutes."
+ CephPool{{pool_name|replace(".", "")|replace("-", "")}}ReadBytesTooHigh:
+ {%- set threshold = monitoring_pool.pool_read_bytes_threshold|default('70000000')|float %}
+ if: >-
+ ceph_pool_stats_read_bytes_sec{name="{{pool_name}}"} > {{threshold}}
+ for: 3m
+ labels:
+ severity: warning
+ service: ceph
+ annotations:
+ summary: "{{threshold}} Ceph pool read bytes per second"
+ description: "The number of Ceph {{pool_name}} pool read bytes per second is {{threshold}} for 3 minutes."
+ {%- endif %}
+ {%- endfor %}
+ {%- endif %}
+ {%- endif -%}
+
+ {%- else -%}
+
+ {%- if mon is defined and mon.get('enabled') %}
+ {%- raw %}
server:
alert:
CephClusterHealthMinor:
@@ -55,8 +210,8 @@
severity: warning
service: ceph
annotations:
- summary: "{%-endraw %}{{100*threshold}}{%- raw %}% of Ceph space is used"
- description: "{{ $value }} bytes of Ceph OSD space (>= {%-endraw %}{{100*threshold}}{%- raw %}%) is used for 3 minutes. For details, run 'ceph df'."
+ summary: "{% endraw %}{{100*threshold}}{% raw %}% of Ceph space is used"
+ description: "{{ $value }} bytes of Ceph OSD space (>={% endraw %}{{100*threshold}}{% raw %}%) is used for 3 minutes. For details, run 'ceph df'."
CephOsdSpaceUsageMajor:
{%- endraw %}
{%- set threshold = monitoring.space_used_critical_threshold|default('0.85')|float %}
@@ -68,18 +223,18 @@
severity: major
service: ceph
annotations:
- summary: "{%-endraw %}{{100*threshold}}{%- raw %}% of Ceph space is used"
- description: "{{ $ value }} bytes of Ceph OSD space (>= {%-endraw %}{{100*threshold}}{%- raw %}%) is used for 3 minutes. For details, run 'ceph df'."
-{% endraw %}
-{%- if setup.pool is defined %}
-{%- for pool_name, pool in setup.pool.iteritems() %}
- {%- if monitoring.pool is defined and monitoring.pool[pool_name] is defined %}
- {%- set monitoring_pool = monitoring.pool[pool_name] %}
- {%- else %}
- {%- set monitoring_pool = monitoring %}
- {%- endif %}
+ summary: "{% endraw %}{{100*threshold}}{% raw %}% of Ceph space is used"
+ description: "{{ $value }} bytes of Ceph OSD space (>={% endraw %}{{100*threshold}}{% raw %}%) is used for 3 minutes. For details, run 'ceph df'."
+ {%- endraw %}
+ {%- if setup.pool is defined %}
+ {%- for pool_name, pool in setup.pool.iteritems() %}
+ {%- if monitoring.pool is defined and monitoring.pool[pool_name] is defined %}
+ {%- set monitoring_pool = monitoring.pool[pool_name] %}
+ {%- else %}
+ {%- set monitoring_pool = monitoring %}
+ {%- endif %}
CephPool{{pool_name|replace(".", "")|replace("-", "")}}SpaceUsageWarning:
- {%- set threshold = monitoring_pool.pool_space_used_utilization_warning_threshold|default('0.75')|float %}
+ {%- set threshold = monitoring_pool.pool_space_used_utilization_warning_threshold|default('0.75')|float %}
if: >-
ceph_pool_bytes_used / (ceph_pool_bytes_used + ceph_pool_max_avail) * on(pool_id) group_left(name) ceph_pool_metadata{name="{{pool_name}}"} > {{threshold}}
for: 3m
@@ -90,7 +245,7 @@
summary: "{{100*threshold}}% of Ceph pool space is used"
description: "The Ceph {{pool_name}} pool uses {{100*threshold}}% of available space for 3 minutes. For details, run 'ceph df'."
CephPool{{pool_name|replace(".", "")|replace("-", "")}}SpaceUsageCritical:
- {%- set threshold = monitoring_pool.pool_space_used_critical_threshold|default('0.85')|float %}
+ {%- set threshold = monitoring_pool.pool_space_used_critical_threshold|default('0.85')|float %}
if: >-
ceph_pool_bytes_used / (ceph_pool_bytes_used + ceph_pool_max_avail) * on(pool_id) group_left(name) ceph_pool_metadata{name="{{pool_name}}"} > {{threshold}}
for: 3m
@@ -100,21 +255,17 @@
annotations:
summary: "{{100*threshold}}% of Ceph pool space is used"
description: "The Ceph {{pool_name}} pool uses {{100*threshold}}% of available space for 3 minutes. For details, run 'ceph df'."
-{%- endfor %}
-{%- endif %}
-{%- endif %}
+ {%- endfor %}
+ {%- endif -%}
-
-{%- if mon is defined and mon.get('enabled') %}
-{%- set fqdn_ip4_addresses = [] %}
-{%- for addr in grains['fqdn_ip4'] %}
- {%- if not addr.startswith('127.') %}
- {%- do fqdn_ip4_addresses.append(addr) %}
- {%- endif %}
-{%- endfor %}
-{%- set address = fqdn_ip4_addresses[0] %}
-{%- if address is defined %}
-server:
+ {%- set fqdn_ip4_addresses = [] %}
+ {%- for addr in grains['fqdn_ip4'] %}
+ {%- if not addr.startswith('127.') %}
+ {%- do fqdn_ip4_addresses.append(addr) %}
+ {%- endif %}
+ {%- endfor %}
+ {%- set address = fqdn_ip4_addresses[0] %}
+ {%- if address is defined %}
target:
static:
ceph:
@@ -123,6 +274,7 @@
- address: {{ address }}
port: 9283
honor_labels: true
-
-{%- endif %}
+ {%- endif %}
+ {%- endif %}
+ {%- endif %}
{%- endif %}
diff --git a/ceph/meta/telegraf.yml b/ceph/meta/telegraf.yml
index 0fbb00a..cbbe981 100644
--- a/ceph/meta/telegraf.yml
+++ b/ceph/meta/telegraf.yml
@@ -1,67 +1,70 @@
-{%- from "ceph/map.jinja" import mon, osd, monitoring with context %}
+{%- from "ceph/map.jinja" import common, mon, osd, monitoring with context -%}
-{%- if mon is defined and mon.get('enabled') %}
+{%- if common.version is defined %}
+ {%- if common.version in ['kraken', 'jewel'] -%}
+
+ {%- if mon is defined and mon.get('enabled') %}
remote_agent:
input:
ceph:
template: ceph/files/telegraf.conf
-{%- if monitoring.cluster_stats is defined %}
+ {%- if monitoring.cluster_stats is defined %}
ceph_user: client.{{ monitoring.cluster_stats.ceph_user|default('admin') }}
gather_admin_socket_stats: {{ monitoring.cluster_stats.gather_admin_socket_stats|default('false') }}
gather_cluster_stats: {{ monitoring.cluster_stats.gather_cluster_stats|default('true') }}
gather_pool_loads: {{ monitoring.cluster_stats.gather_pool_loads|default('true') }}
-{%- if monitoring.cluster_stats.ceph_binary is defined %}
+ {%- if monitoring.cluster_stats.ceph_binary is defined %}
ceph_binary: {{ monitoring.cluster_stats.ceph_binary }}
-{%- endif %}
-{%- if monitoring.cluster_stats.socket_dir is defined %}
+ {%- endif %}
+ {%- if monitoring.cluster_stats.socket_dir is defined %}
socket_dir: {{ monitoring.cluster_stats.socket_dir }}
-{%- endif %}
-{%- if monitoring.cluster_stats.mon_prefix is defined %}
+ {%- endif %}
+ {%- if monitoring.cluster_stats.mon_prefix is defined %}
mon_prefix: {{ monitoring.cluster_stats.mon_prefix }}
-{%- endif %}
-{%- if monitoring.cluster_stats.osd_prefix is defined %}
+ {%- endif %}
+ {%- if monitoring.cluster_stats.osd_prefix is defined %}
osd_prefix: {{ monitoring.cluster_stats.osd_prefix }}
-{%- endif %}
-{%- if monitoring.interval is defined %}
+ {%- endif %}
+ {%- if monitoring.interval is defined %}
interval: {{ monitoring.interval }}
-{%- endif %}
-{%- else %}
+ {%- endif %}
+ {%- else %}
ceph_user: client.admin
gather_admin_socket_stats: false
gather_cluster_stats: true
gather_pool_loads: true
-{%- endif %}
+ {%- endif %}
agent:
input:
ceph:
template: ceph/files/telegraf.conf
-{%- if monitoring.cluster_stats is defined %}
+ {%- if monitoring.cluster_stats is defined %}
ceph_user: client.{{ monitoring.cluster_stats.ceph_user|default('admin') }}
gather_admin_socket_stats: {{ monitoring.cluster_stats.gather_admin_socket_stats|default('true') }}
gather_cluster_stats: {{ monitoring.cluster_stats.gather_cluster_stats|default('false') }}
gather_pool_loads: {{ monitoring.cluster_stats.gather_pool_loads|default('false') }}
-{%- if monitoring.cluster_stats.ceph_binary is defined %}
+ {%- if monitoring.cluster_stats.ceph_binary is defined %}
ceph_binary: {{ monitoring.cluster_stats.ceph_binary }}
-{%- endif %}
-{%- if monitoring.cluster_stats.socket_dir is defined %}
+ {%- endif %}
+ {%- if monitoring.cluster_stats.socket_dir is defined %}
socket_dir: {{ monitoring.cluster_stats.socket_dir }}
-{%- endif %}
-{%- if monitoring.cluster_stats.mon_prefix is defined %}
+ {%- endif %}
+ {%- if monitoring.cluster_stats.mon_prefix is defined %}
mon_prefix: {{ monitoring.cluster_stats.mon_prefix }}
-{%- endif %}
-{%- if monitoring.cluster_stats.osd_prefix is defined %}
+ {%- endif %}
+ {%- if monitoring.cluster_stats.osd_prefix is defined %}
osd_prefix: {{ monitoring.cluster_stats.osd_prefix }}
-{%- endif %}
-{%- if monitoring.interval is defined %}
+ {%- endif %}
+ {%- if monitoring.interval is defined %}
interval: {{ monitoring.interval }}
-{%- endif %}
-{%- else %}
+ {%- endif %}
+ {%- else %}
ceph_user: client.admin
gather_admin_socket_stats: true
gather_cluster_stats: false
gather_pool_loads: false
-{%- endif %}
-{%- elif monitoring.get('cluster_stats').get('enabled') %}
+ {%- endif %}
+ {%- elif monitoring.get('cluster_stats').get('enabled') %}
remote_agent:
input:
ceph:
@@ -70,33 +73,36 @@
gather_admin_socket_stats: false
gather_cluster_stats: true
gather_pool_loads: true
-{%- endif %}
+ {%- endif -%}
-{%- if osd is defined and osd.get('enabled') %}
+ {%- if osd is defined and osd.get('enabled') %}
agent:
input:
ceph:
template: ceph/files/telegraf.conf
fieldpass: [ "apply_latency*", "commitcycle_latency*", "op_latency*", "osdop_append", "osdop_delete", "osdop_read", "osdop_write", "recovery_ops" ]
-{%- if monitoring.node_stats is defined %}
+ {%- if monitoring.node_stats is defined %}
gather_admin_socket_stats: {{ monitoring.node_stats.gather_admin_socket_stats|default('true') }}
gather_cluster_stats: {{ monitoring.node_stats.gather_cluster_stats|default('false') }}
gather_pool_loads: {{ monitoring.cluster_stats.gather_pool_loads|default('false') }}
-{%- if monitoring.node_stats.socket_dir is defined %}
+ {%- if monitoring.node_stats.socket_dir is defined %}
socket_dir: {{ monitoring.node_stats.socket_dir }}
-{%- endif %}
-{%- if monitoring.node_stats.mon_prefix is defined %}
+ {%- endif %}
+ {%- if monitoring.node_stats.mon_prefix is defined %}
mon_prefix: {{ monitoring.node_stats.mon_prefix }}
-{%- endif %}
-{%- if monitoring.node_stats.osd_prefix is defined %}
+ {%- endif %}
+ {%- if monitoring.node_stats.osd_prefix is defined %}
osd_prefix: {{ monitoring.node_stats.osd_prefix }}
-{%- endif %}
-{%- if monitoring.interval is defined %}
+ {%- endif %}
+ {%- if monitoring.interval is defined %}
interval: {{ monitoring.interval }}
-{%- endif %}
-{%- else %}
+ {%- endif %}
+ {%- else %}
gather_admin_socket_stats: true
gather_cluster_stats: false
gather_pool_loads: false
-{%- endif %}
+ {%- endif %}
+ {%- endif -%}
+
+ {%- endif %}
{%- endif %}