Merge "Fix the Ceph disk activation logic when multiple osd are assigned to single disk"
diff --git a/.kitchen.yml b/.kitchen.yml
index 7e8343d..c6c5d32 100644
--- a/.kitchen.yml
+++ b/.kitchen.yml
@@ -34,11 +34,25 @@
name: inspec
sudo: true
+docker_images:
+ - &xenial-20163 <%=ENV['IMAGE_XENIAL_20163'] || 'docker-dev-local.docker.mirantis.net/epcim/salt/saltstack-ubuntu-xenial-salt-2016.3/salt:2018_11_19'%>
+ - &xenial-20177 <%=ENV['IMAGE_XENIAL_20177'] || 'docker-dev-local.docker.mirantis.net/epcim/salt/saltstack-ubuntu-xenial-salt-2017.7/salt:2018_11_19'%>
+ - &xenial-stable <%=ENV['IMAGE_XENIAL_STABLE'] || 'docker-dev-local.docker.mirantis.net/epcim/salt/saltstack-ubuntu-xenial-salt-stable/salt:2018_11_19'%>
platforms:
- - name: <%=ENV['PLATFORM'] || 'saltstack-ubuntu-xenial-salt-stable' %>
+ - name: xenial-2016.3
driver_config:
- image: <%=ENV['PLATFORM'] || 'docker-dev-local.docker.mirantis.net/epcim/salt/saltstack-ubuntu-xenial-salt-stable/salt:2018_11_19'%>
+ image: *xenial-20163
+ platform: ubuntu
+
+ - name: xenial-2017.7
+ driver_config:
+ image: *xenial-20177
+ platform: ubuntu
+
+ - name: xenial-stable
+ driver_config:
+ image: *xenial-stable
platform: ubuntu
suites:
diff --git a/.travis.yml b/.travis.yml
deleted file mode 100644
index d2a1609..0000000
--- a/.travis.yml
+++ /dev/null
@@ -1,45 +0,0 @@
-language: python
-python:
-- "2.7.13"
-sudo: required
-services:
- - docker
-
-install:
- - pip install PyYAML
- - pip install virtualenv
- - |
- if [ ! -e Gemfile ]; then
- curl -s -o ./Gemfile 'https://gerrit.mcp.mirantis.com/gitweb?p=salt-formulas/salt-formulas-scripts.git;a=blob_plain;f=Gemfile;hb=refs/heads/master'
- fi
- - bundle install
-
-env:
- - PLATFORM=docker-dev-local.docker.mirantis.net/epcim/salt/saltstack-ubuntu-xenial-salt-2016.3/salt:2018_11_19 SUITE=ceph-client-single
- - PLATFORM=docker-dev-local.docker.mirantis.net/epcim/salt/saltstack-ubuntu-xenial-salt-2016.3/salt:2018_11_19 SUITE=ceph-mon-single
- - PLATFORM=docker-dev-local.docker.mirantis.net/epcim/salt/saltstack-ubuntu-xenial-salt-2016.3/salt:2018_11_19 SUITE=ceph-osd-single
- - PLATFORM=docker-dev-local.docker.mirantis.net/epcim/salt/saltstack-ubuntu-xenial-salt-2017.7/salt:2018_11_19 SUITE=ceph-client-single
- - PLATFORM=docker-dev-local.docker.mirantis.net/epcim/salt/saltstack-ubuntu-xenial-salt-2017.7/salt:2018_11_19 SUITE=ceph-mon-single
- - PLATFORM=docker-dev-local.docker.mirantis.net/epcim/salt/saltstack-ubuntu-xenial-salt-2017.7/salt:2018_11_19 SUITE=ceph-osd-single
- - PLATFORM=docker-dev-local.docker.mirantis.net/epcim/salt/saltstack-ubuntu-xenial-salt-stable/salt:2018_11_19 SUITE=ceph-client-single
- - PLATFORM=docker-dev-local.docker.mirantis.net/epcim/salt/saltstack-ubuntu-xenial-salt-stable/salt:2018_11_19 SUITE=ceph-mon-single
- - PLATFORM=docker-dev-local.docker.mirantis.net/epcim/salt/saltstack-ubuntu-xenial-salt-stable/salt:2018_11_19 SUITE=ceph-osd-single
-
-before_script:
- - set -o pipefail
- - make test | tail
-
-script:
- - test ! -e .kitchen.yml || bundle exec kitchen converge ${SUITE} || true
- - test ! -e .kitchen.yml || bundle exec kitchen verify ${SUITE} -t tests/integration
-
-notifications:
- webhooks:
- urls:
- - https://webhooks.gitter.im/e/6123573504759330786b
- on_success: change # options: [always|never|change] default: always
- on_failure: never # options: [always|never|change] default: always
- on_start: never # options: [always|never|change] default: always
- on_cancel: never # options: [always|never|change] default: always
- on_error: never # options: [always|never|change] default: always
- email: false
diff --git a/LICENSE b/LICENSE
index ee729da..97c1317 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,4 +1,4 @@
-Copyright 2015 tcp cloud a.s.
+Copyright 2019 Mirantis Inc. et al.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -10,4 +10,4 @@
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
-limitations under the License.
\ No newline at end of file
+limitations under the License.
diff --git a/README.rst b/README.rst
index e4488fa..a0c3d3c 100644
--- a/README.rst
+++ b/README.rst
@@ -841,7 +841,7 @@
Migration from Decapod to salt-formula-ceph
--------------------------------------------
-The following configuration will run a python script which will generate ceph config and osd disk mappings to be put in cluster model.
+The following configuration will run a python script which will generate ceph config and osd disk mappings to be put in cluster model.
.. code-block:: yaml
@@ -859,37 +859,3 @@
* https://github.com/cloud-ee/ceph-salt-formula
* http://ceph.com/ceph-storage/
* http://ceph.com/docs/master/start/intro/
-
-
-Documentation and bugs
-======================
-
-To learn how to install and update salt-formulas, consult the documentation
-available online at:
-
- http://salt-formulas.readthedocs.io/
-
-In the unfortunate event that bugs are discovered, they should be reported to
-the appropriate issue tracker. Use Github issue tracker for specific salt
-formula:
-
- https://github.com/salt-formulas/salt-formula-ceph/issues
-
-For feature requests, bug reports or blueprints affecting entire ecosystem,
-use Launchpad salt-formulas project:
-
- https://launchpad.net/salt-formulas
-
-You can also join salt-formulas-users team and subscribe to mailing list:
-
- https://launchpad.net/~salt-formulas-users
-
-Developers wishing to work on the salt-formulas projects should always base
-their work on master branch and submit pull request against specific formula.
-
- https://github.com/salt-formulas/salt-formula-ceph
-
-Any questions or feedback is always welcome so feel free to join our IRC
-channel:
-
- #salt-formulas @ irc.freenode.net
diff --git a/ceph/files/backup/ceph-backup-client-runner-call.sh b/ceph/files/backup/ceph-backup-client-runner-call.sh
index 74d6e92..81e49cf 100644
--- a/ceph/files/backup/ceph-backup-client-runner-call.sh
+++ b/ceph/files/backup/ceph-backup-client-runner-call.sh
@@ -22,7 +22,7 @@
SERVERBACKUPDIR="{{ backup.client.target.get('backup_dir', backup.backup_dir) }}/full"
TMPDIR="$( pwd )/tmp_ceph_backup"
HOSTNAME="$( hostname )"
- TIMESTAMP="$( date +%m%d%k%M )"
+ TIMESTAMP="$( date +%m%d%H%M )"
SCRIPTDIR="/usr/local/bin"
KEEP={{ backup.client.full_backups_to_keep }}
diff --git a/ceph/files/backup/ceph-backup-client-runner.sh b/ceph/files/backup/ceph-backup-client-runner.sh
index 2c2a0b2..971f944 100644
--- a/ceph/files/backup/ceph-backup-client-runner.sh
+++ b/ceph/files/backup/ceph-backup-client-runner.sh
@@ -7,7 +7,7 @@
BACKUPDIR="{{ backup.backup_dir }}/full"
TMPDIR="$( pwd )/tmp_ceph_backup"
HOSTNAME="$( hostname )"
- TIMESTAMP="$( date +%m%d%k%M )"
+ TIMESTAMP="$( date +%m%d%H%M )"
# Need write access to local directory to create dump file
if [ ! -w $( pwd ) ]; then
diff --git a/ceph/files/grafana_dashboards/ceph-cluster_prometheus.json b/ceph/files/grafana_dashboards/ceph-cluster_prometheus.json
new file mode 100644
index 0000000..2f54c74
--- /dev/null
+++ b/ceph/files/grafana_dashboards/ceph-cluster_prometheus.json
@@ -0,0 +1,1011 @@
+{%- raw %}
+{
+ "annotations": {
+ "list": [
+ {
+ "builtIn": 1,
+ "datasource": "prometheus",
+ "enable": true,
+ "hide": true,
+ "iconColor": "rgba(0, 211, 255, 1)",
+ "name": "Annotations & Alerts",
+ "type": "dashboard"
+ }
+ ]
+ },
+ "description": "Ceph cluster overview",
+ "editable": true,
+ "gnetId": null,
+ "graphTooltip": 0,
+ "id": null,
+ "iteration": 1545072771425,
+ "links": [],
+ "panels": [
+ {
+ "cacheTimeout": null,
+ "colorBackground": true,
+ "colorValue": false,
+ "colors": [
+ "rgba(50, 128, 45, 0.9)",
+ "rgba(237, 129, 40, 0.9)",
+ "rgb(255, 0, 0)"
+ ],
+ "datasource": null,
+ "editable": true,
+ "error": false,
+ "format": "none",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 3,
+ "w": 2,
+ "x": 0,
+ "y": 0
+ },
+ "hideTimeOverride": true,
+ "id": 21,
+ "interval": "1m",
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 2,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "ceph_health_status{instance=~'$instance'}",
+ "format": "time_series",
+ "interval": "$interval",
+ "intervalFactor": 1,
+ "refId": "A",
+ "step": 60
+ }
+ ],
+ "thresholds": "1,2",
+ "timeFrom": "1m",
+ "title": "Health Status",
+ "transparent": false,
+ "type": "singlestat",
+ "valueFontSize": "50%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "OK",
+ "value": "0"
+ },
+ {
+ "op": "=",
+ "text": "WARN",
+ "value": "1"
+ },
+ {
+ "op": "=",
+ "text": "ERR",
+ "value": "2"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "#299c46",
+ "rgba(237, 129, 40, 0.89)",
+ "#d44a3a"
+ ],
+ "datasource": null,
+ "format": "percentunit",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": true,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 6,
+ "w": 4,
+ "x": 4,
+ "y": 0
+ },
+ "id": 47,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": true
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "sum(ceph_osd_stat_bytes_used{instance=~\"$instance\"})/sum(ceph_osd_stat_bytes{instance=~\"$instance\"})",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "Used",
+ "refId": "A"
+ }
+ ],
+ "thresholds": "70,80",
+ "title": "Capacity used",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": null,
+ "fill": 0,
+ "gridPos": {
+ "h": 6,
+ "w": 8,
+ "x": 8,
+ "y": 0
+ },
+ "id": 53,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "Active",
+ "color": "#508642",
+ "fill": 1,
+ "stack": "A"
+ },
+ {
+ "alias": "Total",
+ "color": "#f9e2d2"
+ },
+ {
+ "alias": "Degraded",
+ "color": "#eab839"
+ },
+ {
+ "alias": "Undersized",
+ "color": "#f9934e"
+ },
+ {
+ "alias": "Inconsistent",
+ "color": "#e24d42"
+ },
+ {
+ "alias": "Down",
+ "color": "#bf1b00"
+ },
+ {
+ "alias": "Inactive",
+ "color": "#bf1b00",
+ "fill": 4,
+ "linewidth": 0,
+ "stack": "A"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "ceph_pg_total",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "Total",
+ "refId": "A"
+ },
+ {
+ "expr": "ceph_pg_active",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "Active",
+ "refId": "B"
+ },
+ {
+ "expr": "ceph_pg_total - ceph_pg_active",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "Inactive",
+ "refId": "G"
+ },
+ {
+ "expr": "ceph_pg_undersized",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "Undersized",
+ "refId": "F"
+ },
+ {
+ "expr": "ceph_pg_degraded",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "Degraded",
+ "refId": "C"
+ },
+ {
+ "expr": "ceph_pg_inconsistent",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "Inconsistent",
+ "refId": "D"
+ },
+ {
+ "expr": "ceph_pg_down",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "Down",
+ "refId": "E"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "PG States",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": null,
+ "fill": 0,
+ "gridPos": {
+ "h": 6,
+ "w": 8,
+ "x": 16,
+ "y": 0
+ },
+ "id": 66,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "Avg Apply Latency",
+ "color": "#7eb26d"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "quantile(0.95, ceph_osd_apply_latency_ms{instance=~\"$instance\"})",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "Apply Latency P_95",
+ "refId": "A"
+ },
+ {
+ "expr": "quantile(0.95, ceph_osd_commit_latency_ms{instance=~\"$instance\"})",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "Commit Latency P_95",
+ "refId": "B"
+ },
+ {
+ "expr": "avg(ceph_osd_apply_latency_ms{instance=~\"$instance\"})",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "Avg Apply Latency",
+ "refId": "C"
+ },
+ {
+ "expr": "avg(ceph_osd_commit_latency_ms{instance=~\"$instance\"})",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "Avg Commit Latency",
+ "refId": "D"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "OSD Latencies",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "ms",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": null,
+ "fill": 1,
+ "gridPos": {
+ "h": 9,
+ "w": 12,
+ "x": 0,
+ "y": 6
+ },
+ "id": 45,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 0.5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "Reads",
+ "transform": "negative-Y"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(irate(ceph_osd_op_w_in_bytes{instance=~\"$instance\"}[1m]))",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "Writes",
+ "refId": "A"
+ },
+ {
+ "expr": "sum(irate(ceph_osd_op_r_out_bytes{instance=~\"$instance\"}[1m]))",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "Reads",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Cluster I/O",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "Bps",
+ "label": "Read (-) / Write (+)",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": null,
+ "fill": 1,
+ "gridPos": {
+ "h": 9,
+ "w": 12,
+ "x": 12,
+ "y": 6
+ },
+ "id": 62,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(deriv(ceph_pool_bytes_used{instance=~\"$instance\"}[1m]))",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "Bytes",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "In-/Egress",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "Bps",
+ "label": " Egress (-) / Ingress (+)",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "cards": {
+ "cardPadding": null,
+ "cardRound": 1
+ },
+ "color": {
+ "cardColor": "rgb(0, 254, 255)",
+ "colorScale": "sqrt",
+ "colorScheme": "interpolateBlues",
+ "exponent": 0.5,
+ "min": null,
+ "mode": "spectrum"
+ },
+ "dataFormat": "timeseries",
+ "datasource": null,
+ "gridPos": {
+ "h": 9,
+ "w": 6,
+ "x": 0,
+ "y": 15
+ },
+ "heatmap": {},
+ "highlightCards": true,
+ "id": 55,
+ "legend": {
+ "show": true
+ },
+ "links": [],
+ "span": 12,
+ "targets": [
+ {
+ "expr": "ceph_osd_stat_bytes_used{instance=~'$instance'} / ceph_osd_stat_bytes{instance=~'$instance'}",
+ "format": "time_series",
+ "instant": false,
+ "interval": "1m",
+ "intervalFactor": 1,
+ "legendFormat": "Util (%)",
+ "refId": "A",
+ "step": 60
+ }
+ ],
+ "timeFrom": null,
+ "title": "OSD Capacity Utilization",
+ "tooltip": {
+ "show": true,
+ "showHistogram": false
+ },
+ "type": "heatmap",
+ "xAxis": {
+ "show": true
+ },
+ "xBucketNumber": null,
+ "xBucketSize": "",
+ "yAxis": {
+ "decimals": null,
+ "format": "percentunit",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true,
+ "splitFactor": null
+ },
+ "yBucketBound": "auto",
+ "yBucketNumber": null,
+ "yBucketSize": null
+ },
+ {
+ "cards": {
+ "cardPadding": null,
+ "cardRound": 1
+ },
+ "color": {
+ "cardColor": "#b4ff00",
+ "colorScale": "sqrt",
+ "colorScheme": "interpolateBlues",
+ "exponent": 0.5,
+ "mode": "spectrum"
+ },
+ "dataFormat": "timeseries",
+ "datasource": null,
+ "gridPos": {
+ "h": 9,
+ "w": 6,
+ "x": 6,
+ "y": 15
+ },
+ "heatmap": {},
+ "highlightCards": true,
+ "id": 59,
+ "legend": {
+ "show": true
+ },
+ "links": [],
+ "targets": [
+ {
+ "expr": "ceph_osd_numpg{instance=~\"$instance\"}",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "#PGs",
+ "refId": "A"
+ }
+ ],
+ "title": "PGs per OSD",
+ "tooltip": {
+ "show": true,
+ "showHistogram": false
+ },
+ "type": "heatmap",
+ "xAxis": {
+ "show": true
+ },
+ "xBucketNumber": null,
+ "xBucketSize": "",
+ "yAxis": {
+ "decimals": null,
+ "format": "none",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true,
+ "splitFactor": null
+ },
+ "yBucketBound": "auto",
+ "yBucketNumber": null,
+ "yBucketSize": null
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": null,
+ "fill": 0,
+ "gridPos": {
+ "h": 9,
+ "w": 12,
+ "x": 12,
+ "y": 15
+ },
+ "id": 64,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(irate(ceph_osd_recovery_ops[1m]))",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "Op/s",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Recovery Rate",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "ops",
+ "label": "Recovery Ops/s",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ }
+ ],
+ "refresh": "30s",
+ "schemaVersion": 16,
+ "style": "dark",
+ "tags": [
+ "ceph",
+ "cluster"
+ ],
+ "templating": {
+ "list": [
+ {
+ "auto": true,
+ "auto_count": 10,
+ "auto_min": "1m",
+ "current": {
+ "text": "auto",
+ "value": "$__auto_interval_interval"
+ },
+ "datasource": null,
+ "hide": 0,
+ "includeAll": false,
+ "label": "Interval",
+ "multi": false,
+ "name": "interval",
+ "options": [
+ {
+ "selected": true,
+ "text": "auto",
+ "value": "$__auto_interval_interval"
+ },
+ {
+ "selected": false,
+ "text": "1m",
+ "value": "1m"
+ },
+ {
+ "selected": false,
+ "text": "10m",
+ "value": "10m"
+ },
+ {
+ "selected": false,
+ "text": "30m",
+ "value": "30m"
+ },
+ {
+ "selected": false,
+ "text": "1h",
+ "value": "1h"
+ },
+ {
+ "selected": false,
+ "text": "6h",
+ "value": "6h"
+ },
+ {
+ "selected": false,
+ "text": "12h",
+ "value": "12h"
+ },
+ {
+ "selected": false,
+ "text": "1d",
+ "value": "1d"
+ },
+ {
+ "selected": false,
+ "text": "7d",
+ "value": "7d"
+ },
+ {
+ "selected": false,
+ "text": "14d",
+ "value": "14d"
+ },
+ {
+ "selected": false,
+ "text": "30d",
+ "value": "30d"
+ }
+ ],
+ "query": "1m,10m,30m,1h,6h,12h,1d,7d,14d,30d",
+ "refresh": 2,
+ "skipUrlSync": false,
+ "type": "interval"
+ },
+ {
+ "allFormat": "glob",
+ "allValue": null,
+ "current": {
+ "text": "All",
+ "value": "$__all"
+ },
+ "datasource": null,
+ "hide": 0,
+ "hideLabel": false,
+ "includeAll": true,
+ "label": "Exporter Instance",
+ "multi": false,
+ "multiFormat": "glob",
+ "name": "instance",
+ "options": [],
+ "query": "label_values(ceph_health_status, instance)",
+ "refresh": 1,
+ "regex": "",
+ "skipUrlSync": false,
+ "sort": 0,
+ "tagValuesQuery": "",
+ "tags": [],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ }
+ ]
+ },
+ "time": {
+ "from": "now-6h",
+ "to": "now"
+ },
+ "timepicker": {
+ "refresh_intervals": [
+ "5s",
+ "10s",
+ "30s",
+ "1m",
+ "5m",
+ "15m",
+ "30m",
+ "1h",
+ "2h",
+ "1d"
+ ],
+ "time_options": [
+ "5m",
+ "15m",
+ "1h",
+ "6h",
+ "12h",
+ "24h",
+ "2d",
+ "7d",
+ "30d"
+ ]
+ },
+ "timezone": "browser",
+ "title": "Ceph - Cluster",
+ "version": 7
+}
+{%- endraw %}
diff --git a/ceph/files/grafana_dashboards/hosts-overview_prometheus.json b/ceph/files/grafana_dashboards/hosts-overview_prometheus.json
new file mode 100644
index 0000000..bdc6a90
--- /dev/null
+++ b/ceph/files/grafana_dashboards/hosts-overview_prometheus.json
@@ -0,0 +1,1068 @@
+{%- raw %}
+{
+ "annotations": {
+ "list": [
+ {
+ "builtIn": 1,
+ "datasource": "prometheus",
+ "enable": true,
+ "hide": true,
+ "iconColor": "rgba(0, 211, 255, 1)",
+ "name": "Annotations & Alerts",
+ "type": "dashboard"
+ }
+ ]
+ },
+ "editable": true,
+ "gnetId": null,
+ "graphTooltip": 0,
+ "id": null,
+ "iteration": 1545072569576,
+ "links": [],
+ "panels": [
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "#299c46",
+ "rgba(237, 129, 40, 0.89)",
+ "#d44a3a"
+ ],
+ "datasource": null,
+ "format": "none",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 4,
+ "w": 3,
+ "x": 0,
+ "y": 0
+ },
+ "id": 26,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "count(ceph_mon_quorum_status)",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "refId": "A"
+ }
+ ],
+ "thresholds": "",
+ "title": "Monitors",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "avg"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "#299c46",
+ "rgba(237, 129, 40, 0.89)",
+ "#d44a3a"
+ ],
+ "datasource": null,
+ "format": "none",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 4,
+ "w": 3,
+ "x": 3,
+ "y": 0
+ },
+ "id": 24,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "count(ceph_mon_quorum_status) - sum(ceph_mon_quorum_status)",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "refId": "A"
+ }
+ ],
+ "thresholds": "1,2",
+ "title": "Monitors down",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "avg"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "#299c46",
+ "rgba(237, 129, 40, 0.89)",
+ "#d44a3a"
+ ],
+ "datasource": null,
+ "format": "none",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 4,
+ "w": 3,
+ "x": 6,
+ "y": 0
+ },
+ "id": 5,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "Value",
+ "targets": [
+ {
+ "expr": "count(sum by (instance) (ceph_disk_occupation))",
+ "format": "table",
+ "hide": false,
+ "instant": true,
+ "interval": "15s",
+ "intervalFactor": 1,
+ "refId": "A"
+ }
+ ],
+ "thresholds": "",
+ "title": "OSD Hosts",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "#299c46",
+ "rgba(237, 129, 40, 0.89)",
+ "#890f02"
+ ],
+ "datasource": null,
+ "format": "none",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 4,
+ "w": 3,
+ "x": 9,
+ "y": 0
+ },
+ "id": 22,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "count(ceph_osd_up) - sum(ceph_osd_up)",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "refId": "A"
+ }
+ ],
+ "thresholds": "1,2",
+ "title": "OSDs down",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "avg"
+ },
+ {
+ "collapsed": false,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 4
+ },
+ "id": 30,
+ "panels": [],
+ "title": "",
+ "type": "row"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "#299c46",
+ "rgba(237, 129, 40, 0.89)",
+ "#d44a3a"
+ ],
+ "datasource": null,
+ "description": "IOPS Load at the device as reported by the OS on all OSD hosts",
+ "format": "none",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 4,
+ "w": 3,
+ "x": 0,
+ "y": 5
+ },
+ "id": 2,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "Value",
+ "targets": [
+ {
+ "expr": "sum (irate(diskio_reads{host=~\"($osd_hosts).*\"}[5m]) + irate(diskio_writes{host=~\"($osd_hosts).*\"}[5m]))",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 1,
+ "refId": "A"
+ }
+ ],
+ "thresholds": "",
+ "title": "Physical IOPS",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "#299c46",
+ "rgba(237, 129, 40, 0.89)",
+ "#d44a3a"
+ ],
+ "datasource": null,
+ "decimals": 0,
+ "description": "Average Memory Usage across all hosts in the cluster (excludes buffer/cache usage)",
+ "format": "percentunit",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 4,
+ "w": 3,
+ "x": 3,
+ "y": 5
+ },
+ "id": 9,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "Value",
+ "targets": [
+ {
+ "expr": "avg ((mem_total{host=~\"($osd_hosts|$mon_hosts|$rgw_hosts).*\"} - (\n mem_free{host=~\"($osd_hosts|$mon_hosts|$rgw_hosts).*\"} + \n mem_cached{host=~\"($osd_hosts|$mon_hosts|$rgw_hosts).*\"} + \n mem_buffered{host=~\"($osd_hosts|$mon_hosts|$rgw_hosts).*\"} +\n mem_slab{host=~\"($osd_hosts|$mon_hosts|$rgw_hosts).*\"}\n )) /\n mem_total{host=~\"($osd_hosts|$mon_hosts|$rgw_hosts).*\"})",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 1,
+ "refId": "A"
+ }
+ ],
+ "thresholds": "",
+ "title": "AVG RAM Utilization",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "#299c46",
+ "rgba(237, 129, 40, 0.89)",
+ "#d44a3a"
+ ],
+ "datasource": null,
+ "description": "Average Disk utilization for all OSD data devices (i.e. excludes journal/WAL)",
+ "format": "percent",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 4,
+ "w": 3,
+ "x": 6,
+ "y": 5
+ },
+ "id": 20,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "Value",
+ "targets": [
+ {
+ "expr": "avg (\n ((irate(diskio_io_time[5m]) / 10 )\n ) *\n on(host, name) diskio_io_time{host=~\"($osd_hosts).*\"}\n)",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 1,
+ "refId": "A"
+ }
+ ],
+ "thresholds": "",
+ "title": "AVG Disk Utilization",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "#299c46",
+ "rgba(237, 129, 40, 0.89)",
+ "#d44a3a"
+ ],
+ "datasource": null,
+ "decimals": 0,
+ "description": "Total send/receive network load across all hosts in the ceph cluster",
+ "format": "bytes",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 4,
+ "w": 3,
+ "x": 9,
+ "y": 5
+ },
+ "id": 18,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "Value",
+ "targets": [
+ {
+ "expr": "sum (\n irate(net_bytes_recv{host=~\"($osd_hosts|mon_hosts|rgw_hosts).*\",device!=\"lo\"}[1m])\n ) +\nsum (\n irate(net_bytes_sent{host=~\"($osd_hosts|mon_hosts|rgw_hosts).*\",device!=\"lo\"}[1m]) \n )",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 1,
+ "refId": "A"
+ }
+ ],
+ "thresholds": "",
+ "title": "Network Load",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "#299c46",
+ "rgba(237, 129, 40, 0.89)",
+ "#d44a3a"
+ ],
+ "datasource": null,
+ "decimals": 0,
+ "description": "Average CPU busy across all hosts (OSD, RGW, MON etc) within the cluster",
+ "format": "percentunit",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 4,
+ "w": 3,
+ "x": 12,
+ "y": 5
+ },
+ "id": 6,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "Value",
+ "targets": [
+ {
+ "expr": "avg(\n 1-(cpu_usage_idle{host=~\"($osd_hosts|$mon_hosts|$rgw_hosts).*\"}/100)\n )",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 1,
+ "refId": "A"
+ }
+ ],
+ "thresholds": "",
+ "title": "AVG CPU Busy",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "collapsed": false,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 9
+ },
+ "id": 28,
+ "panels": [],
+ "title": "",
+ "type": "row"
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": null,
+ "description": "Top 10 hosts by network load",
+ "fill": 1,
+ "gridPos": {
+ "h": 9,
+ "w": 12,
+ "x": 0,
+ "y": 10
+ },
+ "id": 19,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "topk(10, (sum by(host) (\n (\n irate(net_bytes_recv{host=~\"($osd_hosts|mon_hosts|rgw_hosts).*\",device!=\"lo\"}[1m])\n ) +\n (\n irate(net_bytes_sent{host=~\"($osd_hosts|mon_hosts|rgw_hosts).*\",device!=\"lo\"}[1m])\n ))\n )\n)",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{host}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Network Load - Top 10",
+ "tooltip": {
+ "shared": true,
+ "sort": 1,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "decimals": 1,
+ "format": "bytes",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": null,
+ "description": "Show the top 10 busiest hosts by cpu",
+ "fill": 1,
+ "gridPos": {
+ "h": 9,
+ "w": 12,
+ "x": 12,
+ "y": 10
+ },
+ "id": 13,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "topk(10,1-(cpu_usage_idle{host=~\"($osd_hosts|$mon_hosts|$rgw_hosts).*\"}/100))",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{host}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "CPU Busy - Top 10 Hosts",
+ "tooltip": {
+ "shared": true,
+ "sort": 1,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "decimals": 1,
+ "format": "percentunit",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": false
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ }
+ ],
+ "refresh": "10s",
+ "schemaVersion": 16,
+ "style": "dark",
+ "tags": [
+ "ceph",
+ "overview"
+ ],
+ "templating": {
+ "list": [
+ {
+ "allValue": "",
+ "current": {
+ "text": "All",
+ "value": "$__all"
+ },
+ "datasource": null,
+ "hide": 2,
+ "includeAll": true,
+ "label": null,
+ "multi": false,
+ "name": "osd_hosts",
+ "options": [],
+ "query": "label_values(ceph_disk_occupation, instance)",
+ "refresh": 1,
+ "regex": "([^.]*).*",
+ "skipUrlSync": false,
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [],
+ "tagsQuery": "ceph",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": null,
+ "current": {
+ "text": "All",
+ "value": "$__all"
+ },
+ "datasource": null,
+ "hide": 2,
+ "includeAll": true,
+ "label": null,
+ "multi": false,
+ "name": "mon_hosts",
+ "options": [],
+ "query": "label_values(ceph_mon_metadata, ceph_daemon)",
+ "refresh": 1,
+ "regex": "mon.(.*)",
+ "skipUrlSync": false,
+ "sort": 0,
+ "tagValuesQuery": "",
+ "tags": [],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": null,
+ "current": {
+ "text": "All",
+ "value": "$__all"
+ },
+ "datasource": null,
+ "hide": 2,
+ "includeAll": true,
+ "label": null,
+ "multi": false,
+ "name": "rgw_hosts",
+ "options": [],
+ "query": "label_values(ceph_rgw_qlen, ceph_daemon)",
+ "refresh": 1,
+ "regex": "rgw.(.*)",
+ "skipUrlSync": false,
+ "sort": 0,
+ "tagValuesQuery": "",
+ "tags": [],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ }
+ ]
+ },
+ "time": {
+ "from": "now-1h",
+ "to": "now"
+ },
+ "timepicker": {
+ "refresh_intervals": [
+ "5s",
+ "10s",
+ "30s",
+ "1m",
+ "5m",
+ "15m",
+ "30m",
+ "1h",
+ "2h",
+ "1d"
+ ],
+ "time_options": [
+ "5m",
+ "15m",
+ "1h",
+ "6h",
+ "12h",
+ "24h",
+ "2d",
+ "7d",
+ "30d"
+ ]
+ },
+ "timezone": "",
+ "title": "Ceph Hosts Overview",
+ "version": 23
+}
+{%- endraw %}
diff --git a/ceph/files/grafana_dashboards/ceph_cluster_prometheus.json b/ceph/files/grafana_dashboards/legacy/ceph_cluster_prometheus.json
similarity index 100%
rename from ceph/files/grafana_dashboards/ceph_cluster_prometheus.json
rename to ceph/files/grafana_dashboards/legacy/ceph_cluster_prometheus.json
diff --git a/ceph/files/grafana_dashboards/ceph_osd_prometheus.json b/ceph/files/grafana_dashboards/legacy/ceph_osd_prometheus.json
similarity index 100%
rename from ceph/files/grafana_dashboards/ceph_osd_prometheus.json
rename to ceph/files/grafana_dashboards/legacy/ceph_osd_prometheus.json
diff --git a/ceph/files/grafana_dashboards/ceph_pools_prometheus.json b/ceph/files/grafana_dashboards/legacy/ceph_pools_prometheus.json
similarity index 97%
rename from ceph/files/grafana_dashboards/ceph_pools_prometheus.json
rename to ceph/files/grafana_dashboards/legacy/ceph_pools_prometheus.json
index 61e2780..a669b33 100644
--- a/ceph/files/grafana_dashboards/ceph_pools_prometheus.json
+++ b/ceph/files/grafana_dashboards/legacy/ceph_pools_prometheus.json
@@ -36,7 +36,7 @@
"lines": true,
"linewidth": 1,
"links": [],
- "nullPointMode": "null",
+ "nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
@@ -437,7 +437,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "avg(irate(ceph_pool_stats_read_op_per_sec{name=\"$pool\"}[3m]))",
+ "expr": "avg(irate(ceph_pool_stats_read_op_per_sec{name=\"$pool\"}[3m])) or absent(avg(irate(ceph_pool_stats_read_op_per_sec{name=\"$pool\"}[3m]))) - 1",
"format": "time_series",
"interval": "",
"intervalFactor": 1,
@@ -446,7 +446,7 @@
"step": 60
},
{
- "expr": "avg(irate(ceph_pool_stats_write_op_per_sec{name=\"$pool\"}[3m]))",
+ "expr": "avg(irate(ceph_pool_stats_write_op_per_sec{name=\"$pool\"}[3m])) or absent(avg(irate(ceph_pool_stats_write_op_per_sec{name=\"$pool\"}[3m]))) - 1",
"format": "time_series",
"interval": "",
"intervalFactor": 1,
@@ -530,7 +530,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "avg(irate(ceph_pool_stats_read_bytes_sec{name=\"$pool\"}[3m]))",
+ "expr": "avg(irate(ceph_pool_stats_read_bytes_sec{name=\"$pool\"}[3m])) or absent(irate(ceph_pool_stats_read_bytes_sec{name=\"$pool\"}[3m])) - 1",
"format": "time_series",
"interval": "",
"intervalFactor": 1,
@@ -539,7 +539,7 @@
"step": 60
},
{
- "expr": "avg(irate(ceph_pool_stats_write_bytes_sec{name=\"$pool\"}[3m]))",
+ "expr": "avg(irate(ceph_pool_stats_write_bytes_sec{name=\"$pool\"}[3m])) or absent(irate(ceph_pool_stats_write_bytes_sec{name=\"$pool\"}[3m])) - 1 ",
"format": "time_series",
"interval": "",
"intervalFactor": 1,
@@ -618,7 +618,7 @@
"lines": true,
"linewidth": 1,
"links": [],
- "nullPointMode": "null",
+ "nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
@@ -696,7 +696,7 @@
"lines": true,
"linewidth": 1,
"links": [],
- "nullPointMode": "null",
+ "nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
diff --git a/ceph/files/grafana_dashboards/osds-detail_prometheus.json b/ceph/files/grafana_dashboards/osds-detail_prometheus.json
new file mode 100644
index 0000000..b9b950b
--- /dev/null
+++ b/ceph/files/grafana_dashboards/osds-detail_prometheus.json
@@ -0,0 +1,747 @@
+{%- raw %}
+{
+ "annotations": {
+ "list": [
+ {
+ "builtIn": 1,
+ "datasource": "prometheus",
+ "enable": true,
+ "hide": true,
+ "iconColor": "rgba(0, 211, 255, 1)",
+ "name": "Annotations & Alerts",
+ "type": "dashboard"
+ }
+ ]
+ },
+ "editable": true,
+ "gnetId": null,
+ "graphTooltip": 0,
+ "id": null,
+ "iteration": 1545072619802,
+ "links": [],
+ "panels": [
+ {
+ "collapsed": false,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 0
+ },
+ "id": 14,
+ "panels": [],
+ "title": "OSD Performance",
+ "type": "row"
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": null,
+ "fill": 1,
+ "gridPos": {
+ "h": 9,
+ "w": 6,
+ "x": 0,
+ "y": 1
+ },
+ "id": 2,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(ceph_osd_op_r_latency_sum{ceph_daemon=~\"osd.[[osd_id]]\"}[1m]) / on (ceph_daemon) irate(ceph_osd_op_r_latency_count[1m]) * 1000",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "READs",
+ "refId": "A"
+ },
+ {
+ "expr": "irate(ceph_osd_op_w_latency_sum{ceph_daemon=~\"osd.[[osd_id]]\"}[1m]) / on (ceph_daemon) irate(ceph_osd_op_w_latency_count[1m]) * 1000",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "WRITEs",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "OSD $osd_id Latency",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "ms",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": null,
+ "fill": 1,
+ "gridPos": {
+ "h": 9,
+ "w": 6,
+ "x": 6,
+ "y": 1
+ },
+ "id": 8,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(ceph_osd_op_r{ceph_daemon=~\"osd.[[osd_id]]\"}[1m])",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "Reads",
+ "refId": "A"
+ },
+ {
+ "expr": "irate(ceph_osd_op_w{ceph_daemon=~\"osd.[[osd_id]]\"}[1m])",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "Writes",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "OSD $osd_id R/W IOPS",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": null,
+ "fill": 1,
+ "gridPos": {
+ "h": 9,
+ "w": 6,
+ "x": 12,
+ "y": 1
+ },
+ "id": 7,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(ceph_osd_op_r_out_bytes{ceph_daemon=~\"osd.[[osd_id]]\"}[1m])",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "Read Bytes",
+ "refId": "A"
+ },
+ {
+ "expr": "irate(ceph_osd_op_w_in_bytes{ceph_daemon=~\"osd.[[osd_id]]\"}[1m])",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "Write Bytes",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "OSD $osd_id R/W Bytes",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "collapsed": false,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 10
+ },
+ "id": 12,
+ "panels": [],
+ "title": "Physical Device Performance",
+ "type": "row"
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": null,
+ "fill": 1,
+ "gridPos": {
+ "h": 9,
+ "w": 6,
+ "x": 0,
+ "y": 11
+ },
+ "id": 9,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null as zero",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "label_replace(label_replace(irate(diskio_read_time[1m]), \"instance\", \"$1\", \"host\", \"(.+)\"), \"device\", \"$1\", \"name\", \"(.+)\") / label_replace(label_replace(irate(diskio_reads[1m]), \"instance\", \"$1\", \"host\", \"(.+)\"), \"device\", \"$1\", \"name\", \"(.+)\") and on (instance, device) ceph_disk_occupation{ceph_daemon=~\"osd.[[osd_id]]\"}",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{instance}}/{{device}} Reads",
+ "refId": "A"
+ },
+ {
+ "expr": "label_replace(label_replace(irate(diskio_write_time[1m]), \"instance\", \"$1\", \"host\", \"(.+)\"), \"device\", \"$1\", \"name\", \"(.+)\") / label_replace(label_replace(irate(diskio_writes[1m]), \"instance\", \"$1\", \"host\", \"(.+)\"), \"device\", \"$1\", \"name\", \"(.+)\") and on (instance, device) ceph_disk_occupation{ceph_daemon=~\"osd.[[osd_id]]\"}",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{instance}}/{{device}} Writes",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Physical Device Latency for OSD $osd_id",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "ms",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": null,
+ "fill": 1,
+ "gridPos": {
+ "h": 9,
+ "w": 6,
+ "x": 6,
+ "y": 11
+ },
+ "id": 5,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "label_replace(label_replace(irate(diskio_reads[1m]), \"instance\", \"$1\", \"host\", \"(.+)\"), \"device\", \"$1\", \"name\", \"(.+)\") and on (instance, device) ceph_disk_occupation{ceph_daemon=~\"osd.[[osd_id]]\"}",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{instance}} {{device}} READS",
+ "refId": "A"
+ },
+ {
+ "expr": "label_replace(label_replace(irate(diskio_writes[1m]), \"instance\", \"$1\", \"host\", \"(.+)\"), \"device\", \"$1\", \"name\", \"(.+)\") and on (instance, device) ceph_disk_occupation{ceph_daemon=~\"osd.[[osd_id]]\"}",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{instance}} {{device}} WRITES",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Physical Device R/W IOPS for OSD $osd_id",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": null,
+ "fill": 1,
+ "gridPos": {
+ "h": 9,
+ "w": 6,
+ "x": 12,
+ "y": 11
+ },
+ "id": 10,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "label_replace(label_replace(irate(diskio_read_bytes[1m]), \"instance\", \"$1\", \"host\", \"(.+)\"), \"device\", \"$1\", \"name\", \"(.+)\") and on (instance, device) ceph_disk_occupation{ceph_daemon=~\"osd.[[osd_id]]\"}",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{instance}} {{device}} READS",
+ "refId": "A"
+ },
+ {
+ "expr": "label_replace(label_replace(irate(diskio_write_bytes[1m]), \"instance\", \"$1\", \"host\", \"(.+)\"), \"device\", \"$1\", \"name\", \"(.+)\") and on (instance, device) ceph_disk_occupation{ceph_daemon=~\"osd.[[osd_id]]\"}",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{instance}} {{device}} WRITES",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Physical Device R/W Bytes for OSD $osd_id",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": null,
+ "fill": 1,
+ "gridPos": {
+ "h": 9,
+ "w": 6,
+ "x": 18,
+ "y": 11
+ },
+ "id": 4,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "label_replace(label_replace(irate(diskio_io_time[1m]), \"instance\", \"$1\", \"host\", \"(.+)\"), \"device\", \"$1\", \"name\", \"(.+)\") and on (instance, device) ceph_disk_occupation{ceph_daemon=~\"osd.[[osd_id]]\"}",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{instance}} {{device}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Physical Device Util% for OSD $osd_id",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ }
+ ],
+ "schemaVersion": 16,
+ "style": "dark",
+ "tags": [
+ "ceph",
+ "osd"
+ ],
+ "templating": {
+ "list": [
+ {
+ "allValue": null,
+ "current": {
+ "text": "0",
+ "value": "0"
+ },
+ "datasource": null,
+ "hide": 0,
+ "includeAll": false,
+ "label": "OSD Id",
+ "multi": false,
+ "name": "osd_id",
+ "options": [],
+ "query": "label_values(ceph_osd_metadata,ceph_daemon)",
+ "refresh": 1,
+ "regex": "osd.(.*)",
+ "skipUrlSync": false,
+ "sort": 0,
+ "tagValuesQuery": "",
+ "tags": [],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ }
+ ]
+ },
+ "time": {
+ "from": "now-1h",
+ "to": "now"
+ },
+ "timepicker": {
+ "refresh_intervals": [
+ "5s",
+ "10s",
+ "30s",
+ "1m",
+ "5m",
+ "15m",
+ "30m",
+ "1h",
+ "2h",
+ "1d"
+ ],
+ "time_options": [
+ "5m",
+ "15m",
+ "1h",
+ "6h",
+ "12h",
+ "24h",
+ "2d",
+ "7d",
+ "30d"
+ ]
+ },
+ "timezone": "",
+ "title": "Ceph OSD device details",
+ "version": 8
+}
+{%- endraw %}
diff --git a/ceph/files/grafana_dashboards/osds-overview_prometheus.json b/ceph/files/grafana_dashboards/osds-overview_prometheus.json
new file mode 100644
index 0000000..c399fc0
--- /dev/null
+++ b/ceph/files/grafana_dashboards/osds-overview_prometheus.json
@@ -0,0 +1,632 @@
+{%- raw %}
+{
+ "annotations": {
+ "list": [
+ {
+ "builtIn": 1,
+ "datasource": "prometheus",
+ "enable": true,
+ "hide": true,
+ "iconColor": "rgba(0, 211, 255, 1)",
+ "name": "Annotations & Alerts",
+ "type": "dashboard"
+ }
+ ]
+ },
+ "editable": true,
+ "gnetId": null,
+ "graphTooltip": 0,
+ "id": null,
+ "links": [],
+ "panels": [
+ {
+ "aliasColors": {
+ "@95%ile": "#e0752d"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": null,
+ "fill": 1,
+ "gridPos": {
+ "h": 8,
+ "w": 8,
+ "x": 0,
+ "y": 0
+ },
+ "id": 12,
+ "legend": {
+ "avg": false,
+ "current": true,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "avg (irate(ceph_osd_op_r_latency_sum[1m]) / on (ceph_daemon) irate(ceph_osd_op_r_latency_count[1m]) * 1000)",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "AVG read",
+ "refId": "A"
+ },
+ {
+ "expr": "max (irate(ceph_osd_op_r_latency_sum[1m]) / on (ceph_daemon) irate(ceph_osd_op_r_latency_count[1m]) * 1000)",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "MAX read",
+ "refId": "B"
+ },
+ {
+ "expr": "quantile(0.95,\n (irate(ceph_osd_op_r_latency_sum[1m]) / on (ceph_daemon) irate(ceph_osd_op_r_latency_count[1m]) * 1000)\n)",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "@95%ile",
+ "refId": "C"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "OSD Read Latencies",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "ms",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "columns": [],
+ "datasource": null,
+ "description": "This table shows the osd's that are delivering the 10 highest read latencies within the cluster",
+ "fontSize": "100%",
+ "gridPos": {
+ "h": 8,
+ "w": 4,
+ "x": 8,
+ "y": 0
+ },
+ "id": 15,
+ "links": [],
+ "pageSize": null,
+ "scroll": true,
+ "showHeader": true,
+ "sort": {
+ "col": 2,
+ "desc": true
+ },
+ "styles": [
+ {
+ "alias": "OSD ID",
+ "colorMode": null,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "pattern": "ceph_daemon",
+ "thresholds": [],
+ "type": "string",
+ "unit": "short"
+ },
+ {
+ "alias": "Latency (ms)",
+ "colorMode": null,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 0,
+ "pattern": "Value",
+ "thresholds": [],
+ "type": "number",
+ "unit": "none"
+ },
+ {
+ "alias": "",
+ "colorMode": null,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "pattern": "/.*/",
+ "thresholds": [],
+ "type": "hidden",
+ "unit": "short"
+ }
+ ],
+ "targets": [
+ {
+ "expr": "topk(10,\n (sort(\n (irate(ceph_osd_op_r_latency_sum[1m]) / on (ceph_daemon) irate(ceph_osd_op_r_latency_count[1m]) * 1000)\n ))\n)\n\n",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "title": "Highest READ Latencies",
+ "transform": "table",
+ "type": "table"
+ },
+ {
+ "aliasColors": {
+ "@95%ile write": "#e0752d"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": null,
+ "fill": 1,
+ "gridPos": {
+ "h": 8,
+ "w": 8,
+ "x": 12,
+ "y": 0
+ },
+ "id": 13,
+ "legend": {
+ "avg": false,
+ "current": true,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "avg (irate(ceph_osd_op_w_latency_sum[1m]) / on (ceph_daemon) irate(ceph_osd_op_w_latency_count[1m]) * 1000)",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "AVG write",
+ "refId": "A"
+ },
+ {
+ "expr": "max (irate(ceph_osd_op_w_latency_sum[1m]) / on (ceph_daemon) irate(ceph_osd_op_w_latency_count[1m]) * 1000)",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "MAX write",
+ "refId": "B"
+ },
+ {
+ "expr": "quantile(0.95,\n (irate(ceph_osd_op_w_latency_sum[1m]) / on (ceph_daemon) irate(ceph_osd_op_w_latency_count[1m]) * 1000)\n)",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "@95%ile write",
+ "refId": "C"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "OSD Write Latencies",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "ms",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "columns": [],
+ "datasource": null,
+ "description": "This table shows the osd's that are delivering the 10 highest write latencies within the cluster",
+ "fontSize": "100%",
+ "gridPos": {
+ "h": 8,
+ "w": 4,
+ "x": 20,
+ "y": 0
+ },
+ "id": 16,
+ "links": [],
+ "pageSize": null,
+ "scroll": true,
+ "showHeader": true,
+ "sort": {
+ "col": 2,
+ "desc": true
+ },
+ "styles": [
+ {
+ "alias": "OSD ID",
+ "colorMode": null,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "pattern": "ceph_daemon",
+ "thresholds": [],
+ "type": "string",
+ "unit": "short"
+ },
+ {
+ "alias": "Latency (ms)",
+ "colorMode": null,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 0,
+ "pattern": "Value",
+ "thresholds": [],
+ "type": "number",
+ "unit": "none"
+ },
+ {
+ "alias": "",
+ "colorMode": null,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "pattern": "/.*/",
+ "thresholds": [],
+ "type": "hidden",
+ "unit": "short"
+ }
+ ],
+ "targets": [
+ {
+ "expr": "topk(10,\n (sort(\n (irate(ceph_osd_op_w_latency_sum[1m]) / on (ceph_daemon) irate(ceph_osd_op_w_latency_count[1m]) * 1000)\n ))\n)\n\n",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "title": "Highest WRITE Latencies",
+ "transform": "table",
+ "type": "table"
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": null,
+ "description": "Each bar indicates the number of OSD's that have a PG count in a specific range as shown on the x axis.",
+ "fill": 1,
+ "gridPos": {
+ "h": 7,
+ "w": 24,
+ "x": 0,
+ "y": 8
+ },
+ "id": 6,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "hideEmpty": false,
+ "hideZero": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "repeatDirection": "h",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "ceph_osd_numpg",
+ "format": "time_series",
+ "instant": false,
+ "intervalFactor": 1,
+ "legendFormat": "PGs per OSD",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Distribution of PGs per OSD",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "transparent": false,
+ "type": "graph",
+ "xaxis": {
+ "buckets": 20,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "decimals": 0,
+ "format": "short",
+ "label": "# of OSDs",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "collapsed": false,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 15
+ },
+ "id": 20,
+ "panels": [],
+ "title": "R/W Profile",
+ "type": "row"
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": null,
+ "description": "Show the read/write workload profile overtime",
+ "fill": 1,
+ "gridPos": {
+ "h": 8,
+ "w": 24,
+ "x": 0,
+ "y": 16
+ },
+ "id": 10,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "round(sum(irate(ceph_pool_rd[1m])))",
+ "format": "time_series",
+ "hide": false,
+ "instant": false,
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "Reads",
+ "refId": "A"
+ },
+ {
+ "expr": "round(sum(irate(ceph_pool_wr[1m])))",
+ "format": "time_series",
+ "instant": false,
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "Writes",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": "36h",
+ "timeShift": null,
+ "title": "Read/Write Profile",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ }
+ ],
+ "refresh": "30s",
+ "schemaVersion": 16,
+ "style": "dark",
+ "tags": [
+ "ceph",
+ "osd"
+ ],
+ "templating": {
+ "list": []
+ },
+ "time": {
+ "from": "now-1h",
+ "to": "now"
+ },
+ "timepicker": {
+ "refresh_intervals": [
+ "5s",
+ "10s",
+ "30s",
+ "1m",
+ "5m",
+ "15m",
+ "30m",
+ "1h",
+ "2h",
+ "1d"
+ ],
+ "time_options": [
+ "5m",
+ "15m",
+ "1h",
+ "6h",
+ "12h",
+ "24h",
+ "2d",
+ "7d",
+ "30d"
+ ]
+ },
+ "timezone": "",
+ "title": "Ceph OSD Overview",
+ "version": 11
+}
+{%- endraw %}
diff --git a/ceph/files/grafana_dashboards/pool-overview_prometheus.json b/ceph/files/grafana_dashboards/pool-overview_prometheus.json
new file mode 100644
index 0000000..a58e6a2
--- /dev/null
+++ b/ceph/files/grafana_dashboards/pool-overview_prometheus.json
@@ -0,0 +1,730 @@
+{%- raw %}
+{
+ "annotations": {
+ "list": [
+ {
+ "builtIn": 1,
+ "datasource": "prometheus",
+ "enable": true,
+ "hide": true,
+ "iconColor": "rgba(0, 211, 255, 1)",
+ "name": "Annotations & Alerts",
+ "type": "dashboard"
+ }
+ ]
+ },
+ "editable": true,
+ "gnetId": null,
+ "graphTooltip": 0,
+ "id": null,
+ "iteration": 1545072836850,
+ "links": [],
+ "panels": [
+ {
+ "collapsed": false,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 0
+ },
+ "id": 14,
+ "panels": [],
+ "repeat": null,
+ "title": "Pool Overview",
+ "type": "row"
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": null,
+ "fill": 5,
+ "gridPos": {
+ "h": 7,
+ "w": 24,
+ "x": 0,
+ "y": 1
+ },
+ "id": 1,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "minSpan": 12,
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "avg((rate(ceph_pool_rd{pool_id=~\"[[pool_id]]\"}[1m]) + rate(ceph_pool_wr{pool_id=~\"[[pool_id]]\"}[1m])) + on(pool_id,instance) group_left(name) ceph_pool_metadata{pool_id=~\"[[pool_id]]\"}) without (instance)",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 1,
+ "legendFormat": "{{name}}",
+ "refId": "F"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Client IOPS by Pool",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "none",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": null,
+ "fill": 5,
+ "gridPos": {
+ "h": 6,
+ "w": 24,
+ "x": 0,
+ "y": 8
+ },
+ "id": 2,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "minSpan": 12,
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "avg((rate(ceph_pool_rd_bytes{pool_id=~\"[[pool_id]]\"}[1m]) + rate(ceph_pool_wr_bytes{pool_id=~\"[[pool_id]]\"}[1m])) + on(pool_id,instance) group_left(name) ceph_pool_metadata{pool_id=~\"[[pool_id]]\"}) without (instance)",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{name}}",
+ "refId": "A",
+ "textEditor": true
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Client Throughput by Pool",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "decbytes",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "collapsed": false,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 14
+ },
+ "id": 15,
+ "panels": [],
+ "repeat": null,
+ "title": "Top 5's",
+ "type": "row"
+ },
+ {
+ "columns": [
+ {
+ "text": "Current",
+ "value": "current"
+ }
+ ],
+ "datasource": null,
+ "fontSize": "100%",
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 0,
+ "y": 15
+ },
+ "id": 3,
+ "links": [],
+ "minSpan": 12,
+ "pageSize": null,
+ "scroll": true,
+ "showHeader": true,
+ "sort": {
+ "col": 6,
+ "desc": true
+ },
+ "styles": [
+ {
+ "alias": "",
+ "colorMode": null,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "pattern": "Time",
+ "thresholds": [],
+ "type": "hidden",
+ "unit": "short"
+ },
+ {
+ "alias": "",
+ "colorMode": null,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "pattern": "id",
+ "thresholds": [],
+ "type": "hidden",
+ "unit": "short"
+ },
+ {
+ "alias": "",
+ "colorMode": null,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "pattern": "instance",
+ "thresholds": [],
+ "type": "hidden",
+ "unit": "short"
+ },
+ {
+ "alias": "",
+ "colorMode": null,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "pattern": "job",
+ "thresholds": [],
+ "type": "hidden",
+ "unit": "short"
+ },
+ {
+ "alias": "Pool Name",
+ "colorMode": null,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "pattern": "name",
+ "thresholds": [],
+ "type": "number",
+ "unit": "short"
+ },
+ {
+ "alias": "Pool ID",
+ "colorMode": null,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "pattern": "pool_id",
+ "thresholds": [],
+ "type": "number",
+ "unit": "short"
+ },
+ {
+ "alias": "IOPS (R+W)",
+ "colorMode": null,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 0,
+ "pattern": "Value",
+ "thresholds": [],
+ "type": "number",
+ "unit": "none"
+ }
+ ],
+ "targets": [
+ {
+ "expr": "topk(5,(label_replace((irate(ceph_pool_rd{pool_id=~\"[[pool_id]]\"}[1m]) + irate(ceph_pool_wr{pool_id=~\"[[pool_id]]\"}[1m])),\"id\", \"$1\", \"pool_id\", \"(.*)\") + on(pool_id) group_left(instance,name) ceph_pool_metadata{pool_id=~\"[[pool_id]]\"}) )",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 2,
+ "refId": "A",
+ "textEditor": true
+ }
+ ],
+ "title": "Top 5 Pools by Client IOPS",
+ "transform": "table",
+ "type": "table"
+ },
+ {
+ "columns": [
+ {
+ "text": "Current",
+ "value": "current"
+ }
+ ],
+ "datasource": null,
+ "fontSize": "100%",
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 8,
+ "y": 15
+ },
+ "id": 4,
+ "links": [],
+ "minSpan": 12,
+ "pageSize": null,
+ "scroll": true,
+ "showHeader": true,
+ "sort": {
+ "col": 6,
+ "desc": true
+ },
+ "styles": [
+ {
+ "alias": "Time",
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "pattern": "Time",
+ "type": "hidden"
+ },
+ {
+ "alias": "",
+ "colorMode": null,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "pattern": "id",
+ "thresholds": [],
+ "type": "hidden",
+ "unit": "short"
+ },
+ {
+ "alias": "",
+ "colorMode": null,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "pattern": "instance",
+ "thresholds": [],
+ "type": "hidden",
+ "unit": "short"
+ },
+ {
+ "alias": "",
+ "colorMode": null,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "pattern": "job",
+ "thresholds": [],
+ "type": "hidden",
+ "unit": "short"
+ },
+ {
+ "alias": "Pool Name",
+ "colorMode": null,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "pattern": "name",
+ "thresholds": [],
+ "type": "number",
+ "unit": "short"
+ },
+ {
+ "alias": "Pool ID",
+ "colorMode": null,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "pattern": "pool_id",
+ "thresholds": [],
+ "type": "number",
+ "unit": "short"
+ },
+ {
+ "alias": "Throughput",
+ "colorMode": null,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "pattern": "Value",
+ "thresholds": [],
+ "type": "number",
+ "unit": "decbytes"
+ }
+ ],
+ "targets": [
+ {
+ "expr": "(label_replace((irate(ceph_pool_rd_bytes{pool_id=~\"[[pool_id]]\"}[1m]) + irate(ceph_pool_wr_bytes{pool_id=~\"[[pool_id]]\"}[1m])),\"id\", \"$1\", \"pool_id\", \"(.*)\") + on(pool_id) group_left(instance,name) ceph_pool_metadata{pool_id=~\"[[pool_id]]\"}) ",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 2,
+ "refId": "A",
+ "textEditor": true
+ }
+ ],
+ "title": "Top 5 Pools by Throughput",
+ "transform": "table",
+ "type": "table"
+ },
+ {
+ "columns": [],
+ "datasource": null,
+ "fontSize": "100%",
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 16,
+ "y": 15
+ },
+ "id": 5,
+ "links": [],
+ "minSpan": 8,
+ "pageSize": null,
+ "scroll": true,
+ "showHeader": true,
+ "sort": {
+ "col": 5,
+ "desc": true
+ },
+ "styles": [
+ {
+ "alias": "",
+ "colorMode": null,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "pattern": "Time",
+ "thresholds": [],
+ "type": "hidden",
+ "unit": "short"
+ },
+ {
+ "alias": "",
+ "colorMode": null,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "pattern": "instance",
+ "thresholds": [],
+ "type": "hidden",
+ "unit": "short"
+ },
+ {
+ "alias": "",
+ "colorMode": null,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "pattern": "job",
+ "thresholds": [],
+ "type": "hidden",
+ "unit": "short"
+ },
+ {
+ "alias": "Pool Name",
+ "colorMode": null,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "pattern": "name",
+ "thresholds": [],
+ "type": "string",
+ "unit": "short"
+ },
+ {
+ "alias": "Pool ID",
+ "colorMode": null,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "pattern": "pool_id",
+ "thresholds": [],
+ "type": "number",
+ "unit": "short"
+ },
+ {
+ "alias": "Capacity Used",
+ "colorMode": "value",
+ "colors": [
+ "rgba(50, 172, 45, 0.97)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(245, 54, 54, 0.9)"
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "pattern": "Value",
+ "thresholds": [
+ "70",
+ "85"
+ ],
+ "type": "number",
+ "unit": "percentunit"
+ }
+ ],
+ "targets": [
+ {
+ "expr": "topk(5,((ceph_pool_bytes_used / (ceph_pool_bytes_used + ceph_pool_max_avail)) * on(pool_id) group_left(name) ceph_pool_metadata))",
+ "format": "table",
+ "hide": false,
+ "instant": true,
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "D"
+ }
+ ],
+ "title": "Top 5 Pools By Capacity Used",
+ "transform": "table",
+ "type": "table"
+ }
+ ],
+ "refresh": "15s",
+ "schemaVersion": 16,
+ "style": "dark",
+ "tags": [
+ "ceph",
+ "pool"
+ ],
+ "templating": {
+ "list": [
+ {
+ "allValue": null,
+ "current": {
+ "text": "All",
+ "value": "$__all"
+ },
+ "datasource": null,
+ "hide": 2,
+ "includeAll": true,
+ "label": null,
+ "multi": false,
+ "name": "pool_id",
+ "options": [],
+ "query": "label_values(ceph_pool_metadata,pool_id)",
+ "refresh": 1,
+ "regex": "",
+ "skipUrlSync": false,
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": null,
+ "current": {
+ "text": "All",
+ "value": "$__all"
+ },
+ "datasource": null,
+ "hide": 2,
+ "includeAll": true,
+ "label": "Pool Name",
+ "multi": false,
+ "name": "pool_name",
+ "options": [],
+ "query": "label_values(ceph_pool_metadata,name)",
+ "refresh": 1,
+ "regex": "",
+ "skipUrlSync": false,
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ }
+ ]
+ },
+ "time": {
+ "from": "now-1h",
+ "to": "now"
+ },
+ "timepicker": {
+ "refresh_intervals": [
+ "5s",
+ "10s",
+ "15s",
+ "30s",
+ "1m",
+ "5m",
+ "15m",
+ "30m",
+ "1h",
+ "2h",
+ "1d"
+ ],
+ "time_options": [
+ "5m",
+ "15m",
+ "1h",
+ "6h",
+ "12h",
+ "24h",
+ "2d",
+ "7d",
+ "30d"
+ ]
+ },
+ "timezone": "browser",
+ "title": "Ceph Pools Overview",
+ "version": 11
+}
+{%- endraw %}
diff --git a/ceph/files/grafana_dashboards/radosgw-detail_prometheus.json b/ceph/files/grafana_dashboards/radosgw-detail_prometheus.json
new file mode 100644
index 0000000..fa95510
--- /dev/null
+++ b/ceph/files/grafana_dashboards/radosgw-detail_prometheus.json
@@ -0,0 +1,401 @@
+{%- raw %}
+{
+ "annotations": {
+ "list": [
+ {
+ "builtIn": 1,
+ "datasource": "prometheus",
+ "enable": true,
+ "hide": true,
+ "iconColor": "rgba(0, 211, 255, 1)",
+ "name": "Annotations & Alerts",
+ "type": "dashboard"
+ }
+ ]
+ },
+ "editable": true,
+ "gnetId": null,
+ "graphTooltip": 0,
+ "id": null,
+ "iteration": 1545072859805,
+ "links": [],
+ "panels": [
+ {
+ "collapsed": false,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 0
+ },
+ "id": 12,
+ "panels": [],
+ "repeat": null,
+ "title": "RGW Host Detail : $rgw_servers",
+ "type": "row"
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": null,
+ "fill": 1,
+ "gridPos": {
+ "h": 8,
+ "w": 6,
+ "x": 0,
+ "y": 1
+ },
+ "id": 34,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "rate(ceph_rgw_get_initial_lat_sum{ceph_daemon=~\"($rgw_servers)\"}[1m]) / rate(ceph_rgw_get_initial_lat_count{ceph_daemon=~\"($rgw_servers)\"}[1m])",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "GET {{ceph_daemon}}",
+ "refId": "A"
+ },
+ {
+ "expr": "rate(ceph_rgw_put_initial_lat_sum{ceph_daemon=~\"($rgw_servers)\"}[1m]) / rate(ceph_rgw_put_initial_lat_count{ceph_daemon=~\"($rgw_servers)\"}[1m])",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "PUT {{ceph_daemon}}",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "$rgw_servers GET/PUT Latencies",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "s",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": null,
+ "fill": 1,
+ "gridPos": {
+ "h": 8,
+ "w": 7,
+ "x": 6,
+ "y": 1
+ },
+ "id": 18,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "rate(ceph_rgw_get_b{ceph_daemon=~\"[[rgw_servers]]\"}[1m])",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "GETs {{ceph_daemon}}",
+ "refId": "B"
+ },
+ {
+ "expr": "rate(ceph_rgw_put_b{ceph_daemon=~\"[[rgw_servers]]\"}[1m])",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "PUTs {{ceph_daemon}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Bandwidth by HTTP Operation",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "decimals": 0,
+ "format": "bytes",
+ "label": "",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {
+ "GETs": "#7eb26d",
+ "Other": "#447ebc",
+ "PUTs": "#eab839",
+ "Requests": "#3f2b5b",
+ "Requests Failed": "#bf1b00"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": null,
+ "fill": 1,
+ "gridPos": {
+ "h": 8,
+ "w": 7,
+ "x": 13,
+ "y": 1
+ },
+ "id": 14,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "rate(ceph_rgw_failed_req{ceph_daemon=~\"[[rgw_servers]]\"}[1m])",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "Requests Failed {{ceph_daemon}}",
+ "refId": "B"
+ },
+ {
+ "expr": "rate(ceph_rgw_get{ceph_daemon=~\"[[rgw_servers]]\"}[1m])",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "GETs {{ceph_daemon}}",
+ "refId": "C"
+ },
+ {
+ "expr": "rate(ceph_rgw_put{ceph_daemon=~\"[[rgw_servers]]\"}[1m])",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "PUTs {{ceph_daemon}}",
+ "refId": "D"
+ },
+ {
+ "expr": "rate(ceph_rgw_req{ceph_daemon=~\"[[rgw_servers]]\"}[1m]) -\n (rate(ceph_rgw_get{ceph_daemon=~\"[[rgw_servers]]\"}[1m]) +\n rate(ceph_rgw_put{ceph_daemon=~\"[[rgw_servers]]\"}[1m]))",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "Other {{ceph_daemon}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "HTTP Request Breakdown",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ }
+ ],
+ "refresh": "15s",
+ "schemaVersion": 16,
+ "style": "dark",
+ "tags": [
+ "ceph",
+ "rgw"
+ ],
+ "templating": {
+ "list": [
+ {
+ "allValue": null,
+ "current": {
+ "text": "All",
+ "value": "$__all"
+ },
+ "datasource": null,
+ "hide": 0,
+ "includeAll": true,
+ "label": null,
+ "multi": false,
+ "name": "rgw_servers",
+ "options": [],
+ "query": "label_values(ceph_rgw_req, ceph_daemon)",
+ "refresh": 1,
+ "regex": "",
+ "skipUrlSync": false,
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ }
+ ]
+ },
+ "time": {
+ "from": "now-1h",
+ "to": "now"
+ },
+ "timepicker": {
+ "refresh_intervals": [
+ "5s",
+ "10s",
+ "15s",
+ "30s",
+ "1m",
+ "5m",
+ "15m",
+ "30m",
+ "1h",
+ "2h",
+ "1d"
+ ],
+ "time_options": [
+ "5m",
+ "15m",
+ "1h",
+ "6h",
+ "12h",
+ "24h",
+ "2d",
+ "7d",
+ "30d"
+ ]
+ },
+ "timezone": "",
+ "title": "Ceph RGW Instance Detail",
+ "version": 12
+}
+{%- endraw %}
diff --git a/ceph/files/grafana_dashboards/radosgw-overview_prometheus.json b/ceph/files/grafana_dashboards/radosgw-overview_prometheus.json
new file mode 100644
index 0000000..1cafb7a
--- /dev/null
+++ b/ceph/files/grafana_dashboards/radosgw-overview_prometheus.json
@@ -0,0 +1,631 @@
+{%- raw %}
+{
+ "annotations": {
+ "list": [
+ {
+ "builtIn": 1,
+ "datasource": "prometheus",
+ "enable": true,
+ "hide": true,
+ "iconColor": "rgba(0, 211, 255, 1)",
+ "name": "Annotations & Alerts",
+ "type": "dashboard"
+ }
+ ]
+ },
+ "editable": true,
+ "gnetId": null,
+ "graphTooltip": 0,
+ "id": null,
+ "iteration": 1545072879350,
+ "links": [],
+ "panels": [
+ {
+ "collapsed": false,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 0
+ },
+ "id": 2,
+ "panels": [],
+ "title": "RGW Overview - All Gateways",
+ "type": "row"
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": null,
+ "fill": 1,
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 0,
+ "y": 1
+ },
+ "id": 29,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "avg(rate(ceph_rgw_get_initial_lat_sum[1m]) / rate(ceph_rgw_get_initial_lat_count[1m]))",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "GET AVG",
+ "refId": "A"
+ },
+ {
+ "expr": "avg(rate(ceph_rgw_put_initial_lat_sum[1m]) / rate(ceph_rgw_put_initial_lat_count[1m]))",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "PUT AVG",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Average GET/PUT Latencies",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "s",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": null,
+ "fill": 1,
+ "gridPos": {
+ "h": 7,
+ "w": 7,
+ "x": 8,
+ "y": 1
+ },
+ "id": 4,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum by(rgw_host) (label_replace(rate(ceph_rgw_req[1m]), \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"))",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{rgw_host}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Total Requests/sec by RGW Instance",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "decimals": 0,
+ "format": "none",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": null,
+ "description": "Latencies are shown stacked, without a yaxis to provide a visual indication of GET latency imbalance across RGW hosts",
+ "fill": 1,
+ "gridPos": {
+ "h": 7,
+ "w": 6,
+ "x": 15,
+ "y": 1
+ },
+ "id": 31,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "label_replace(rate(ceph_rgw_get_initial_lat_sum[1m]),\"rgw_host\",\"$1\",\"ceph_daemon\",\"rgw.(.*)\") / \nlabel_replace(rate(ceph_rgw_get_initial_lat_count[1m]),\"rgw_host\",\"$1\",\"ceph_daemon\",\"rgw.(.*)\")",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{rgw_host}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "GET Latencies by RGW Instance",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "decimals": null,
+ "format": "s",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": null,
+ "description": "Total bytes transferred in/out of all radosgw instances within the cluster",
+ "fill": 1,
+ "gridPos": {
+ "h": 6,
+ "w": 8,
+ "x": 0,
+ "y": 8
+ },
+ "id": 6,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(rate(ceph_rgw_get_b[1m]))",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "GETs",
+ "refId": "A"
+ },
+ {
+ "expr": "sum(rate(ceph_rgw_put_b[1m]))",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "PUTs",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Bandwidth Consumed by Type",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": null,
+ "description": "Total bytes transferred in/out through get/put operations, by radosgw instance",
+ "fill": 1,
+ "gridPos": {
+ "h": 6,
+ "w": 7,
+ "x": 8,
+ "y": 8
+ },
+ "id": 9,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum by(rgw_host) (\n (label_replace(rate(ceph_rgw_get_b[1m]), \"rgw_host\",\"$1\",\"ceph_daemon\",\"rgw.(.*)\")) + \n (label_replace(rate(ceph_rgw_put_b[1m]), \"rgw_host\",\"$1\",\"ceph_daemon\",\"rgw.(.*)\"))\n)",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{rgw_host}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Bandwidth by RGW Instance",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": null,
+ "description": "Latencies are shown stacked, without a yaxis to provide a visual indication of PUT latency imbalance across RGW hosts",
+ "fill": 1,
+ "gridPos": {
+ "h": 6,
+ "w": 6,
+ "x": 15,
+ "y": 8
+ },
+ "id": 32,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "label_replace(rate(ceph_rgw_put_initial_lat_sum[1m]),\"rgw_host\",\"$1\",\"ceph_daemon\",\"rgw.(.*)\") / \nlabel_replace(rate(ceph_rgw_put_initial_lat_count[1m]),\"rgw_host\",\"$1\",\"ceph_daemon\",\"rgw.(.*)\")",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{rgw_host}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "PUT Latencies by RGW Instance",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "decimals": null,
+ "format": "s",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": false
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ }
+ ],
+ "refresh": "15s",
+ "schemaVersion": 16,
+ "style": "dark",
+ "tags": [
+ "ceph",
+ "rgw"
+ ],
+ "templating": {
+ "list": [
+ {
+ "allValue": null,
+ "current": {
+ "text": "All",
+ "value": "$__all"
+ },
+ "datasource": null,
+ "hide": 2,
+ "includeAll": true,
+ "label": null,
+ "multi": false,
+ "name": "rgw_servers",
+ "options": [],
+ "query": "label_values(ceph_rgw_req, ceph_daemon)",
+ "refresh": 1,
+ "regex": "",
+ "skipUrlSync": false,
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ }
+ ]
+ },
+ "time": {
+ "from": "now-1h",
+ "to": "now"
+ },
+ "timepicker": {
+ "refresh_intervals": [
+ "5s",
+ "10s",
+ "15s",
+ "1m",
+ "1m",
+ "5m",
+ "15m",
+ "30m",
+ "1h",
+ "2h",
+ "1d"
+ ],
+ "time_options": [
+ "5m",
+ "15m",
+ "1h",
+ "6h",
+ "12h",
+ "24h",
+ "2d",
+ "7d",
+ "30d"
+ ]
+ },
+ "timezone": "",
+ "title": "Ceph RGW Overview",
+ "version": 13
+}
+{%- endraw %}
diff --git a/ceph/files/jewel/ceph.conf.Debian b/ceph/files/jewel/ceph.conf.Debian
index 351eae2..e8d73a9 100644
--- a/ceph/files/jewel/ceph.conf.Debian
+++ b/ceph/files/jewel/ceph.conf.Debian
@@ -102,8 +102,8 @@
{%- if radosgw.identity.engine == 'keystone' %}
{%- set ident = radosgw.identity %}
rgw keystone api version = {{ ident.get('api_version', 3) }}
-rgw keystone url = {{ ident.host }}:{{ ident.get('port', '5000') }}
-rgw keystone accepted roles = {{ ident.get('accepted_roles', '_member_, Member, admin, swiftoperator') }}
+rgw keystone url = {{ ident.get('protocol', 'https') }}://{{ ident.host }}:{{ ident.get('port', '5000') }}
+rgw keystone accepted roles = {{ ident.get('accepted_roles', '_member_, Member, admin, swiftoperator, ResellerAdmin') }}
rgw keystone revocation interval = {{ ident.get('revocation_interval', '1000000') }}
rgw keystone implicit tenants = {{ ident.get('implicit_tenants', 'false') }}
rgw s3 auth use keystone = {{ ident.get('s3_auth_use_keystone', 'true') }}
diff --git a/ceph/files/kraken/ceph.conf.Debian b/ceph/files/kraken/ceph.conf.Debian
index 351eae2..e8d73a9 100644
--- a/ceph/files/kraken/ceph.conf.Debian
+++ b/ceph/files/kraken/ceph.conf.Debian
@@ -102,8 +102,8 @@
{%- if radosgw.identity.engine == 'keystone' %}
{%- set ident = radosgw.identity %}
rgw keystone api version = {{ ident.get('api_version', 3) }}
-rgw keystone url = {{ ident.host }}:{{ ident.get('port', '5000') }}
-rgw keystone accepted roles = {{ ident.get('accepted_roles', '_member_, Member, admin, swiftoperator') }}
+rgw keystone url = {{ ident.get('protocol', 'https') }}://{{ ident.host }}:{{ ident.get('port', '5000') }}
+rgw keystone accepted roles = {{ ident.get('accepted_roles', '_member_, Member, admin, swiftoperator, ResellerAdmin') }}
rgw keystone revocation interval = {{ ident.get('revocation_interval', '1000000') }}
rgw keystone implicit tenants = {{ ident.get('implicit_tenants', 'false') }}
rgw s3 auth use keystone = {{ ident.get('s3_auth_use_keystone', 'true') }}
diff --git a/ceph/files/luminous/ceph.conf.Debian b/ceph/files/luminous/ceph.conf.Debian
index 351eae2..e8d73a9 100644
--- a/ceph/files/luminous/ceph.conf.Debian
+++ b/ceph/files/luminous/ceph.conf.Debian
@@ -102,8 +102,8 @@
{%- if radosgw.identity.engine == 'keystone' %}
{%- set ident = radosgw.identity %}
rgw keystone api version = {{ ident.get('api_version', 3) }}
-rgw keystone url = {{ ident.host }}:{{ ident.get('port', '5000') }}
-rgw keystone accepted roles = {{ ident.get('accepted_roles', '_member_, Member, admin, swiftoperator') }}
+rgw keystone url = {{ ident.get('protocol', 'https') }}://{{ ident.host }}:{{ ident.get('port', '5000') }}
+rgw keystone accepted roles = {{ ident.get('accepted_roles', '_member_, Member, admin, swiftoperator, ResellerAdmin') }}
rgw keystone revocation interval = {{ ident.get('revocation_interval', '1000000') }}
rgw keystone implicit tenants = {{ ident.get('implicit_tenants', 'false') }}
rgw s3 auth use keystone = {{ ident.get('s3_auth_use_keystone', 'true') }}
diff --git a/ceph/meta/fluentd.yml b/ceph/meta/fluentd.yml
new file mode 100644
index 0000000..8bc2794
--- /dev/null
+++ b/ceph/meta/fluentd.yml
@@ -0,0 +1,79 @@
+{%- if pillar.get('fluentd', {}).get('agent', {}).get('enabled', False) %}
+{%- set positiondb = pillar.fluentd.agent.dir.positiondb %}
+agent:
+ config:
+ label:
+ ceph:
+ input:
+ tail_ceph-osd:
+ type: tail
+ tag: ceph.osd
+ path: /var/log/ceph/ceph-osd*
+ path_key: log_location
+ pos_file: {{ positiondb }}/ceph.osd.pos
+ parser:
+ type: regexp
+ time_key: Timestamp
+ time_format: '%Y-%m-%d %H:%M:%S.%N'
+ keep_time_key: false
+ format: >-
+ '/^(?<Timestamp>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d{6}) (?<Payload>.*)/'
+ tail_ceph-mon:
+ type: tail
+ tag: ceph.mon
+ path: /var/log/ceph/ceph-mon*, /var/log/ceph/ceph.log, /var/log/ceph/ceph.audit.log
+ path_key: log_location
+ pos_file: {{ positiondb }}/ceph.mon.pos
+ parser:
+ type: regexp
+ time_key: Timestamp
+ time_format: '%Y-%m-%d %H:%M:%S.%N'
+ keep_time_key: false
+ format: >-
+ '/^(?<Timestamp>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d{6}) (?<Payload>.*)/'
+ tail_ceph-mgr:
+ type: tail
+ tag: ceph.mgr
+ path: /var/log/ceph/ceph-mgr*
+ path_key: log_location
+ pos_file: {{ positiondb }}/ceph.mgr.pos
+ parser:
+ type: regexp
+ time_key: Timestamp
+ time_format: '%Y-%m-%d %H:%M:%S.%N'
+ keep_time_key: false
+ format: >-
+ '/^(?<Timestamp>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d{6}) (?<Payload>.*)/'
+ tail_radosgw:
+ type: tail
+ tag: ceph.radosgw
+ path: /var/log/ceph/ceph-rgw*
+ path_key: log_location
+ pos_file: {{ positiondb }}/ceph.radosgw.pos
+ parser:
+ type: regexp
+ time_key: Timestamp
+ time_format: '%Y-%m-%d %H:%M:%S.%N'
+ keep_time_key: false
+ format: >-
+ '/^(?<Timestamp>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d{6}) (?<Payload>.*)$/'
+
+ filter:
+ match_severity:
+ type: record_transformer
+ tag: ceph.**
+ enable_ruby: true
+ record:
+ - name: programname
+ value: ceph
+ - name: severity_label
+ value: INFO
+ - name: Severity
+ value: 6
+ match:
+ push_to_default:
+ tag: ceph.*
+ type: relabel
+ label: default_output
+
+{%- endif %}
diff --git a/ceph/meta/grafana.yml b/ceph/meta/grafana.yml
index bfa369d..f43431b 100644
--- a/ceph/meta/grafana.yml
+++ b/ceph/meta/grafana.yml
@@ -1,13 +1,48 @@
+{%- from "ceph/map.jinja" import common with context -%}
+
+{%- if common.version is defined %}
dashboard:
+ {%- if common.version in ['kraken', 'jewel'] %}
ceph_cluster_prometheus:
datasource: prometheus
format: json
- template: ceph/files/grafana_dashboards/ceph_cluster_prometheus.json
+ template: ceph/files/grafana_dashboards/legacy/ceph_cluster_prometheus.json
ceph_osd_prometheus:
datasource: prometheus
format: json
- template: ceph/files/grafana_dashboards/ceph_osd_prometheus.json
+ template: ceph/files/grafana_dashboards/legacy/ceph_osd_prometheus.json
ceph_pools_prometheus:
datasource: prometheus
format: json
- template: ceph/files/grafana_dashboards/ceph_pools_prometheus.json
+ template: ceph/files/grafana_dashboards/legacy/ceph_pools_prometheus.json
+ {%- else %}
+ ceph_cluster_prometheus:
+ datasource: prometheus
+ format: json
+ template: ceph/files/grafana_dashboards/ceph-cluster_prometheus.json
+ ceph_osd_prometheus:
+ datasource: prometheus
+ format: json
+ template: ceph/files/grafana_dashboards/osds-overview_prometheus.json
+ ceph_osd_details_prometheus:
+ datasource: prometheus
+ format: json
+ template: ceph/files/grafana_dashboards/osds-detail_prometheus.json
+ ceph_pools_prometheus:
+ datasource: prometheus
+ format: json
+ template: ceph/files/grafana_dashboards/pool-overview_prometheus.json
+ ceph_hosts_overview_prometheus:
+ datasource: prometheus
+ format: json
+ template: ceph/files/grafana_dashboards/hosts-overview_prometheus.json
+ ceph_radosgw_overview_prometheus:
+ datasource: prometheus
+ format: json
+ template: ceph/files/grafana_dashboards/radosgw-overview_prometheus.json
+ ceph_radosgw_detail_prometheus:
+ datasource: prometheus
+ format: json
+ template: ceph/files/grafana_dashboards/radosgw-detail_prometheus.json
+ {%- endif %}
+{%- endif %}
diff --git a/ceph/meta/prometheus.yml b/ceph/meta/prometheus.yml
index 62f1b9d..86c8a47 100644
--- a/ceph/meta/prometheus.yml
+++ b/ceph/meta/prometheus.yml
@@ -1,7 +1,10 @@
-{%- from "ceph/map.jinja" import thresholds, mon, monitoring, setup with context %}
+{%- from "ceph/map.jinja" import common, mon, monitoring, setup with context -%}
-{%- if (mon is defined and mon.get('enabled')) or (monitoring.cluster_stats.get('enabled') and monitoring.cluster_stats.ceph_user is defined) %}
-{% raw %}
+{%- if common.version is defined %}
+ {%- if common.version in ['kraken', 'jewel'] -%}
+
+ {%- if (mon is defined and mon.get('enabled')) or (monitoring.cluster_stats.get('enabled') and monitoring.cluster_stats.ceph_user is defined) %}
+ {%- raw %}
server:
alert:
CephClusterHealthMinor:
@@ -55,8 +58,8 @@
severity: warning
service: ceph
annotations:
- summary: "{%-endraw %}{{100*threshold}}{%- raw %}% of Ceph space is used"
- description: "{{ $value }} bytes of Ceph OSD space (>= {%-endraw %}{{100*threshold}}{%- raw %}%) is used for 3 minutes. For details, run 'ceph df'."
+ summary: "{% endraw %}{{100*threshold}}{% raw %}% of Ceph space is used"
+ description: "{{ $value }} bytes of Ceph OSD space (>={% endraw %}{{100*threshold}}{% raw %}%) is used for 3 minutes. For details, run 'ceph df'."
CephOsdSpaceUsageMajor:
{%- endraw %}
{%- set threshold = monitoring.space_used_critical_threshold|default('0.85')|float %}
@@ -68,18 +71,18 @@
severity: major
service: ceph
annotations:
- summary: "{%-endraw %}{{100*threshold}}{%- raw %}% of Ceph space is used"
- description: "{{ $ value }} bytes of Ceph OSD space (>= {%-endraw %}{{100*threshold}}{%- raw %}%) is used for 3 minutes. For details, run 'ceph df'."
-{% endraw %}
-{%- if setup.pool is defined %}
-{%- for pool_name, pool in setup.pool.iteritems() %}
- {%- if monitoring.pool is defined and monitoring.pool[pool_name] is defined %}
- {%- set monitoring_pool = monitoring.pool[pool_name] %}
- {%- else %}
- {%- set monitoring_pool = monitoring %}
- {%- endif %}
+ summary: "{% endraw %}{{100*threshold}}{% raw %}% of Ceph space is used"
+ description: "{{ $ value }} bytes of Ceph OSD space (>={% endraw %}{{100*threshold}}{% raw %}%) is used for 3 minutes. For details, run 'ceph df'."
+ {%- endraw %}
+ {%- if setup.pool is defined %}
+ {%- for pool_name, pool in setup.pool.iteritems() %}
+ {%- if monitoring.pool is defined and monitoring.pool[pool_name] is defined %}
+ {%- set monitoring_pool = monitoring.pool[pool_name] %}
+ {%- else %}
+ {%- set monitoring_pool = monitoring %}
+ {%- endif %}
CephPool{{pool_name|replace(".", "")|replace("-", "")}}SpaceUsageWarning:
- {%- set threshold = monitoring_pool.pool_space_used_utilization_warning_threshold|default('0.75')|float %}
+ {%- set threshold = monitoring_pool.pool_space_used_utilization_warning_threshold|default('0.75')|float %}
if: >-
ceph_pool_usage_bytes_used{name="{{pool_name}}"} / (ceph_pool_usage_max_avail{name="{{pool_name}}"} + ceph_pool_usage_bytes_used{name="{{pool_name}}"}) > {{threshold}}
for: 3m
@@ -90,9 +93,9 @@
summary: "{{100*threshold}}% of Ceph pool space is used"
description: "The Ceph {{pool_name}} pool uses {{100*threshold}}% of available space for 3 minutes. For details, run 'ceph df'."
CephPool{{pool_name|replace(".", "")|replace("-", "")}}SpaceUsageCritical:
- {%- set threshold = monitoring_pool.pool_space_used_critical_threshold|default('0.85')|float %}
+ {%- set threshold = monitoring_pool.pool_space_used_critical_threshold|default('0.85')|float %}
if: >-
- ceph_pool_usage_bytes_used{name="{{pool_name}}"} / ceph_pool_usage_max_avail{name="{{pool_name}}"} + ceph_pool_usage_bytes_used{name="{{pool_name}}"}) > {{threshold}}
+ ceph_pool_usage_bytes_used{name="{{pool_name}}"} / (ceph_pool_usage_max_avail{name="{{pool_name}}"} + ceph_pool_usage_bytes_used{name="{{pool_name}}"}) > {{threshold}}
for: 3m
labels:
severity: minor
@@ -100,9 +103,9 @@
annotations:
summary: "{{100*threshold}}% of Ceph pool space is used"
description: "The Ceph {{pool_name}} pool uses {{100*threshold}}% of available space for 3 minutes. For details, run 'ceph df'."
- {%- if monitoring.cluster_stats.extra_alerts.get("enabled", False) %}
+ {%- if monitoring.cluster_stats.extra_alerts is defined and monitoring.cluster_stats.extra_alerts.get("enabled", False) %}
CephPool{{pool_name|replace(".", "")|replace("-", "")}}WriteOpsTooHigh:
- {%- set threshold = monitoring_pool.pool_write_ops_threshold|default('200')|float %}
+ {%- set threshold = monitoring_pool.pool_write_ops_threshold|default('200')|float %}
if: >-
ceph_pool_stats_write_op_per_sec{name="{{pool_name}}"} > {{threshold}}
for: 3m
@@ -113,7 +116,7 @@
summary: "{{threshold}} Ceph pool write operations per second"
description: "The number of Ceph {{pool_name}} pool write operations per second is {{threshold}} for 3 minutes."
CephPool{{pool_name|replace(".", "")|replace("-", "")}}WriteBytesTooHigh:
- {%- set threshold = monitoring_pool.pool_write_bytes_threshold|default('70000000')|float %}
+ {%- set threshold = monitoring_pool.pool_write_bytes_threshold|default('70000000')|float %}
if: >-
ceph_pool_stats_write_bytes_sec{name="{{pool_name}}"} > {{threshold}}
for: 3m
@@ -124,7 +127,7 @@
summary: "{{threshold}} Ceph pool write bytes per second"
description: "The number of Ceph {{pool_name}} pool write bytes per second is {{threshold}} for 3 minutes."
CephPool{{pool_name|replace(".", "")|replace("-", "")}}ReadOpsTooHigh:
- {%- set threshold = monitoring_pool.pool_read_ops_threshold|default('1000')|float %}
+ {%- set threshold = monitoring_pool.pool_read_ops_threshold|default('1000')|float %}
if: >-
ceph_pool_stats_read_op_per_sec{name="{{pool_name}}"} > {{threshold}}
for: 3m
@@ -135,7 +138,7 @@
summary: "{{threshold}} Ceph pool read operations per second"
description: "The number of Ceph {{pool_name}} pool read operations per second is {{threshold}} for 3 minutes."
CephPool{{pool_name|replace(".", "")|replace("-", "")}}ReadBytesTooHigh:
- {%- set threshold = monitoring_pool.pool_read_bytes_threshold|default('70000000')|float %}
+ {%- set threshold = monitoring_pool.pool_read_bytes_threshold|default('70000000')|float %}
if: >-
ceph_pool_stats_read_bytes_sec{name="{{pool_name}}"} > {{threshold}}
for: 3m
@@ -145,7 +148,133 @@
annotations:
summary: "{{threshold}} Ceph pool read bytes per second"
description: "The number of Ceph {{pool_name}} pool read bytes per second is {{threshold}} for 3 minutes."
+ {%- endif %}
+ {%- endfor %}
+ {%- endif %}
+ {%- endif -%}
+
+ {%- else -%}
+
+ {%- if mon is defined and mon.get('enabled') %}
+ {%- raw %}
+server:
+ alert:
+ CephClusterHealthMinor:
+ if: >-
+ ceph_health_status == 1
+ for: 3m
+ labels:
+ severity: minor
+ service: ceph
+ annotations:
+ summary: "Ceph cluster health is WARNING"
+ description: "The Ceph cluster is in the WARNING state. For details, run 'ceph -s'."
+ CephClusterHealthCritical:
+ if: >-
+ ceph_health_status == 2
+ for: 3m
+ labels:
+ severity: critical
+ service: ceph
+ annotations:
+ summary: "Ceph cluster health is CRITICAL"
+ description: "The Ceph cluster is in the CRITICAL state. For details, run 'ceph -s'."
+ CephMonitorDownMinor:
+ if: >-
+ count(ceph_mon_quorum_status) - sum(ceph_mon_quorum_status) > 0
+ for: 3m
+ labels:
+ severity: minor
+ service: ceph
+ annotations:
+ summary: "Ceph Monitors are down"
+ description: "{{ $value }} of Ceph Monitors are down. For details, run 'ceph -s'."
+ CephOsdDownMinor:
+ if: >-
+ count(ceph_osd_up) - sum(ceph_osd_up) > 0
+ for: 3m
+ labels:
+ severity: minor
+ service: ceph
+ annotations:
+ summary: "Ceph OSDs are down"
+ description: "{{ $value }} of Ceph OSDs are down. For details, run 'ceph osd tree'."
+ CephOsdSpaceUsageWarning:
+ {%- endraw %}
+ {%- set threshold = monitoring.space_used_warning_threshold|default('0.75')|float %}
+ if: >-
+ ceph_cluster_total_used_bytes > ceph_cluster_total_bytes * {{threshold}}
+ {%- raw %}
+ for: 3m
+ labels:
+ severity: warning
+ service: ceph
+ annotations:
+ summary: "{% endraw %}{{100*threshold}}{% raw %}% of Ceph space is used"
+ description: "{{ $value }} bytes of Ceph OSD space (>={% endraw %}{{100*threshold}}{% raw %}%) is used for 3 minutes. For details, run 'ceph df'."
+ CephOsdSpaceUsageMajor:
+ {%- endraw %}
+ {%- set threshold = monitoring.space_used_critical_threshold|default('0.85')|float %}
+ if: >-
+ ceph_cluster_total_used_bytes > ceph_cluster_total_bytes * {{threshold}}
+ {%- raw %}
+ for: 3m
+ labels:
+ severity: major
+ service: ceph
+ annotations:
+ summary: "{% endraw %}{{100*threshold}}{% raw %}% of Ceph space is used"
+ description: "{{ $ value }} bytes of Ceph OSD space (>={% endraw %}{{100*threshold}}{% raw %}%) is used for 3 minutes. For details, run 'ceph df'."
+ {%- endraw %}
+ {%- if setup.pool is defined %}
+ {%- for pool_name, pool in setup.pool.iteritems() %}
+ {%- if monitoring.pool is defined and monitoring.pool[pool_name] is defined %}
+ {%- set monitoring_pool = monitoring.pool[pool_name] %}
+ {%- else %}
+ {%- set monitoring_pool = monitoring %}
+ {%- endif %}
+ CephPool{{pool_name|replace(".", "")|replace("-", "")}}SpaceUsageWarning:
+ {%- set threshold = monitoring_pool.pool_space_used_utilization_warning_threshold|default('0.75')|float %}
+ if: >-
+ ceph_pool_bytes_used / (ceph_pool_bytes_used + ceph_pool_max_avail) * on(pool_id) group_left(name) ceph_pool_metadata{name="{{pool_name}}"} > {{threshold}}
+ for: 3m
+ labels:
+ severity: warning
+ service: ceph
+ annotations:
+ summary: "{{100*threshold}}% of Ceph pool space is used"
+ description: "The Ceph {{pool_name}} pool uses {{100*threshold}}% of available space for 3 minutes. For details, run 'ceph df'."
+ CephPool{{pool_name|replace(".", "")|replace("-", "")}}SpaceUsageCritical:
+ {%- set threshold = monitoring_pool.pool_space_used_critical_threshold|default('0.85')|float %}
+ if: >-
+ ceph_pool_bytes_used / (ceph_pool_bytes_used + ceph_pool_max_avail) * on(pool_id) group_left(name) ceph_pool_metadata{name="{{pool_name}}"} > {{threshold}}
+ for: 3m
+ labels:
+ severity: minor
+ service: ceph
+ annotations:
+ summary: "{{100*threshold}}% of Ceph pool space is used"
+ description: "The Ceph {{pool_name}} pool uses {{100*threshold}}% of available space for 3 minutes. For details, run 'ceph df'."
+ {%- endfor %}
+ {%- endif -%}
+
+ {%- set fqdn_ip4_addresses = [] %}
+ {%- for addr in grains['fqdn_ip4'] %}
+ {%- if not addr.startswith('127.') %}
+ {%- do fqdn_ip4_addresses.append(addr) %}
+ {%- endif %}
+ {%- endfor %}
+ {%- set address = fqdn_ip4_addresses[0] %}
+ {%- if address is defined %}
+ target:
+ static:
+ ceph:
+ enabled: true
+ endpoint:
+ - address: {{ address }}
+ port: 9283
+ honor_labels: true
+ {%- endif %}
{%- endif %}
-{%- endfor %}
-{%- endif %}
+ {%- endif %}
{%- endif %}
diff --git a/ceph/meta/telegraf.yml b/ceph/meta/telegraf.yml
index 0fbb00a..cbbe981 100644
--- a/ceph/meta/telegraf.yml
+++ b/ceph/meta/telegraf.yml
@@ -1,67 +1,70 @@
-{%- from "ceph/map.jinja" import mon, osd, monitoring with context %}
+{%- from "ceph/map.jinja" import common, mon, osd, monitoring with context -%}
-{%- if mon is defined and mon.get('enabled') %}
+{%- if common.version is defined %}
+ {%- if common.version in ['kraken', 'jewel'] -%}
+
+ {%- if mon is defined and mon.get('enabled') %}
remote_agent:
input:
ceph:
template: ceph/files/telegraf.conf
-{%- if monitoring.cluster_stats is defined %}
+ {%- if monitoring.cluster_stats is defined %}
ceph_user: client.{{ monitoring.cluster_stats.ceph_user|default('admin') }}
gather_admin_socket_stats: {{ monitoring.cluster_stats.gather_admin_socket_stats|default('false') }}
gather_cluster_stats: {{ monitoring.cluster_stats.gather_cluster_stats|default('true') }}
gather_pool_loads: {{ monitoring.cluster_stats.gather_pool_loads|default('true') }}
-{%- if monitoring.cluster_stats.ceph_binary is defined %}
+ {%- if monitoring.cluster_stats.ceph_binary is defined %}
ceph_binary: {{ monitoring.cluster_stats.ceph_binary }}
-{%- endif %}
-{%- if monitoring.cluster_stats.socket_dir is defined %}
+ {%- endif %}
+ {%- if monitoring.cluster_stats.socket_dir is defined %}
socket_dir: {{ monitoring.cluster_stats.socket_dir }}
-{%- endif %}
-{%- if monitoring.cluster_stats.mon_prefix is defined %}
+ {%- endif %}
+ {%- if monitoring.cluster_stats.mon_prefix is defined %}
mon_prefix: {{ monitoring.cluster_stats.mon_prefix }}
-{%- endif %}
-{%- if monitoring.cluster_stats.osd_prefix is defined %}
+ {%- endif %}
+ {%- if monitoring.cluster_stats.osd_prefix is defined %}
osd_prefix: {{ monitoring.cluster_stats.osd_prefix }}
-{%- endif %}
-{%- if monitoring.interval is defined %}
+ {%- endif %}
+ {%- if monitoring.interval is defined %}
interval: {{ monitoring.interval }}
-{%- endif %}
-{%- else %}
+ {%- endif %}
+ {%- else %}
ceph_user: client.admin
gather_admin_socket_stats: false
gather_cluster_stats: true
gather_pool_loads: true
-{%- endif %}
+ {%- endif %}
agent:
input:
ceph:
template: ceph/files/telegraf.conf
-{%- if monitoring.cluster_stats is defined %}
+ {%- if monitoring.cluster_stats is defined %}
ceph_user: client.{{ monitoring.cluster_stats.ceph_user|default('admin') }}
gather_admin_socket_stats: {{ monitoring.cluster_stats.gather_admin_socket_stats|default('true') }}
gather_cluster_stats: {{ monitoring.cluster_stats.gather_cluster_stats|default('false') }}
gather_pool_loads: {{ monitoring.cluster_stats.gather_pool_loads|default('false') }}
-{%- if monitoring.cluster_stats.ceph_binary is defined %}
+ {%- if monitoring.cluster_stats.ceph_binary is defined %}
ceph_binary: {{ monitoring.cluster_stats.ceph_binary }}
-{%- endif %}
-{%- if monitoring.cluster_stats.socket_dir is defined %}
+ {%- endif %}
+ {%- if monitoring.cluster_stats.socket_dir is defined %}
socket_dir: {{ monitoring.cluster_stats.socket_dir }}
-{%- endif %}
-{%- if monitoring.cluster_stats.mon_prefix is defined %}
+ {%- endif %}
+ {%- if monitoring.cluster_stats.mon_prefix is defined %}
mon_prefix: {{ monitoring.cluster_stats.mon_prefix }}
-{%- endif %}
-{%- if monitoring.cluster_stats.osd_prefix is defined %}
+ {%- endif %}
+ {%- if monitoring.cluster_stats.osd_prefix is defined %}
osd_prefix: {{ monitoring.cluster_stats.osd_prefix }}
-{%- endif %}
-{%- if monitoring.interval is defined %}
+ {%- endif %}
+ {%- if monitoring.interval is defined %}
interval: {{ monitoring.interval }}
-{%- endif %}
-{%- else %}
+ {%- endif %}
+ {%- else %}
ceph_user: client.admin
gather_admin_socket_stats: true
gather_cluster_stats: false
gather_pool_loads: false
-{%- endif %}
-{%- elif monitoring.get('cluster_stats').get('enabled') %}
+ {%- endif %}
+ {%- elif monitoring.get('cluster_stats').get('enabled') %}
remote_agent:
input:
ceph:
@@ -70,33 +73,36 @@
gather_admin_socket_stats: false
gather_cluster_stats: true
gather_pool_loads: true
-{%- endif %}
+ {%- endif -%}
-{%- if osd is defined and osd.get('enabled') %}
+ {%- if osd is defined and osd.get('enabled') %}
agent:
input:
ceph:
template: ceph/files/telegraf.conf
fieldpass: [ "apply_latency*", "commitcycle_latency*", "op_latency*", "osdop_append", "osdop_delete", "osdop_read", "osdop_write", "recovery_ops" ]
-{%- if monitoring.node_stats is defined %}
+ {%- if monitoring.node_stats is defined %}
gather_admin_socket_stats: {{ monitoring.node_stats.gather_admin_socket_stats|default('true') }}
gather_cluster_stats: {{ monitoring.node_stats.gather_cluster_stats|default('false') }}
gather_pool_loads: {{ monitoring.cluster_stats.gather_pool_loads|default('false') }}
-{%- if monitoring.node_stats.socket_dir is defined %}
+ {%- if monitoring.node_stats.socket_dir is defined %}
socket_dir: {{ monitoring.node_stats.socket_dir }}
-{%- endif %}
-{%- if monitoring.node_stats.mon_prefix is defined %}
+ {%- endif %}
+ {%- if monitoring.node_stats.mon_prefix is defined %}
mon_prefix: {{ monitoring.node_stats.mon_prefix }}
-{%- endif %}
-{%- if monitoring.node_stats.osd_prefix is defined %}
+ {%- endif %}
+ {%- if monitoring.node_stats.osd_prefix is defined %}
osd_prefix: {{ monitoring.node_stats.osd_prefix }}
-{%- endif %}
-{%- if monitoring.interval is defined %}
+ {%- endif %}
+ {%- if monitoring.interval is defined %}
interval: {{ monitoring.interval }}
-{%- endif %}
-{%- else %}
+ {%- endif %}
+ {%- else %}
gather_admin_socket_stats: true
gather_cluster_stats: false
gather_pool_loads: false
-{%- endif %}
+ {%- endif %}
+ {%- endif -%}
+
+ {%- endif %}
{%- endif %}
diff --git a/ceph/mgr.sls b/ceph/mgr.sls
index bfc58b1..ef07e80 100644
--- a/ceph/mgr.sls
+++ b/ceph/mgr.sls
@@ -74,7 +74,7 @@
enable_ceph_dashboard:
cmd.run:
- name: "ceph -c /etc/ceph/{{ common.get('cluster_name', 'ceph') }}.conf mgr module enable dashboard"
- - unless: "ceph -c /etc/ceph/{{ common.get('cluster_name', 'ceph') }}.conf mgr module ls | grep dashboard"
+ - unless: "ceph -c /etc/ceph/{{ common.get('cluster_name', 'ceph') }}.conf mgr module ls | python -c 'import sys, json; print json.load(sys.stdin)[\"enabled_modules\"] | grep dashboard"
- require:
- file: common_config
@@ -83,7 +83,19 @@
disable_ceph_dashboard:
cmd.run:
- name: "ceph -c /etc/ceph/{{ common.get('cluster_name', 'ceph') }}.conf mgr module disable dashboard"
- - onlyif: "ceph -c /etc/ceph/{{ common.get('cluster_name', 'ceph') }}.conf mgr module ls | grep dashboard"
+ - unless: "ceph -c /etc/ceph/{{ common.get('cluster_name', 'ceph') }}.conf mgr module ls | python -c 'import sys, json; print json.load(sys.stdin)[\"disabled_modules\"] | grep dashboard"
+ - require:
+ - file: common_config
+ - file: /var/lib/ceph/mgr/{{ common.get('cluster_name', 'ceph') }}-{{ grains.host }}/
+
+{%- endif %}
+
+{%- if pillar.get('prometheus', {}).get('collector',{}).get("enabled", False) %}
+
+enable_prometheus_plugin:
+ cmd.run:
+ - name: "ceph -c /etc/ceph/{{ common.get('cluster_name', 'ceph') }}.conf mgr module enable prometheus"
+ - unless: "ceph -c /etc/ceph/{{ common.get('cluster_name', 'ceph') }}.conf mgr module ls | python -c 'import sys, json; print json.load(sys.stdin)[\"enabled_modules\"] | grep prometheus"
- require:
- file: common_config
- file: /var/lib/ceph/mgr/{{ common.get('cluster_name', 'ceph') }}-{{ grains.host }}/
@@ -92,4 +104,4 @@
{%- endif %}
-{%- endif %}
\ No newline at end of file
+{%- endif %}
diff --git a/ceph/setup/keyring.sls b/ceph/setup/keyring.sls
index bd49b45..f26c608 100644
--- a/ceph/setup/keyring.sls
+++ b/ceph/setup/keyring.sls
@@ -11,12 +11,6 @@
{% for keyring_name, keyring in common.get('keyring', {}).iteritems() %}
-{% set keyring_cmd = "ceph -c /etc/ceph/"+ common.get('cluster_name', 'ceph') + ".conf auth get-or-create client." %}
-{% set keyring_cap = [] %}
-{%- for cap_name, cap in keyring.caps.iteritems() %}
- {% do keyring_cap.append(cap_name + " '" + cap + "' ")%}
-{%- endfor %}
-
{%- if keyring.name is defined %}
{%- if keyring.name != 'admin' and keyring.key is defined and common.get("manage_keyring", False) %}
@@ -37,11 +31,10 @@
{%- elif keyring.name != 'admin' %}
-{% set key_contents = salt['cmd.shell'](keyring_cmd + keyring.name + " " + keyring_cap|join(" ")) %}
ceph_create_keyring_{{ keyring.name }}:
- file.managed:
- - name: {{ common.prefix_dir }}/etc/ceph/{{ common.get('cluster_name', 'ceph') }}.client.{{ keyring.name }}.keyring
- - contents: {{ key_contents | yaml_encode }}
+ cmd.run:
+ - name: "ceph -c /etc/ceph/{{ common.get('cluster_name', 'ceph') }}.conf auth get-or-create client.{{ keyring.name }} {%- for cap_name, cap in keyring.caps.iteritems() %} {{ cap_name }} '{{ cap }}' {%- endfor %} > {{ common.prefix_dir }}/etc/ceph/{{ common.get('cluster_name', 'ceph') }}.client.{{ keyring.name }}.keyring"
+ - unless: "test -f {{ common.prefix_dir }}/etc/ceph/{{ common.get('cluster_name', 'ceph') }}.client.{{ keyring.name }}.keyring"
{%- endif %}
@@ -65,11 +58,10 @@
{%- elif keyring_name != 'admin' %}
-{% set key_contents = salt['cmd.shell'](keyring_cmd + keyring_name + " " + keyring_cap|join(" ")) %}
ceph_create_keyring_{{ keyring_name }}:
- file.managed:
- - name: {{ common.prefix_dir }}/etc/ceph/{{ common.get('cluster_name', 'ceph') }}.client.{{ keyring_name }}.keyring
- - contents: {{ key_contents | yaml_encode }}
+ cmd.run:
+ - name: "ceph -c /etc/ceph/{{ common.get('cluster_name', 'ceph') }}.conf auth get-or-create client.{{ keyring_name }} {%- for cap_name, cap in keyring.caps.iteritems() %} {{ cap_name }} '{{ cap }}' {%- endfor %} > {{ common.prefix_dir }}/etc/ceph/{{ common.get('cluster_name', 'ceph') }}.client.{{ keyring_name }}.keyring"
+ - unless: "test -f {{ common.prefix_dir }}/etc/ceph/{{ common.get('cluster_name', 'ceph') }}.client.{{ keyring_name }}.keyring"
{%- endif %}
diff --git a/debian/control b/debian/control
index e952b46..17cfb41 100644
--- a/debian/control
+++ b/debian/control
@@ -1,12 +1,12 @@
Source: salt-formula-ceph
-Maintainer: Jakub Pavlik <jakub.pavlik@tcpcloud.eu>
+Maintainer: Mirantis Dev <dev@mirantis.com>
Section: admin
Priority: optional
Build-Depends: salt-master, python, python-yaml, debhelper (>= 9)
Standards-Version: 3.9.6
-Homepage: http://www.tcpcloud.eu
-Vcs-Browser: https://github.com/tcpcloud/salt-formula-ceph
-Vcs-Git: https://github.com/tcpcloud/salt-formula-ceph.git
+Homepage: https://www.mirantis.com
+Vcs-Browser: https://gerrit.mcp.mirantis.com/#/admin/projects/salt-formulas/ceph
+Vcs-Git: https://gerrit.mcp.mirantis.com/salt-formulas/ceph.git
Package: salt-formula-ceph
Architecture: all
diff --git a/debian/copyright b/debian/copyright
index a7f2a6c..80fbb95 100644
--- a/debian/copyright
+++ b/debian/copyright
@@ -1,12 +1,12 @@
Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
Upstream-Name: salt-formula-ceph
-Upstream-Contact: Jakub Pavlik <jakub.pavlik@tcpcloud.eu>
-Source: https://github.com/tcpcloud/salt-formula-ceph
+Upstream-Contact: Mirantis Dev <dev@mirantis.com>
+Source: https://gerrit.mcp.mirantis.com/#/admin/projects/salt-formulas/ceph
Files: *
-Copyright: 2014-2015 tcp cloud
+Copyright: 2014-2019 Mirantis Inc. et al
License: Apache-2.0
- Copyright (C) 2014-2015 tcp cloud
+ Copyright (C) 2014-2019 Mirantis Inc. et al
.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
diff --git a/metadata.yml b/metadata.yml
index e4d8b71..a8aeed0 100644
--- a/metadata.yml
+++ b/metadata.yml
@@ -1,3 +1,3 @@
name: "ceph"
version: "0.2"
-source: "https://github.com/salt-formulas/salt-formula-ceph"
+source: "https://gerrit.mcp.mirantis.com/salt-formulas/ceph"
diff --git a/metadata/service/support.yml b/metadata/service/support.yml
index 2be3736..5c87d50 100644
--- a/metadata/service/support.yml
+++ b/metadata/service/support.yml
@@ -15,3 +15,5 @@
enabled: true
grafana:
enabled: true
+ fluentd:
+ enabled: true