Add support for Ceph monitoring
This change adds support for Ceph monitoring:
- service.monitoring.cluster_stats is applied to monitoring nodes for
collecting cluster-wide metrics (through the Ceph CLI).
- service.monitoring.node_stats is applied to the Ceph nodes for
collecting monitor and OSD metrics (through Unix sockets).
Because Telegraf runs as a container on the monitoring nodes and
requires a working Ceph client configuration, this change also adds
support for deploying Ceph client in container mode.
Change-Id: If7359aca34a350f2c8ee2251bbe8a85314550a45
diff --git a/README.rst b/README.rst
index c34cdc6..a8fa04a 100644
--- a/README.rst
+++ b/README.rst
@@ -65,7 +65,7 @@
glance:
key: 00000000000000000000000000000000000000==
-Client pillar - ussually located at cinder-volume or glance-registry.
+Client pillar - usually located at cinder-volume or glance-registry.
.. code-block:: yaml
@@ -92,6 +92,34 @@
glance:
key: 00000000000000000000000000000000000000==
+Monitoring Ceph cluster - collect cluster metrics
+
+.. code-block:: yaml
+
+ ceph:
+ client:
+ config:
+ global:
+ mon initial members: ceph1,ceph2,ceph3
+ mon host: 10.103.255.252:6789,10.103.255.253:6789,10.103.255.254:6789
+ keyring:
+ monitoring:
+ key: 00000000000000000000000000000000000000==
+ monitoring:
+ cluster_stats:
+ enabled: true
+ ceph_user: monitoring
+
+Monitoring Ceph services - collect metrics from monitor and OSD services
+
+.. code-block:: yaml
+
+ ceph:
+ monitoring:
+ node_stats:
+ enabled: true
+
+
Read more
=========
diff --git a/ceph/client.sls b/ceph/client.sls
index 3361772..bd027a6 100644
--- a/ceph/client.sls
+++ b/ceph/client.sls
@@ -1,11 +1,13 @@
{%- from "ceph/map.jinja" import client with context %}
{%- if client.enabled %}
+{% if not client.container_mode %}
ceph_client_packages:
pkg.installed:
- names: {{ client.pkgs }}
+{%- endif %}
-/etc/ceph:
+{{ client.prefix_dir }}/etc/ceph:
file.directory:
- user: root
- group: root
@@ -14,7 +16,7 @@
{%- for keyring_name, keyring in client.keyring.iteritems() %}
-/etc/ceph/ceph.client.{{ keyring_name }}.keyring:
+{{ client.prefix_dir }}/etc/ceph/ceph.client.{{ keyring_name }}.keyring:
file.managed:
- user: root
- group: root
@@ -24,13 +26,15 @@
- contents: |
[client.{{ keyring_name }}]
- require:
- - file: /etc/ceph
+ - file: {{ client.prefix_dir }}/etc/ceph
ini.options_present:
- sections:
client.{{ keyring_name }}: {{ keyring|yaml }}
+{% if not client.container_mode %}
- require:
- pkg: ceph_client_packages
+{%- endif %}
{%- endfor %}
@@ -43,7 +47,7 @@
{%- set _dummy = config.update(config_fragment) %}
{%- endfor %}
-/etc/ceph/ceph.conf:
+{{ client.prefix_dir }}/etc/ceph/ceph.conf:
file.managed:
- user: root
- group: root
@@ -53,12 +57,14 @@
- contents: |
[global]
- require:
- - file: /etc/ceph
+ - file: {{ client.prefix_dir }}/etc/ceph
ini.options_present:
- sections: {{ config|yaml }}
- require:
+{% if not client.container_mode %}
- pkg: ceph_client_packages
- - file: /etc/ceph
+{%- endif %}
+ - file: {{ client.prefix_dir }}/etc/ceph
{%- endif %}
diff --git a/ceph/files/telegraf.conf b/ceph/files/telegraf.conf
new file mode 100644
index 0000000..2ce6c27
--- /dev/null
+++ b/ceph/files/telegraf.conf
@@ -0,0 +1,33 @@
+[[inputs.ceph]]
+{%- if values.interval is defined %}
+ interval = "{{ values.interval }}"
+{%- endif %}
+ gather_admin_socket_stats = {{ values.gather_admin_socket_stats|lower }}
+ gather_cluster_stats = {{ values.gather_cluster_stats|lower }}
+ gather_pool_loads = {{ values.gather_pool_loads|lower }}
+{%- if values.ceph_binary is defined %}
+ ceph_binary = "{{ values.ceph_binary }}"
+{%- endif %}
+{%- if values.rados_binary is defined %}
+ rados_binary = "{{ values.rados_binary }}"
+{%- endif %}
+{%- if values.ceph_config is defined %}
+ ceph_config = "{{ values.ceph_config }}"
+{%- endif %}
+{%- if values.ceph_user is defined %}
+ ceph_user = "{{ values.ceph_user }}"
+{%- endif %}
+{%- if values.socket_dir is defined %}
+ socket_dir = "{{ values.socket_dir }}"
+{%- endif %}
+{%- if values.mon_prefix is defined %}
+ mon_prefix = "{{ values.mon_prefix }}"
+{%- endif %}
+{%- if values.osd_prefix is defined %}
+ osd_prefix = "{{ values.osd_prefix }}"
+{%- endif %}
+{%- if values.socket_suffix is defined %}
+ socket_suffix = "{{ values.socket_suffix }}"
+{%- endif %}
+
+{%- include 'telegraf/files/input/_tags.conf' %}
diff --git a/ceph/init.sls b/ceph/init.sls
index d44ffec..e6e7103 100644
--- a/ceph/init.sls
+++ b/ceph/init.sls
@@ -10,4 +10,7 @@
{% endif %}
{% if pillar.ceph.radosgw is defined %}
- ceph.radosgw
-{% endif %}
\ No newline at end of file
+{% endif %}
+{% if pillar.ceph.monitoring is defined %}
+- ceph.monitoring
+{% endif %}
diff --git a/ceph/map.jinja b/ceph/map.jinja
index 8eabda2..061bfbc 100644
--- a/ceph/map.jinja
+++ b/ceph/map.jinja
@@ -23,12 +23,23 @@
{% set client = salt['grains.filter_by']({
'Debian': {
'pkgs': ['ceph-common'],
+ 'container_mode': False,
+ 'prefix_dir': '',
},
'RedHat': {
'pkgs': ['ceph-common'],
+ 'container_mode': False,
+ 'prefix_dir': '',
},
}, merge=salt['pillar.get']('ceph:client')) %}
+{% set monitoring = salt['grains.filter_by']({
+ 'default': {
+ 'cluster_stats': {},
+ 'node_stats': {},
+ },
+}, merge=salt['pillar.get']('ceph:monitoring')) %}
+
{% set radosgw = salt['grains.filter_by']({
'Debian': {
'pkgs': ['ceph-common','radosgw','python-rados','librados2'],
diff --git a/ceph/meta/telegraf.yml b/ceph/meta/telegraf.yml
new file mode 100644
index 0000000..b608585
--- /dev/null
+++ b/ceph/meta/telegraf.yml
@@ -0,0 +1,43 @@
+{%- from "ceph/map.jinja" import monitoring with context %}
+
+{%- if monitoring.cluster_stats.get('enabled') and monitoring.cluster_stats.ceph_user is defined %}
+remote_agent:
+ input:
+ ceph:
+ template: ceph/files/telegraf.conf
+ ceph_user: client.{{ monitoring.cluster_stats.ceph_user }}
+{%- if monitoring.cluster_stats.ceph_binary is defined %}
+ ceph_binary: {{ monitoring.cluster_stats.ceph_binary }}
+{%- endif %}
+{%- if monitoring.cluster_stats.rados_binary is defined %}
+ rados_binary: {{ monitoring.cluster_stats.rados_binary }}
+{%- endif %}
+ gather_admin_socket_stats: false
+ gather_cluster_stats: true
+ gather_pool_loads: {{ monitoring.cluster_stats.gather_pool_loads|default('true') }}
+{%- if monitoring.interval is defined %}
+ interval: {{ monitoring.interval }}
+{%- endif %}
+{%- endif %}
+
+{%- if monitoring.node_stats.get('enabled') %}
+agent:
+ input:
+ ceph:
+ template: ceph/files/telegraf.conf
+{%- if monitoring.node_stats.socket_dir is defined %}
+ socket_dir: {{ monitoring.node_stats.socket_dir }}
+{%- endif %}
+{%- if monitoring.node_stats.mon_prefix is defined %}
+ mon_prefix: {{ monitoring.node_stats.mon_prefix }}
+{%- endif %}
+{%- if monitoring.node_stats.osd_prefix is defined %}
+ osd_prefix: {{ monitoring.node_stats.osd_prefix }}
+{%- endif %}
+ gather_admin_socket_stats: true
+ gather_cluster_stats: false
+ gather_pool_loads: false
+{%- if monitoring.interval is defined %}
+ interval: {{ monitoring.interval }}
+{%- endif %}
+{%- endif %}
diff --git a/ceph/monitoring.sls b/ceph/monitoring.sls
new file mode 100644
index 0000000..6328255
--- /dev/null
+++ b/ceph/monitoring.sls
@@ -0,0 +1,8 @@
+{#
+
+The ceph.monitoring state is only required for the Telegraf plugins, the
+Prometheus alerts and the Grafana dashboards.
+
+So everything happens in ceph/meta/(grafana|telegraf|prometheus).yml.
+
+#}
diff --git a/metadata/service/client/container.yml b/metadata/service/client/container.yml
new file mode 100644
index 0000000..dfcd5a6
--- /dev/null
+++ b/metadata/service/client/container.yml
@@ -0,0 +1,8 @@
+applications:
+- ceph
+parameters:
+ ceph:
+ client:
+ enabled: true
+ container_mode: true
+ prefix_dir: ${_param:ceph_client_prefix_dir}
diff --git a/metadata/service/monitoring/cluster_stats.yml b/metadata/service/monitoring/cluster_stats.yml
new file mode 100644
index 0000000..02fc70a
--- /dev/null
+++ b/metadata/service/monitoring/cluster_stats.yml
@@ -0,0 +1,10 @@
+applications:
+- ceph
+classes:
+- service.ceph.support
+parameters:
+ ceph:
+ monitoring:
+ cluster_stats:
+ enabled: true
+ ceph_user: ${_param:ceph_monitoring_user}
diff --git a/metadata/service/monitoring/node_stats.yml b/metadata/service/monitoring/node_stats.yml
new file mode 100644
index 0000000..4462309
--- /dev/null
+++ b/metadata/service/monitoring/node_stats.yml
@@ -0,0 +1,9 @@
+applications:
+- ceph
+classes:
+- service.ceph.support
+parameters:
+ ceph:
+ monitoring:
+ node_stats:
+ enabled: true
diff --git a/metadata/service/support.yml b/metadata/service/support.yml
index 532aed8..d245299 100644
--- a/metadata/service/support.yml
+++ b/metadata/service/support.yml
@@ -9,3 +9,5 @@
enabled: true
sphinx:
enabled: true
+ telegraf:
+ enabled: true
diff --git a/tests/pillar/ceph_client_container.sls b/tests/pillar/ceph_client_container.sls
new file mode 100644
index 0000000..41fd7d1
--- /dev/null
+++ b/tests/pillar/ceph_client_container.sls
@@ -0,0 +1,26 @@
+ceph:
+ client:
+ enabled: true
+ container_mode: true
+ prefix_dir: /srv/volumes/ceph
+ config:
+ global:
+ fsid: 00000000-0000-0000-0000-000000000000
+ mon initial members: ceph1,ceph2,ceph3
+ mon host: 10.103.255.252:6789,10.103.255.253:6789,10.103.255.254:6789
+ osd_fs_mkfs_arguments_xfs:
+ osd_fs_mount_options_xfs: rw,noatime
+ network public: 10.0.0.0/24
+ network cluster: 10.0.0.0/24
+ osd_fs_type: xfs
+ osd:
+ osd journal size: 7500
+ filestore xattr use omap: true
+ mon:
+ mon debug dump transactions: false
+ keyring:
+ cinder:
+ key: 00000000000000000000000000000000000000==
+ glance:
+ key: 00000000000000000000000000000000000000==
+