ceph mon backup
Change-Id: I58243e19910d74bcafb7de4da5c97f56faafa158
diff --git a/README.rst b/README.rst
index 71f2197..9a10052 100644
--- a/README.rst
+++ b/README.rst
@@ -77,7 +77,7 @@
* Cluster and public network
-Ceph cluster is accessed using network and thus you need to have decend capacity to handle all the client. There are two networks required for cluster: **public** network and cluster network. Public network is used for client connections and MONs and OSDs are listening on this network. Second network ic called **cluster** networks and this network is used for communication between OSDs.
+Ceph cluster is accessed using network and thus you need to have decent capacity to handle all the clients. There are two networks required for cluster: **public** network and **cluster** network. Public network is used for client connections and MONs and OSDs are listening on this network. Second network is called **cluster** network and this network is used for communication between OSDs.
Both networks should have dedicated interfaces, bonding interfaces and dedicating vlans on bonded interfaces isn't allowed. Good practise is dedicate more throughput for the cluster network because cluster traffic is more important than client traffic.
@@ -644,6 +644,50 @@
pool_read_bytes_threshold: 70000000
pool_read_ops_threshold: 1000
+Ceph monitor backups
+--------------------
+
+Backup client with ssh/rsync remote host
+
+.. code-block:: yaml
+
+ ceph:
+ backup:
+ client:
+ enabled: true
+ full_backups_to_keep: 3
+ hours_before_full: 24
+ target:
+ host: cfg01
+
+
+Backup client with local backup only
+
+.. code-block:: yaml
+
+ ceph:
+ backup:
+ client:
+ enabled: true
+ full_backups_to_keep: 3
+ hours_before_full: 24
+
+Backup server rsync
+
+.. code-block:: yaml
+
+ ceph:
+ backup:
+ server:
+ enabled: true
+ hours_before_full: 24
+ full_backups_to_keep: 5
+ key:
+ ceph_pub_key:
+ enabled: true
+ key: ssh_rsa
+
+
More information
================
diff --git a/ceph/backup.sls b/ceph/backup.sls
new file mode 100644
index 0000000..047e7d6
--- /dev/null
+++ b/ceph/backup.sls
@@ -0,0 +1,129 @@
+{%- from "ceph/map.jinja" import backup with context %}
+
+{#- Ceph MON backup states. The "client" role takes local backups of a
+    monitor (optionally rsyncing them to a remote host); the "server" role
+    receives backups over rsync/ssh and prunes old ones. -#}
+
+{%- if backup.client is defined %}
+
+{%- if backup.client.enabled %}
+
+ceph_backup_client_packages:
+  pkg.installed:
+  - names: {{ backup.pkgs }}
+
+ceph_backup_runner_script:
+  file.managed:
+  - name: /usr/local/bin/ceph-backup-runner.sh
+  - source: salt://ceph/files/backup/ceph-backup-client-runner.sh
+  - template: jinja
+  {#- 755, not 655: the script is executed directly from cron, so the
+      owner needs the execute bit #}
+  - mode: 755
+  - require:
+    - pkg: ceph_backup_client_packages
+
+ceph_call_backup_runner_script:
+  file.managed:
+  - name: /usr/local/bin/ceph-backup-runner-call.sh
+  - source: salt://ceph/files/backup/ceph-backup-client-runner-call.sh
+  - template: jinja
+  - mode: 755
+  - require:
+    - pkg: ceph_backup_client_packages
+
+ceph_backup_dir:
+  file.directory:
+  - name: {{ backup.backup_dir }}/full
+  - user: root
+  - group: root
+  - makedirs: true
+
+ceph_backup_runner_cron:
+  cron.present:
+  - name: /usr/local/bin/ceph-backup-runner-call.sh
+  - user: root
+{%- if not backup.cron %}
+  - commented: True
+{%- endif %}
+  - minute: random
+{%- if backup.client.hours_before_full is defined %}
+{#- run every N hours when 1 < N <= 23, otherwise fall back to daily at 02:00 #}
+{%- if backup.client.hours_before_full <= 23 and backup.client.hours_before_full > 1 %}
+  - hour: '*/{{ backup.client.hours_before_full }}'
+{%- elif not backup.client.hours_before_full <= 1 %}
+  - hour: 2
+{%- endif %}
+{%- else %}
+  - hour: 2
+{%- endif %}
+  - require:
+    - file: ceph_backup_runner_script
+    - file: ceph_call_backup_runner_script
+
+
+{%- endif %}
+
+{%- endif %}
+
+{%- if backup.server is defined %}
+
+{%- if backup.server.enabled %}
+
+ceph_backup_server_packages:
+  pkg.installed:
+  - names: {{ backup.pkgs }}
+
+ceph_user:
+  user.present:
+  - name: ceph
+  - system: true
+  - home: {{ backup.backup_dir }}
+
+{{ backup.backup_dir }}/full:
+  cmd.run:
+  - name: "mkdir -p {{ backup.backup_dir }}/full"
+  - runas: ceph
+  - unless: "test -d {{ backup.backup_dir }}"
+
+{#- items() instead of iteritems(): iteritems() is Python 2 only and fails
+    on Python 3 salt minions #}
+{%- for key_name, key in backup.server.key.items() %}
+
+{%- if key.get('enabled', False) %}
+
+{#- use the short pillar key name as state ID instead of embedding the whole
+    public key in it #}
+ceph_key_{{ key_name }}:
+  ssh_auth.present:
+  - user: ceph
+  - name: {{ key.key }}
+  - require:
+    {#- the backup dir above is declared via cmd.run, so the requisite must
+        reference cmd (a file: requisite would fail to resolve) #}
+    - cmd: {{ backup.backup_dir }}/full
+
+
+{%- endif %}
+
+{%- endfor %}
+
+ceph_server_script:
+  file.managed:
+  - name: /usr/local/bin/ceph-backup-runner.sh
+  - source: salt://ceph/files/backup/ceph-backup-server-runner.sh
+  - template: jinja
+  - mode: 755
+  - require:
+    - pkg: ceph_backup_server_packages
+
+ceph_server_cron:
+  cron.present:
+  - name: /usr/local/bin/ceph-backup-runner.sh
+  - user: ceph
+{%- if not backup.cron %}
+  - commented: True
+{%- endif %}
+  - minute: random
+{%- if backup.server.hours_before_full is defined %}
+{#- same scheduling rule as the client cron above #}
+{%- if backup.server.hours_before_full <= 23 and backup.server.hours_before_full > 1 %}
+  - hour: '*/{{ backup.server.hours_before_full }}'
+{%- elif not backup.server.hours_before_full <= 1 %}
+  - hour: 2
+{%- endif %}
+{%- else %}
+  - hour: 2
+{%- endif %}
+  - require:
+    - file: ceph_server_script
+
+{%- endif %}
+
+{%- endif %}
diff --git a/ceph/files/backup/ceph-backup-client-runner-call.sh b/ceph/files/backup/ceph-backup-client-runner-call.sh
new file mode 100644
index 0000000..bc69521
--- /dev/null
+++ b/ceph/files/backup/ceph-backup-client-runner-call.sh
@@ -0,0 +1,79 @@
+{%- from "ceph/map.jinja" import backup with context -%}
+#!/bin/bash
+# Wrapper around ceph-backup-runner.sh: takes a local backup of the ceph
+# monitor, optionally rsyncs it to a remote host, and prunes full backups
+# older than KEEP * FULLBACKUPLIFE.
+
+# Configuration
+# -------------
+ BACKUPDIR="{{ backup.backup_dir }}/full"
+ TMPDIR="$( pwd )/tmp_ceph_backup"
+ HOSTNAME="$( hostname )"
+ TIMESTAMP="$( date +%m%d%k%M )"
+
+ SCRIPTDIR="/usr/local/bin"
+ KEEP={{ backup.client.full_backups_to_keep }}
+ HOURSFULLBACKUPLIFE={{ backup.client.hours_before_full }} # Lifetime of the latest full backup in hours
+ RSYNCLOGDIR="/var/log/backups"
+ RSYNCLOG="/var/log/backups/ceph-rsync.log"
+
+
+ # Cap the full-backup lifetime at 24h and convert hours to seconds.
+ if [ $HOURSFULLBACKUPLIFE -gt 24 ]; then
+   FULLBACKUPLIFE=$(( 24 * 60 * 60 ))
+ else
+   FULLBACKUPLIFE=$(( $HOURSFULLBACKUPLIFE * 60 * 60 ))
+ fi
+
+# Functions
+# ---------
+ function check_dependencies() {
+   # Iterate through the list of executables this script actually uses and
+   # ensure they are installed and on PATH (cqlsh and other leftovers from
+   # the Cassandra backup script removed).
+   DEPS="date dirname echo find grep hostname mkdir rm "
+   DEPS+="rsync ssh-keygen ssh-keyscan tail "
+   for bin in $DEPS; do
+     which $bin >/dev/null 2>&1 || NOTFOUND+="$bin "
+   done
+
+   if [ ! -z "$NOTFOUND" ]; then
+     printf "Error finding required executables: ${NOTFOUND}\n" >&2
+     exit 1
+   fi
+ }
+
+ # Fail early when a dependency is missing (the function was previously
+ # defined but never invoked).
+ check_dependencies
+
+ # Need write access to local directory to create dump file
+ if [ ! -w $( pwd ) ]; then
+   printf "You must have write access to the current directory $( pwd )\n"
+   exit 1
+ fi
+
+ if [ ! -d "$RSYNCLOGDIR" ] && [ ! -e "$RSYNCLOG" ]; then
+   mkdir -p "$RSYNCLOGDIR"
+ fi
+
+ $SCRIPTDIR/ceph-backup-runner.sh
+
+# rsync just the new or modified backup files
+# ---------
+
+ {%- if backup.client.target is defined %}
+ echo "Adding ssh-key of remote host to known_hosts"
+ # Redirections fixed: the previous '2>&1 | > $RSYNCLOG' piped into a bare
+ # redirect, truncating the log and discarding all output, and sent
+ # ssh-keyscan's stderr into known_hosts.
+ ssh-keygen -R {{ backup.client.target.host }} > $RSYNCLOG 2>&1
+ ssh-keyscan {{ backup.client.target.host }} >> ~/.ssh/known_hosts 2>> $RSYNCLOG
+ echo "Rsyncing files to remote host"
+ /usr/bin/rsync -rhtPv --rsync-path=rsync --progress $BACKUPDIR/* -e ssh ceph@{{ backup.client.target.host }}:$BACKUPDIR >> $RSYNCLOG
+
+ # Check if the rsync succeeded or failed
+ if [ -s $RSYNCLOG ] && ! grep -q "rsync error: " $RSYNCLOG; then
+   echo "Rsync to remote host completed OK"
+ else
+   echo "Rsync to remote host FAILED"
+   exit 1
+ fi
+ {%- endif %}
+
+# Cleanup
+# ---------
+ echo "Cleanup. Keeping only $KEEP full backups"
+ # Age threshold in minutes for find's -mmin.
+ AGE=$(($FULLBACKUPLIFE * $KEEP / 60))
+ # -mindepth 1 keeps find from matching (and trying to remove) $BACKUPDIR itself.
+ find $BACKUPDIR -mindepth 1 -maxdepth 1 -type d -mmin +$AGE -execdir echo "removing: "$BACKUPDIR/{} \; -execdir rm -rf $BACKUPDIR/{} \;
diff --git a/ceph/files/backup/ceph-backup-client-runner.sh b/ceph/files/backup/ceph-backup-client-runner.sh
new file mode 100644
index 0000000..56955c3
--- /dev/null
+++ b/ceph/files/backup/ceph-backup-client-runner.sh
@@ -0,0 +1,54 @@
+{%- from "ceph/map.jinja" import backup with context -%}
+#!/bin/bash
+# Script to backup ceph schema and create snapshot of keyspaces
+
+# Configuration
+# -------------
+ BACKUPDIR="{{ backup.backup_dir }}/full"
+ TMPDIR="$( pwd )/tmp_ceph_backup"
+ HOSTNAME="$( hostname )"
+ TIMESTAMP="$( date +%m%d%k%M )"
+
+ # Need write access to local directory to create dump file
+ if [ ! -w $( pwd ) ]; then
+ printf "You must have write access to the current directory $( pwd )\n"
+ exit 1
+ fi
+
+ # Create temporary working directory. Yes, deliberately avoiding mktemp
+ if [ ! -d "$TMPDIR" ] && [ ! -e "$TMPDIR" ]; then
+ mkdir -p "$TMPDIR"
+ else
+ printf "Error creating temporary directory $TMPDIR"
+ exit 1
+ fi
+
+ # Create backup directory.
+ if [ ! -d "$BACKUPDIR" ] && [ ! -e "$BACKUPDIR" ]; then
+ mkdir -p "$BACKUPDIR"
+ fi
+
+ # Create Backup
+ # --------------------
+
+ mkdir -p "$BACKUPDIR/$HOSTNAME/"
+
+ cp -a /etc/ceph/ $TMPDIR/
+ service ceph-mon@$HOSTNAME stop
+ cp -a /var/lib/ceph/mon/ceph-$HOSTNAME/ $TMPDIR/
+ service ceph-mon@$HOSTNAME start
+
+ tar -cvzf $BACKUPDIR/$HOSTNAME/ceph-$HOSTNAME-$TIMESTAMP.tgz $TMPDIR
+ RC=$?
+
+ if [ $RC -gt 0 ]; then
+ printf "Error generating tar archive.\n"
+ [ "$TMPDIR" != "/" ] && rm -rf "$TMPDIR"
+ exit 1
+ else
+ printf "Successfully created backup\n"
+ [ "$TMPDIR" != "/" ] && rm -rf "$TMPDIR"
+ exit 0
+ fi
+
+# Fin.
diff --git a/ceph/files/backup/ceph-backup-server-runner.sh b/ceph/files/backup/ceph-backup-server-runner.sh
new file mode 100644
index 0000000..3bcdc4c
--- /dev/null
+++ b/ceph/files/backup/ceph-backup-server-runner.sh
@@ -0,0 +1,21 @@
+{%- from "ceph/map.jinja" import backup with context -%}
+#!/bin/bash
+
+# Script to erase old backups on ceph 'server role' node.
+# ---------
+
+ # backup.backup_dir, not backup.remote_backup_dir: only backup_dir is
+ # defined in the map.jinja defaults, and the client rsyncs into
+ # $backup_dir/full on this host, so that is the directory to prune.
+ BACKUPDIR="{{ backup.backup_dir }}/full"
+ KEEP={{ backup.server.full_backups_to_keep }}
+ HOURSFULLBACKUPLIFE={{ backup.server.hours_before_full }} # Lifetime of the latest full backup in hours
+
+ # Cap the full-backup lifetime at 24h and convert hours to seconds.
+ if [ $HOURSFULLBACKUPLIFE -gt 24 ]; then
+   FULLBACKUPLIFE=$(( 24 * 60 * 60 ))
+ else
+   FULLBACKUPLIFE=$(( $HOURSFULLBACKUPLIFE * 60 * 60 ))
+ fi
+
+# Cleanup
+# ---------
+ echo "Cleanup. Keeping only $KEEP full backups"
+ # Age threshold in minutes for find's -mmin.
+ AGE=$(($FULLBACKUPLIFE * $KEEP / 60))
+ # -mindepth 1 keeps find from matching (and trying to remove) $BACKUPDIR itself.
+ find $BACKUPDIR -mindepth 1 -maxdepth 1 -type d -mmin +$AGE -execdir echo "removing: "$BACKUPDIR/{} \; -execdir rm -rf $BACKUPDIR/{} \;
diff --git a/ceph/init.sls b/ceph/init.sls
index fbd14b9..638c6f5 100644
--- a/ceph/init.sls
+++ b/ceph/init.sls
@@ -3,6 +3,9 @@
- ceph.common
- ceph.setup.keyring
{% endif %}
+{% if pillar.ceph.backup is defined %}
+- ceph.backup
+{% endif %}
{% if pillar.ceph.mon is defined %}
- ceph.mon
{% endif %}
diff --git a/ceph/map.jinja b/ceph/map.jinja
index 4cc3ec5..1b9e559 100644
--- a/ceph/map.jinja
+++ b/ceph/map.jinja
@@ -90,5 +90,24 @@
{%- endload %}
{% set monitoring = salt['grains.filter_by'](monitoring_defaults, merge=salt['pillar.get']('ceph:monitoring')) %}
+{%- load_yaml as backup_defaults %}
+
+backup:
+ Debian:
+ pkgs:
+ - rsync
+ backup_dir: '/var/backups/ceph-mon'
+ cron: True
+ RedHat:
+ pkgs:
+ - rsync
+ backup_dir: '/var/backups/ceph-mon'
+ cron: True
+
+{%- endload %}
+
+{% set backup = salt['grains.filter_by'](backup_defaults['backup'], merge=salt['pillar.get']('ceph:backup', {})) %}
+
+
{#- vim:ft=sls
-#}
diff --git a/metadata/service/backup/client.sls b/metadata/service/backup/client.sls
new file mode 100644
index 0000000..463bc9a
--- /dev/null
+++ b/metadata/service/backup/client.sls
@@ -0,0 +1,11 @@
+applications:
+- ceph
+parameters:
+ ceph:
+ backup:
+ client:
+ enabled: true
+ full_backups_to_keep: 3
+ hours_before_full: 24
+ # target:
+ # host: cfg01
diff --git a/metadata/service/backup/server.sls b/metadata/service/backup/server.sls
new file mode 100644
index 0000000..181101e
--- /dev/null
+++ b/metadata/service/backup/server.sls
@@ -0,0 +1,13 @@
+applications:
+- ceph
+parameters:
+ ceph:
+ backup:
+ server:
+ enabled: true
+ hours_before_full: 24
+ full_backups_to_keep: 5
+ key:
+ ceph_pub_key:
+ enabled: true
+ key: ssh_rsa