Merge "additional healthcheck while doing ceph mon backups" into release/2019.2.0
diff --git a/ceph/files/backup/ceph-backup-client-runner.sh b/ceph/files/backup/ceph-backup-client-runner.sh
index 971f944..329494a 100644
--- a/ceph/files/backup/ceph-backup-client-runner.sh
+++ b/ceph/files/backup/ceph-backup-client-runner.sh
@@ -8,6 +8,7 @@
TMPDIR="$( pwd )/tmp_ceph_backup"
HOSTNAME="$( hostname )"
TIMESTAMP="$( date +%m%d%H%M )"
+ HEALTH="$(ceph health)"
# Need write access to local directory to create dump file
if [ ! -w $( pwd ) ]; then
@@ -45,9 +46,14 @@
rsync -arv --exclude=osd/{{ common.get('cluster_name', 'ceph') }}-*/current /var/lib/ceph $TMPDIR/{{ common.get('cluster_name', 'ceph') }}-$HOSTNAME/
{%- elif mon.get('enabled', False) %}
cp -a /etc/ceph/ $TMPDIR/
- service ceph-mon@$HOSTNAME stop
- cp -a /var/lib/ceph/ $TMPDIR/{{ common.get('cluster_name', 'ceph') }}-$HOSTNAME/
- service ceph-mon@$HOSTNAME start
+ if ! echo "$HEALTH" | grep -q "mons down"; then
+     service ceph-mon@$HOSTNAME stop
+     cp -a /var/lib/ceph/ $TMPDIR/{{ common.get('cluster_name', 'ceph') }}-$HOSTNAME/
+     service ceph-mon@$HOSTNAME start
+ else
+     printf "One or more monitor nodes are already stopped or not working correctly. Cannot continue.\n"
+     exit 1
+ fi
{%- endif %}
tar -cvzf $BACKUPDIR/$HOSTNAME/{{ common.get('cluster_name', 'ceph') }}-$HOSTNAME-$TIMESTAMP.tgz $TMPDIR
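The guard captures `ceph health` once at the start of the run and only stops the local ceph-mon for the copy when the report does not already mention "mons down"; otherwise the script aborts so the backup never takes a second monitor out of quorum. A minimal sketch of that check in isolation, assuming illustrative health strings (the real command prints e.g. HEALTH_OK or a HEALTH_WARN summary; hostnames below are made up):

    #!/bin/bash
    # Simulate the guard: proceed only when no monitor is already reported
    # down, otherwise abort before stopping the local ceph-mon.
    for HEALTH in "HEALTH_OK" "HEALTH_WARN 1 mons down, quorum cmn02,cmn03"; do
        if ! echo "$HEALTH" | grep -q "mons down"; then
            echo "'$HEALTH' -> safe to stop the local mon and copy /var/lib/ceph"
        else
            echo "'$HEALTH' -> abort, another mon is already down"
        fi
    done

Using `grep -q` (with the negation on the pipeline) keeps the check quiet and returns the right status even if a future ceph release emits multi-line health output, where the original `grep -v` form would match any unrelated line and report success.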