Merge "additional healthcheck while doing ceph mon backups" into release/2019.2.0
diff --git a/ceph/files/backup/ceph-backup-client-runner.sh b/ceph/files/backup/ceph-backup-client-runner.sh
index 971f944..329494a 100644
--- a/ceph/files/backup/ceph-backup-client-runner.sh
+++ b/ceph/files/backup/ceph-backup-client-runner.sh
@@ -8,6 +8,7 @@
     TMPDIR="$( pwd )/tmp_ceph_backup"
     HOSTNAME="$( hostname )"
     TIMESTAMP="$( date +%m%d%H%M )"
+    HEALTH="$(ceph health)"   # checked below so the mon is only stopped when no mons are already down
 
     # Need write access to local directory to create dump file
     if [ ! -w $( pwd ) ]; then
@@ -45,9 +46,14 @@
     rsync -arv --exclude=osd/{{ common.get('cluster_name', 'ceph') }}-*/current /var/lib/ceph $TMPDIR/{{ common.get('cluster_name', 'ceph') }}-$HOSTNAME/
 {%- elif mon.get('enabled', False) %}
     cp -a /etc/ceph/ $TMPDIR/
-    service ceph-mon@$HOSTNAME stop
-    cp -a /var/lib/ceph/ $TMPDIR/{{ common.get('cluster_name', 'ceph') }}-$HOSTNAME/
-    service ceph-mon@$HOSTNAME start
+    if ! echo "$HEALTH" | grep -q "mons down"; then
+      service ceph-mon@$HOSTNAME stop
+      cp -a /var/lib/ceph/ $TMPDIR/{{ common.get('cluster_name', 'ceph') }}-$HOSTNAME/
+      service ceph-mon@$HOSTNAME start
+    else
+      printf "One or more monitor nodes are already stopped or not working correctly. Cannot continue"
+      exit 1
+    fi
 {%- endif %}
 
     tar -cvzf $BACKUPDIR/$HOSTNAME/{{ common.get('cluster_name', 'ceph') }}-$HOSTNAME-$TIMESTAMP.tgz $TMPDIR
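
A minimal standalone sketch of the guard this change introduces, with the Salt/Jinja templating removed; the hostname handling follows the script above, but the backup destination under /tmp is an illustrative assumption, not the module's real $TMPDIR layout:

    #!/bin/bash
    # Capture cluster health once; abort if any monitor is already reported down.
    HOSTNAME="$(hostname)"
    HEALTH="$(ceph health)"

    if echo "$HEALTH" | grep -q "mons down"; then
        printf "One or more monitor nodes are already stopped or not working correctly. Cannot continue.\n"
        exit 1
    fi

    # All monitors are up, so the local one can be stopped briefly for a consistent copy.
    service ceph-mon@"$HOSTNAME" stop
    mkdir -p "/tmp/ceph-backup-$HOSTNAME"                 # illustrative destination only
    cp -a /var/lib/ceph/ "/tmp/ceph-backup-$HOSTNAME/"
    service ceph-mon@"$HOSTNAME" start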