ceph pipelines Change-Id: Ib732e266646c5ffc8c2552198ff8b67827629fb2

commit: 2c00f4ca314977c9e5037bfcda52f0d69cd8694c [log] [tgz]
author: Jiri Broulik <jbroulik@mirantis.com> Thu Oct 26 13:23:11 2017 +0200
committer: Jiri Broulik <jbroulik@mirantis.com> Fri Oct 27 17:30:21 2017 +0200
tree: 4df49290c5fc9911abf37223dec5e2cd2e4530c1
parent: 544605144e29b15c9ceb5ef1fcf0c9cfb114f892 [diff] [blame]
diff --git a/ceph-replace-failed-osd.groovy b/ceph-replace-failed-osd.groovy
new file mode 100644
index 0000000..e00160c
--- /dev/null
+++ b/ceph-replace-failed-osd.groovy

@@ -0,0 +1,184 @@
+/**
+ *
+ * Replace failed disk with a new disk
+ *
+ * Required parameters:
+ *  SALT_MASTER_URL                 URL of Salt master
+ *  SALT_MASTER_CREDENTIALS         Credentials to the Salt API
+ *
+ *  HOST                            Host (minion id) on which the failed OSD disks are replaced
+ *  ADMIN_HOST                      Host (minion id) with admin keyring and /etc/crushmap file present
+ *  OSD                             Failed OSD ids to be replaced (comma-separated list - 1,2,3)
+ *  DEVICE                          Comma separated list of failed devices that will be replaced at HOST (/dev/sdb,/dev/sdc)
+ *  JOURNAL_OR_BLOCKDB_PARTITION    Comma separated list of partitions where journal or block_db for the failed devices on this HOST were stored (/dev/sdh2,/dev/sdh3)
+ *  ENFORCE_CRUSHMAP                Set to true if the prepared crush map should be enforced
+ *  WAIT_FOR_PG_REBALANCE           Wait for PGs to rebalance after osd is removed from crush map
+ *  CLUSTER_FLAGS                   Comma separated list of cluster flags to set (ceph osd set) while the OSDs are being removed
+ *  WAIT_FOR_HEALTHY                Wait for the cluster to report healthy before stopping the OSD daemons
+ *
+ */
+
+// Pipeline-library helpers. `common` and `salt` are assigned without `def`, which puts them
+// in the script binding; that is what lets the runCephCommand() method below reach `salt`
+// (a `def` local of the script body would not be visible inside a method).
+common = new com.mirantis.mk.Common()
+salt = new com.mirantis.mk.Salt()
+def python = new com.mirantis.mk.Python()
+
+// Handle given to python.setupPepperVirtualenv() and then passed as the first argument of every salt call
+def pepperEnv = "pepperEnv"
+// Comma-separated job parameters split into lists. tokenize() drops empty entries, so an empty
+// parameter yields an empty list; entries are NOT trimmed, so avoid spaces around the commas.
+def flags = CLUSTER_FLAGS.tokenize(',')
+def osds = OSD.tokenize(',')
+def devices = DEVICE.tokenize(',')
+def journals_blockdbs = JOURNAL_OR_BLOCKDB_PARTITION.tokenize(',')
+
+
+/**
+ * Thin wrapper that forwards a command to salt.cmdRun.
+ *
+ * @param master  first argument for salt.cmdRun (the pipeline passes pepperEnv)
+ * @param target  minion the command is sent to (the pipeline passes HOST or ADMIN_HOST)
+ * @param cmd     command line to run
+ * @return        the value returned by salt.cmdRun, unmodified; callers in this file read
+ *                the command output from ['return'][0].values()[0]
+ */
+def runCephCommand(master, target, cmd) {
+    def result = salt.cmdRun(master, target, cmd)
+    return result
+}
+
+// Pipeline body. Order of operations:
+//   set cluster flags -> mark OSDs out -> (optionally) wait for HEALTH_OK -> stop daemons ->
+//   remove from CRUSH -> (optionally) wait for PGs -> delete auth keys -> remove OSDs ->
+//   unset cluster flags -> umount / zap the failed devices -> drop journal / block_db partitions ->
+//   re-apply the ceph.osd state on HOST -> (optionally) compile and enforce the crush map.
+node("python") {
+
+    // create connection to salt master
+    python.setupPepperVirtualenv(pepperEnv, SALT_MASTER_URL, SALT_MASTER_CREDENTIALS)
+
+    // set the requested cluster flags before touching any OSD
+    if (flags.size() > 0) {
+        stage('Set cluster flags') {
+            for (flag in flags) {
+                runCephCommand(pepperEnv, ADMIN_HOST, 'ceph osd set ' + flag)
+            }
+        }
+    }
+
+    // names ('osd.<id>') of the OSDs on HOST that were selected through the OSD parameter
+    def osd_ids = []
+
+    print("osds:")
+    print(osds)
+
+    // get list of osd disks of the host; the keys of the 'ceph_disk' grain map are used as OSD ids
+    def ceph_disks = salt.getGrain(pepperEnv, HOST, 'ceph')['return'][0].values()[0].values()[0]['ceph_disk']
+    common.prettyPrint(ceph_disks)
+
+    // keep only the OSDs named in the OSD parameter ('*' selects every OSD found on the host)
+    for (i in ceph_disks) {
+        def osd_id = i.getKey().toString()
+        if (osd_id in osds || OSD == '*') {
+            osd_ids.add('osd.' + osd_id)
+            print("Will delete " + osd_id)
+        } else {
+            print("Skipping " + osd_id)
+        }
+    }
+
+    // `ceph osd out <id> <id>`
+    stage('Set OSDs out') {
+        runCephCommand(pepperEnv, ADMIN_HOST, 'ceph osd out ' + osd_ids.join(' '))
+    }
+
+    // wait for healthy cluster
+    if (common.validInputParam('WAIT_FOR_HEALTHY') && WAIT_FOR_HEALTHY.toBoolean()) {
+        stage('Waiting for healthy cluster') {
+            while (true) {
+                def health = runCephCommand(pepperEnv, ADMIN_HOST, 'ceph health')['return'][0].values()[0]
+                // `ceph health` reports the status token with an underscore (HEALTH_OK);
+                // matching 'HEALTH OK' could never succeed and the loop would spin forever
+                if (health.contains('HEALTH_OK')) {
+                    common.infoMsg('Cluster is healthy')
+                    break
+                }
+                sleep(60)
+            }
+        }
+    }
+
+    // stop osd daemons
+    stage('Stop OSD daemons') {
+        for (i in osd_ids) {
+            // service unit is ceph-osd@<numeric id>; the pattern is anchored and the dot escaped
+            // so that only the literal 'osd.' prefix is stripped
+            salt.runSaltProcessStep(pepperEnv, HOST, 'service.stop', ['ceph-osd@' + i.replaceAll('^osd\\.', '')],  null, true)
+        }
+    }
+
+    // `ceph osd crush remove osd.2`
+    stage('Remove OSDs from CRUSH') {
+        for (i in osd_ids) {
+            runCephCommand(pepperEnv, ADMIN_HOST, 'ceph osd crush remove ' + i)
+        }
+    }
+
+    // wait for pgs to rebalance: poll `ceph -s` until its output no longer mentions 'degraded'
+    if (WAIT_FOR_PG_REBALANCE.toBoolean()) {
+        stage('Waiting for pgs to rebalance') {
+            while (true) {
+                def status = runCephCommand(pepperEnv, ADMIN_HOST, 'ceph -s')['return'][0].values()[0]
+                if (!status.contains('degraded')) {
+                    common.infoMsg('PGs rebalanced')
+                    break
+                }
+                sleep(3)
+            }
+        }
+    }
+
+    // remove keyring `ceph auth del osd.3`
+    stage('Remove OSD keyrings from auth') {
+        for (i in osd_ids) {
+            runCephCommand(pepperEnv, ADMIN_HOST, 'ceph auth del ' + i)
+        }
+    }
+
+    // remove osd `ceph osd rm osd.3`
+    stage('Remove OSDs') {
+        for (i in osd_ids) {
+            runCephCommand(pepperEnv, ADMIN_HOST, 'ceph osd rm ' + i)
+        }
+    }
+
+    // remove cluster flags
+    if (flags.size() > 0) {
+        stage('Unset cluster flags') {
+            for (flag in flags) {
+                common.infoMsg('Removing flag ' + flag)
+                runCephCommand(pepperEnv, ADMIN_HOST, 'ceph osd unset ' + flag)
+            }
+        }
+    }
+
+    // umount `umount /dev/sdi1`
+    stage('Umount devices') {
+        for (dev in devices) {
+            // first partition of the device: Linux inserts a 'p' before the partition number
+            // when the disk name itself ends in a digit (/dev/nvme0n1 -> /dev/nvme0n1p1)
+            def first_part = (dev ==~ /.*[0-9]/) ? dev + 'p1' : dev + '1'
+            runCephCommand(pepperEnv, HOST, 'umount ' + first_part)
+        }
+    }
+
+    // zap disks `ceph-disk zap /dev/sdi`
+    stage('Zap devices') {
+        for (dev in devices) {
+            runCephCommand(pepperEnv, HOST, 'ceph-disk zap ' + dev)
+        }
+    }
+
+    // remove journal or block_db partition `parted /dev/sdj rm 3`
+    stage('Remove journal / block_db partitions') {
+        for (journal_blockdb in journals_blockdbs) {
+            if (journal_blockdb?.trim()) {
+                def partition = journal_blockdb.trim()
+                // dev = /dev/sdh for /dev/sdh2, /dev/nvme0n1 for /dev/nvme0n1p2:
+                // strip only the trailing partition number, then the 'p' separator that
+                // follows a disk name ending in a digit (digits inside the name are kept)
+                def dev = partition.replaceAll('[0-9]+$', '').replaceAll('(?<=[0-9])p$', '')
+                // part_id = 2: whatever digits remain after the last non-digit character
+                def part_id = partition.replaceAll('^.*[^0-9]', '')
+                runCephCommand(pepperEnv, HOST, "parted ${dev} rm ${part_id}")
+            }
+        }
+    }
+
+    // Deploy failed Ceph OSD
+    stage('Deploy Ceph OSD') {
+        salt.enforceState(pepperEnv, HOST, 'ceph.osd', true)
+    }
+
+
+    if (ENFORCE_CRUSHMAP.toBoolean()) {
+
+        // enforce crushmap `crushtool -c /etc/ceph/crushmap -o /etc/ceph/crushmap.compiled; ceph osd setcrushmap -i /etc/ceph/crushmap.compiled`
+        stage('Enforce crushmap') {
+
+            // the operator has to confirm interactively before the crush map is compiled and applied
+            stage('Ask for manual confirmation') {
+                input message: "Are you sure that your ADMIN_HOST has correct /etc/ceph/crushmap file? Click proceed to compile and enforce crushmap."
+            }
+            runCephCommand(pepperEnv, ADMIN_HOST, 'crushtool -c /etc/ceph/crushmap -o /etc/ceph/crushmap.compiled')
+            runCephCommand(pepperEnv, ADMIN_HOST, 'ceph osd setcrushmap -i /etc/ceph/crushmap.compiled')
+        }
+    }
+}
commit	2c00f4ca314977c9e5037bfcda52f0d69cd8694c	[log] [tgz]
author	Jiri Broulik <jbroulik@mirantis.com>	Thu Oct 26 13:23:11 2017 +0200
committer	Jiri Broulik <jbroulik@mirantis.com>	Fri Oct 27 17:30:21 2017 +0200
tree	4df49290c5fc9911abf37223dec5e2cd2e4530c1
parent	544605144e29b15c9ceb5ef1fcf0c9cfb114f892 [diff] [blame]