Jiri Broulik | dc87d72 | 2017-11-03 15:43:22 +0100 | [diff] [blame] | 1 | /** |
| 2 | * |
| 3 | * Filestore to Bluestore or vice versa backend migration |
| 4 | * |
| 5 | * Required parameters: |
| 6 | * SALT_MASTER_URL URL of Salt master |
| 7 | * SALT_MASTER_CREDENTIALS Credentials to the Salt API |
| 8 | * |
| 9 | * ADMIN_HOST Host (minion id) with admin keyring and /etc/crushmap file present |
| 10 | * OSD OSD ids to be migrated if single OSD host is targeted (comma-separated list - 1,2,3) |
| 11 | * TARGET Hosts (minion ids) to be targeted |
| 12 | * CLUSTER_FLAGS Comma-separated list of flags to apply to the cluster |
| 13 | * WAIT_FOR_HEALTHY Wait for cluster rebalance before stopping daemons |
| 14 | * ORIGIN_BACKEND Ceph backend before upgrade |
Jiri Broulik | 96c867a | 2017-11-07 16:14:10 +0100 | [diff] [blame] | 15 | * PER_OSD_CONTROL Set to true if Ceph status verification after every osd disk migration is desired |
| 16 | * PER_OSD_HOST_CONTROL Set to true if Ceph status verification after whole OSD host migration is desired |
Jiri Broulik | dc87d72 | 2017-11-03 15:43:22 +0100 | [diff] [blame] | 17 | * |
| 18 | */ |
| 19 | |
// Shared-library helpers. `common` and `salt` are assigned without `def`, which
// puts them in the script binding so the functions defined below can use them.
common = new com.mirantis.mk.Common()
salt = new com.mirantis.mk.Salt()
def python = new com.mirantis.mk.Python()

// Only the per-OSD migration method is implemented.
// TBD: per-host
MIGRATION_METHOD = "per-osd"

// Handle passed to python.setupPepperVirtualenv and to every Salt call below.
def pepperEnv = "pepperEnv"

// Job parameters arrive as comma-separated strings; split them into lists.
def osds = OSD.tokenize(',')
def flags = CLUSTER_FLAGS.tokenize(',')
| 30 | |
/**
 * Remove the partition identified by a partition UUID from its parent disk.
 *
 * The partition device is looked up with `blkid | grep <uuid>` on the target and
 * then deleted with `parted <disk> rm <number>`. Lookup failures are logged as a
 * warning and nothing is removed.
 *
 * @param master          Salt connection handle passed through to runCephCommand
 * @param target          Minion on which the commands are executed
 * @param partition_uuid  Partition UUID to search for in the blkid output
 */
def removePartition(master, target, partition_uuid) {
    def partition = ""
    try {
        // blkid prints "<device>: KEY=value ...", so the device is everything
        // before the first colon of the first line, e.g. partition = /dev/sdi2
        def blkid_out = runCephCommand(master, target, "blkid | grep ${partition_uuid} ")['return'][0].values()[0]
        partition = blkid_out.toString().trim().split("\n")[0].split(":")[0].trim()
    } catch (Exception e) {
        common.warningMsg(e)
    }

    // Never hand parted anything that is not a /dev path ending in a partition number.
    if (!(partition ==~ /\/dev\/\S*\d/)) {
        if (partition) {
            common.warningMsg("Cannot determine partition device for ${partition_uuid} from '${partition}', skipping removal")
        }
        return
    }

    // part_id = 2 (the complete trailing number, so /dev/sdi12 gives 12, not 1)
    def part_id = partition.replaceAll('^.*?(\\d+)$', '$1')

    // dev = /dev/sdi
    // Disks whose own name ends in a digit (nvme0n1, mmcblk0, ...) separate the
    // partition number with "p": /dev/nvme0n1p2 -> /dev/nvme0n1
    def dev
    if (partition ==~ /.*\dp\d+/) {
        dev = partition.replaceAll('p\\d+$', '')
    } else {
        dev = partition.replaceAll('\\d+$', '')
    }

    runCephCommand(master, target, "parted ${dev} rm ${part_id}")
    return
}
| 49 | |
/**
 * Remove the journal / block.db / block.wal partitions that belong to one OSD.
 *
 * The OSD data directory /var/lib/ceph/osd/ceph-<id>/ is listed and the
 * by-partuuid symlink targets of `journal`, `block.db` and `block.wal` are
 * extracted. The journal partition (or, when there is none, the block.db
 * partition) and the block.wal partition are then passed to removePartition.
 *
 * @param master  Salt connection handle passed through to runCephCommand
 * @param target  Minion hosting the OSD
 * @param id      Numeric OSD id (the N in ceph-N)
 */
def removeJournalOrBlockPartitions(master, target, id) {

    // remove journal, block_db, block_wal partition `parted /dev/sdj rm 3`
    stage('Remove journal / block_db / block_wal partition') {

        // Returns the partition UUID the matching symlink points to, or "" when
        // no such symlink exists or the lookup fails (failure is only logged).
        def findPartUuid = { grep_arg ->
            try {
                def listing = runCephCommand(master, target, "ls -la /var/lib/ceph/osd/ceph-${id}/ | grep ${grep_arg} | grep partuuid")['return'][0].values()[0]
                // Only the first line is the symlink entry; take the index of the
                // last "/" from that same line so the offset cannot run past it.
                def first_line = listing.toString().trim().split("\n")[0]
                def slash = first_line.lastIndexOf("/")
                return slash >= 0 ? first_line.substring(slash + 1).trim() : ""
            } catch (Exception e) {
                common.infoMsg(e)
                return ""
            }
        }

        def journal_partition_uuid = findPartUuid("journal")
        def block_db_partition_uuid = findPartUuid("'block.db'")
        def block_wal_partition_uuid = findPartUuid("'block.wal'")

        // set partition_uuid = 2c76f144-f412-481e-b150-4046212ca932
        def partition_uuid = ""
        if (journal_partition_uuid?.trim()) {
            partition_uuid = journal_partition_uuid
        } else if (block_db_partition_uuid?.trim()) {
            partition_uuid = block_db_partition_uuid
        }

        // if disk has journal, block_db or block_wal on different disk, then remove the partition
        if (partition_uuid?.trim()) {
            removePartition(master, target, partition_uuid)
        }
        if (block_wal_partition_uuid?.trim()) {
            removePartition(master, target, block_wal_partition_uuid)
        }
    }
    return
}
| 95 | |
/**
 * Run a shell command on a minion through salt.cmdRun.
 *
 * @param master  Salt connection handle
 * @param target  Minion (or target expression) to run the command on
 * @param cmd     Shell command line to execute
 * @return        Whatever salt.cmdRun returns for the call
 */
def runCephCommand(master, target, cmd) {
    def result = salt.cmdRun(master, target, cmd)
    return result
}
| 99 | |
/**
 * Poll `ceph health` on ADMIN_HOST until it reports HEALTH_OK.
 *
 * The check is repeated every 10 seconds while count < attempts. If the cluster
 * never becomes healthy the function still returns (the migration continues),
 * but a warning is logged so the timeout is visible in the job output.
 *
 * @param master    Salt connection handle passed through to runCephCommand
 * @param count     Starting value of the attempt counter
 * @param attempts  Upper bound for the attempt counter
 */
def waitForHealthy(master, count=0, attempts=300) {
    // wait for healthy cluster
    while (count < attempts) {
        def health = runCephCommand(master, ADMIN_HOST, 'ceph health')['return'][0].values()[0]
        if (health.contains('HEALTH_OK')) {
            common.infoMsg('Cluster is healthy')
            return
        }
        count++
        sleep(10)
    }
    // Previously this fell through silently, indistinguishable from success.
    common.warningMsg("Cluster did not report HEALTH_OK within the allowed number of checks (${attempts}), continuing anyway")
    return
}
| 112 | |
/**
 * Derive the whole-disk device from a partition device path.
 *
 * /dev/sdi1 -> /dev/sdi, /dev/sda10 -> /dev/sda, /dev/nvme0n1p1 -> /dev/nvme0n1.
 * Only the trailing partition number is stripped; digits that are part of the
 * disk name itself are kept.
 *
 * @param partition  Partition device path
 * @return           Path of the disk that contains the partition
 */
def getParentDisk(partition) {
    // Disks whose own name ends in a digit separate the partition number with "p".
    if (partition ==~ /.*\dp\d+/) {
        return partition.replaceAll('p\\d+$', '')
    }
    return partition.replaceAll('\\d+$', '')
}

// Pipeline entry point: migrates the selected OSDs of every targeted host, one OSD at a time.
node("python") {

    // create connection to salt master
    python.setupPepperVirtualenv(pepperEnv, SALT_MASTER_URL, SALT_MASTER_CREDENTIALS)

    if (MIGRATION_METHOD == 'per-osd') {

        if (flags.size() > 0) {
            stage('Set cluster flags') {
                for (flag in flags) {
                    runCephCommand(pepperEnv, ADMIN_HOST, 'ceph osd set ' + flag)
                }
            }
        }

        def target_hosts = salt.getMinions(pepperEnv, TARGET)

        for (tgt in target_hosts) {
            def osd_ids = []

            // get list of osd disks of the tgt
            salt.runSaltProcessStep(pepperEnv, tgt, 'saltutil.sync_grains', [], null, true, 5)
            def ceph_disks = salt.getGrain(pepperEnv, tgt, 'ceph')['return'][0].values()[0].values()[0]['ceph_disk']

            // keep only the OSDs requested through the OSD parameter ('*' selects all)
            for (i in ceph_disks) {
                def osd_id = i.getKey().toString()
                if (osd_id in osds || OSD == '*') {
                    osd_ids.add('osd.' + osd_id)
                    print("Will migrate " + osd_id)
                } else {
                    print("Skipping " + osd_id)
                }
            }

            for (osd_id in osd_ids) {

                // numeric part of "osd.<id>"; the dot is escaped so it only matches the literal prefix
                def id = osd_id.replaceFirst('^osd\\.', '')
                def backend = runCephCommand(pepperEnv, ADMIN_HOST, "ceph osd metadata ${id} | grep osd_objectstore")['return'][0].values()[0]

                // only OSDs that still run the origin backend are migrated
                if (backend.contains(ORIGIN_BACKEND.toLowerCase())) {

                    // wait for healthy cluster before manipulating with osds
                    if (WAIT_FOR_HEALTHY.toBoolean() == true) {
                        waitForHealthy(pepperEnv)
                    }

                    // `ceph osd out <id> <id>`
                    stage('Set OSDs out') {
                        runCephCommand(pepperEnv, ADMIN_HOST, "ceph osd out ${osd_id}")
                    }

                    if (WAIT_FOR_HEALTHY.toBoolean() == true) {
                        sleep(5)
                        waitForHealthy(pepperEnv)
                    }

                    // stop osd daemons
                    stage('Stop OSD daemons') {
                        salt.runSaltProcessStep(pepperEnv, tgt, 'service.stop', ['ceph-osd@' + id], null, true)
                    }

                    // remove keyring `ceph auth del osd.3`
                    stage('Remove OSD keyrings from auth') {
                        runCephCommand(pepperEnv, ADMIN_HOST, 'ceph auth del ' + osd_id)
                    }

                    // remove osd `ceph osd rm osd.3`
                    stage('Remove OSDs') {
                        runCephCommand(pepperEnv, ADMIN_HOST, 'ceph osd rm ' + osd_id)
                    }

                    // a non-empty result means the OSD directory references dmcrypt
                    def dmcrypt = ""
                    try {
                        dmcrypt = runCephCommand(pepperEnv, tgt, "ls -la /var/lib/ceph/osd/ceph-${id}/ | grep dmcrypt")['return'][0].values()[0]
                    } catch (Exception e) {
                        common.warningMsg(e)
                    }

                    // whole-disk device of this OSD; declared locally so it does not leak into the script binding
                    def dev

                    if (dmcrypt?.trim()) {
                        // -B1 also prints the lsblk line preceding the mount entry; its first field is used as the partition
                        def mount = runCephCommand(pepperEnv, tgt, "lsblk -rp | grep /var/lib/ceph/osd/ceph-${id} -B1")['return'][0].values()[0]
                        dev = getParentDisk(mount.split()[0])

                        // remove partition tables
                        stage('dd part tables') {
                            runCephCommand(pepperEnv, tgt, "dd if=/dev/zero of=${dev} bs=512 count=1 conv=notrunc")
                        }

                        // remove journal, block_db, block_wal partition `parted /dev/sdj rm 3`
                        removeJournalOrBlockPartitions(pepperEnv, tgt, id)

                        // reboot
                        stage('reboot and wait') {
                            salt.runSaltProcessStep(pepperEnv, tgt, 'system.reboot', null, null, true, 5)
                            salt.minionsReachable(pepperEnv, 'I@salt:master', tgt)
                            sleep(10)
                        }

                        // zap disks `ceph-disk zap /dev/sdi`
                        stage('Zap devices') {
                            // the first attempt is allowed to fail; the second one must succeed
                            try {
                                runCephCommand(pepperEnv, tgt, 'ceph-disk zap ' + dev)
                            } catch (Exception e) {
                                common.warningMsg(e)
                            }
                            runCephCommand(pepperEnv, tgt, 'ceph-disk zap ' + dev)
                        }

                    } else {

                        // first field of the mount line is the mounted partition, e.g. /dev/sdi1
                        def mount = runCephCommand(pepperEnv, tgt, "mount | grep /var/lib/ceph/osd/ceph-${id}")['return'][0].values()[0]
                        dev = getParentDisk(mount.split()[0])

                        // remove journal, block_db, block_wal partition `parted /dev/sdj rm 3`
                        removeJournalOrBlockPartitions(pepperEnv, tgt, id)

                        // umount `umount /dev/sdi1`
                        stage('Umount devices') {
                            runCephCommand(pepperEnv, tgt, "umount /var/lib/ceph/osd/ceph-${id}")
                        }

                        // zap disks `ceph-disk zap /dev/sdi`
                        stage('Zap device') {
                            runCephCommand(pepperEnv, tgt, 'ceph-disk zap ' + dev)
                        }
                    }

                    // Deploy Ceph OSD
                    stage('Deploy Ceph OSD') {
                        salt.runSaltProcessStep(pepperEnv, tgt, 'saltutil.refresh_pillar', [], null, true, 5)
                        salt.enforceState(pepperEnv, tgt, 'ceph.osd', true)
                    }

                    if (PER_OSD_CONTROL.toBoolean() == true) {
                        stage("Verify backend version for osd.${id}") {
                            sleep(5)
                            runCephCommand(pepperEnv, tgt, "ceph osd metadata ${id} | grep osd_objectstore")
                            runCephCommand(pepperEnv, tgt, "ceph -s")
                        }

                        stage('Ask for manual confirmation') {
                            input message: "From the verification commands above, please check the backend version of osd.${id} and ceph status. If it is correct, Do you want to continue to migrate next osd?"
                        }
                    }
                }
            }
            if (PER_OSD_HOST_CONTROL.toBoolean() == true) {
                stage("Verify backend versions") {
                    sleep(5)
                    runCephCommand(pepperEnv, tgt, "ceph osd metadata | grep osd_objectstore -B2")
                    runCephCommand(pepperEnv, tgt, "ceph -s")
                }

                stage('Ask for manual confirmation') {
                    input message: "From the verification command above, please check the ceph status and backend version of osds on this host. If it is correct, Do you want to continue to migrate next OSD host?"
                }
            }

        }
        // remove cluster flags
        if (flags.size() > 0) {
            stage('Unset cluster flags') {
                for (flag in flags) {
                    common.infoMsg('Removing flag ' + flag)
                    runCephCommand(pepperEnv, ADMIN_HOST, 'ceph osd unset ' + flag)
                }
            }
        }
    }
}