Tomáš Kukrál | f72096d | 2017-08-11 12:58:03 +0200 | [diff] [blame] | 1 | /** |
| 2 | * |
| 3 | * Remove OSD from existing cluster |
| 4 | * |
| 5 | * Required parameters: |
| 6 | * SALT_MASTER_URL URL of Salt master |
| 7 | * SALT_MASTER_CREDENTIALS Credentials to the Salt API |
| 8 | * |
| 9 | * HOST Host (minion id) to be removed |
Jiri Broulik | 2c00f4c | 2017-10-26 13:23:11 +0200 | [diff] [blame] | 10 | * OSD Comma separated list of osd ids to be removed |
Tomáš Kukrál | f72096d | 2017-08-11 12:58:03 +0200 | [diff] [blame] | 11 | * ADMIN_HOST Host (minion id) with admin keyring |
| 12 | * CLUSTER_FLAGS Comma separated list of flags to set on the cluster |
| 13 | * WAIT_FOR_HEALTHY Wait for cluster rebalance before stopping daemons |
| 14 | * |
| 15 | */ |
| 16 | |
// Shared-library helpers. `common` and `salt` are assigned WITHOUT `def` on
// purpose: that puts them in the script binding, which is what lets the helper
// methods in this file (removePartition, runCephCommand, waitForHealthy) use them.
common = new com.mirantis.mk.Common()
salt = new com.mirantis.mk.Salt()
def python = new com.mirantis.mk.Python()

// Handle given to setupPepperVirtualenv and then passed as `master` to every Salt call.
def pepperEnv = "pepperEnv"

// Split on commas AND blanks so "1, 2" yields the same tokens as "1,2".
// A token with a stray leading blank would never match a grain key in the
// `osd_id in osds` test and the OSD would be silently skipped.
def flags = CLUSTER_FLAGS.tokenize(', ')
def osds = OSD.tokenize(', ')
Tomáš Kukrál | f72096d | 2017-08-11 12:58:03 +0200 | [diff] [blame] | 24 | |
Jiri Broulik | eb7b82f | 2017-11-30 13:55:40 +0100 | [diff] [blame] | 25 | |
/**
 * Find a partition on the target minion by its UUID and delete it with `parted`.
 *
 * How the partition device is resolved depends on `type`:
 *   'lockbox' - looked up with `lsblk` (device-mapper entries excluded) and unmounted
 *   'data'    - whatever is mounted at /var/lib/ceph/osd/ceph-<id> is unmounted first,
 *               then the device is looked up with `blkid`
 *   other     - looked up with `blkid`
 *
 * Lookup and unmount failures are logged as warnings and swallowed; when no
 * device could be resolved nothing is removed.
 *
 * @param master          Salt connection handle, forwarded to runCephCommand
 * @param target          minion the commands are run on
 * @param partition_uuid  UUID used to grep the lsblk / blkid output
 * @param type            'lockbox', 'data' or anything else for the plain lookup
 * @param id              OSD id; only read when type == 'data'
 */
def removePartition(master, target, partition_uuid, type='', id=-1) {
    def partition = ""
    if (type == 'lockbox') {
        try {
            // umount - partition = /dev/sdi2
            partition = runCephCommand(master, target, "lsblk -rp | grep -v mapper | grep ${partition_uuid} ")['return'][0].values()[0].split()[0]
            runCephCommand(master, target, "umount ${partition}")
        } catch (Exception e) {
            common.warningMsg(e)
        }
    } else {
        if (type == 'data') {
            try {
                // umount - partition = /dev/sdi2
                partition = runCephCommand(master, target, "df | grep /var/lib/ceph/osd/ceph-${id}")['return'][0].values()[0].split()[0]
                runCephCommand(master, target, "umount ${partition}")
            } catch (Exception e) {
                common.warningMsg(e)
            }
        }
        try {
            // partition = /dev/sdi2 (overrides the df result when blkid finds the UUID)
            partition = runCephCommand(master, target, "blkid | grep ${partition_uuid} ")['return'][0].values()[0].split(":")[0]
        } catch (Exception e) {
            common.warningMsg(e)
        }
    }

    if (partition?.trim()) {
        def dev
        def part_id
        if (partition.contains("nvme")) {
            // /dev/nvme1n1p2 -> dev = /dev/nvme1n1, part_id = 2
            // The whole "p<N>" suffix has to go; stripping only the digits
            // would leave "/dev/nvme1n1p", which is not a device.
            dev = partition.replaceAll('p\\d+$', "")
            part_id = partition.substring(partition.lastIndexOf("p") + 1).replaceAll("[^0-9]+", "")
        } else {
            // /dev/sdi2 -> dev = /dev/sdi, part_id = 2
            dev = partition.replaceAll('\\d+$', "")
            part_id = partition.substring(partition.lastIndexOf("/") + 1).replaceAll("[^0-9]+", "")
        }
        common.infoMsg("Removing partition " + part_id + " from " + dev)
        // NOTE(review): "Ignore" is run as a command here, not echoed; presumably
        // `echo Ignore | parted ...` was intended to answer parted's prompt - confirm
        // before changing, the string is kept as it was.
        runCephCommand(master, target, "Ignore | parted ${dev} rm ${part_id}")
    }
    return
}
| 78 | |
/**
 * Run a shell command on a minion via salt.cmdRun.
 *
 * @param master  Salt connection handle
 * @param target  minion the command is run on
 * @param cmd     shell command line
 * @return the salt.cmdRun result unchanged; callers in this file read the
 *         command output from ['return'][0].values()[0]
 */
def runCephCommand(master, target, cmd) {
    def response = salt.cmdRun(master, target, cmd)
    return response
}
| 82 | |
/**
 * Poll `ceph health` on ADMIN_HOST until it reports HEALTH_OK or the attempt
 * budget is used up.
 *
 * Running out of attempts does not fail the job (callers carry on either way),
 * but it is now logged as a warning so it cannot be mistaken for success.
 *
 * @param master    Salt connection handle
 * @param count     starting value of the attempt counter
 * @param attempts  polling stops once the counter reaches this value
 */
def waitForHealthy(master, count=0, attempts=300) {
    // wait for healthy cluster
    while (count < attempts) {
        def health = runCephCommand(master, ADMIN_HOST, 'ceph health')['return'][0].values()[0]
        if (health.contains('HEALTH_OK')) {
            common.infoMsg('Cluster is healthy')
            return
        }
        count++
        // pause between polls (Jenkins `sleep` step)
        sleep(10)
    }
    common.warningMsg('Cluster did not report HEALTH_OK within the polling limit (' + attempts + '), continuing anyway')
}
/**
 * Unset every given cluster flag with `ceph osd unset` on ADMIN_HOST.
 * Used by both the "nothing to remove" exit and the normal end of the job so
 * the flags set at start-up are released on each of those paths.
 *
 * @param master  Salt connection handle
 * @param flags   list of flag names; an empty list is a no-op
 */
def unsetClusterFlags(master, flags) {
    if (flags.size() > 0) {
        stage('Unset cluster flags') {
            for (flag in flags) {
                common.infoMsg('Removing flag ' + flag)
                runCephCommand(master, ADMIN_HOST, 'ceph osd unset ' + flag)
            }
        }
    }
}

// Main pipeline: set flags, pick the OSDs of HOST that were requested, take them
// out of the cluster, stop and deregister them, then delete their partitions.
// NOTE(review): when a step throws, the flags are left set (unchanged behaviour).
timeout(time: 12, unit: 'HOURS') {
    node("python") {

        // create connection to salt master
        python.setupPepperVirtualenv(pepperEnv, SALT_MASTER_URL, SALT_MASTER_CREDENTIALS)

        if (flags.size() > 0) {
            stage('Set cluster flags') {
                for (flag in flags) {
                    runCephCommand(pepperEnv, ADMIN_HOST, 'ceph osd set ' + flag)
                }
            }
        }

        def osd_ids = []

        // get list of osd disks of the host
        salt.runSaltProcessStep(pepperEnv, HOST, 'saltutil.sync_grains', [], null, true, 5)
        def cephGrain = salt.getGrain(pepperEnv, HOST, 'ceph')

        if (cephGrain['return'].isEmpty()) {
            throw new Exception("Ceph salt grain cannot be found!")
        }
        common.print(cephGrain)
        def ceph_disks = cephGrain['return'][0].values()[0].values()[0]['ceph_disk']
        common.prettyPrint(ceph_disks)

        // keep only the OSDs that were asked for ('*' selects every OSD of the host)
        for (i in ceph_disks) {
            def osd_id = i.getKey().toString()
            if (osd_id in osds || OSD == '*') {
                osd_ids.add('osd.' + osd_id)
                print("Will delete " + osd_id)
            } else {
                print("Skipping " + osd_id)
            }
        }

        // wait for healthy cluster
        // if (WAIT_FOR_HEALTHY.toBoolean()) {
        //     waitForHealthy(pepperEnv)
        // }

        if (osd_ids == []) {
            // Nothing to remove on this host. Release the flags set above before
            // leaving, otherwise they would stay set on the cluster.
            unsetClusterFlags(pepperEnv, flags)
            currentBuild.result = 'SUCCESS'
            return
        }

        // `ceph osd out <id> <id>`
        stage('Set OSDs out') {
            runCephCommand(pepperEnv, ADMIN_HOST, 'ceph osd out ' + osd_ids.join(' '))
        }

        // wait for healthy cluster
        if (WAIT_FOR_HEALTHY.toBoolean()) {
            sleep(5)
            waitForHealthy(pepperEnv)
        }

        // stop osd daemons
        stage('Stop OSD daemons') {
            for (i in osd_ids) {
                // literal replace: 'osd.' is a prefix, not a regex
                salt.runSaltProcessStep(pepperEnv, HOST, 'service.stop', ['ceph-osd@' + i.replace('osd.', '')], null, true)
            }
        }

        // `ceph osd crush remove osd.2`
        stage('Remove OSDs from CRUSH') {
            for (i in osd_ids) {
                runCephCommand(pepperEnv, ADMIN_HOST, 'ceph osd crush remove ' + i)
            }
        }

        // remove keyring `ceph auth del osd.3`
        stage('Remove OSD keyrings from auth') {
            for (i in osd_ids) {
                runCephCommand(pepperEnv, ADMIN_HOST, 'ceph auth del ' + i)
            }
        }

        // remove osd `ceph osd rm osd.3`
        stage('Remove OSDs') {
            for (i in osd_ids) {
                runCephCommand(pepperEnv, ADMIN_HOST, 'ceph osd rm ' + i)
            }
        }

        for (osd_id in osd_ids) {
            // numeric OSD id; declared locally so it does not leak into the script binding
            def id = osd_id.replace('osd.', '')
            /*

            def dmcrypt = ""
            try {
                dmcrypt = runCephCommand(pepperEnv, HOST, "ls -la /var/lib/ceph/osd/ceph-${id}/ | grep dmcrypt")['return'][0].values()[0]
            } catch (Exception e) {
                common.warningMsg(e)
            }

            if (dmcrypt?.trim()) {
                mount = runCephCommand(pepperEnv, HOST, "lsblk -rp | grep /var/lib/ceph/osd/ceph-${id} -B1")['return'][0].values()[0]
                dev = mount.split()[0].replaceAll("[0-9]","")

                // remove partition tables
                stage("dd part table on ${dev}") {
                    runCephCommand(pepperEnv, HOST, "dd if=/dev/zero of=${dev} bs=512 count=1 conv=notrunc")
                }

            }
            */

            // remove journal, block_db, block_wal partition `parted /dev/sdj rm 3`
            stage('Remove journal / block_db / block_wal partition') {
                def journal_partition_uuid = ""
                def block_db_partition_uuid = ""
                def block_wal_partition_uuid = ""
                // Each *_uuid file is optional; a failed `cat` is logged and leaves
                // the corresponding uuid empty.
                try {
                    journal_partition_uuid = runCephCommand(pepperEnv, HOST, "cat /var/lib/ceph/osd/ceph-${id}/journal_uuid")['return'][0].values()[0].split("\n")[0]
                } catch (Exception e) {
                    common.infoMsg(e)
                }
                try {
                    block_db_partition_uuid = runCephCommand(pepperEnv, HOST, "cat /var/lib/ceph/osd/ceph-${id}/block.db_uuid")['return'][0].values()[0].split("\n")[0]
                } catch (Exception e) {
                    common.infoMsg(e)
                }

                try {
                    block_wal_partition_uuid = runCephCommand(pepperEnv, HOST, "cat /var/lib/ceph/osd/ceph-${id}/block.wal_uuid")['return'][0].values()[0].split("\n")[0]
                } catch (Exception e) {
                    common.infoMsg(e)
                }

                // remove partition_uuid = 2c76f144-f412-481e-b150-4046212ca932
                if (journal_partition_uuid?.trim()) {
                    removePartition(pepperEnv, HOST, journal_partition_uuid)
                }
                if (block_db_partition_uuid?.trim()) {
                    removePartition(pepperEnv, HOST, block_db_partition_uuid)
                }
                if (block_wal_partition_uuid?.trim()) {
                    removePartition(pepperEnv, HOST, block_wal_partition_uuid)
                }

                try {
                    runCephCommand(pepperEnv, HOST, "partprobe")
                } catch (Exception e) {
                    common.warningMsg(e)
                }
            }

            // remove data / block / lockbox partition `parted /dev/sdj rm 3`
            stage('Remove data / block / lockbox partition') {
                def data_partition_uuid = ""
                def block_partition_uuid = ""
                try {
                    data_partition_uuid = runCephCommand(pepperEnv, HOST, "cat /var/lib/ceph/osd/ceph-${id}/fsid")['return'][0].values()[0].split("\n")[0]
                    common.print(data_partition_uuid)
                } catch (Exception e) {
                    common.infoMsg(e)
                }
                try {
                    block_partition_uuid = runCephCommand(pepperEnv, HOST, "cat /var/lib/ceph/osd/ceph-${id}/block_uuid")['return'][0].values()[0].split("\n")[0]
                } catch (Exception e) {
                    common.infoMsg(e)
                }

                // the lockbox lookup reuses the data partition uuid
                def lockbox_partition_uuid = data_partition_uuid

                // remove partition_uuid = 2c76f144-f412-481e-b150-4046212ca932
                if (block_partition_uuid?.trim()) {
                    removePartition(pepperEnv, HOST, block_partition_uuid)
                }
                if (data_partition_uuid?.trim()) {
                    removePartition(pepperEnv, HOST, data_partition_uuid, 'data', id)
                }
                if (lockbox_partition_uuid?.trim()) {
                    removePartition(pepperEnv, HOST, lockbox_partition_uuid, 'lockbox')
                }
            }
        }

        // remove cluster flags
        unsetClusterFlags(pepperEnv, flags)
    }
}