package com.mirantis.mk
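
/**
 * Ceph lifecycle helpers for MCP Salt-driven deployments.
 *
 * Minimal usage sketch (an assumption for illustration: this class is
 * instantiated like the other com.mirantis.mk libraries and 'pepperEnv'
 * is an initialized Salt connection object):
 *   def ceph = new com.mirantis.mk.Ceph()
 *   ceph.installMon(pepperEnv)
 *   ceph.installOsd(pepperEnv)
 *   ceph.installClient(pepperEnv)
 */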

/**
 * Install and configure ceph clients
 *
 * @param master Salt connection object
 * @param extra_tgt Extra targets for compound
 */
def installClient(master, extra_tgt='') {
    def salt = new Salt()

    // install Ceph Radosgw
    installRgw(master, "I@ceph:radosgw", extra_tgt)

    // setup keyring for Openstack services
    salt.enforceStateWithTest([saltId: master, target: "I@ceph:common and I@glance:server $extra_tgt", state: ['ceph.common', 'ceph.setup.keyring']])
    salt.enforceStateWithTest([saltId: master, target: "I@ceph:common and I@cinder:controller $extra_tgt", state: ['ceph.common', 'ceph.setup.keyring']])
    salt.enforceStateWithTest([saltId: master, target: "I@ceph:common and I@nova:compute $extra_tgt", state: ['ceph.common', 'ceph.setup.keyring']])
    salt.enforceStateWithTest([saltId: master, target: "I@ceph:common and I@gnocchi:server $extra_tgt", state: ['ceph.common', 'ceph.setup.keyring']])
}

/**
 * Install and configure ceph monitor on target
 *
 * @param master Salt connection object
 * @param target Target specification, compliance to compound matcher in salt
 * @param extra_tgt Extra targets for compound
 */
def installMon(master, target="I@ceph:mon", extra_tgt='') {
    def salt = new Salt()

    salt.enforceState([saltId: master, target: "$target $extra_tgt", state: 'salt.minion.grains'])

    // TODO: can we re-add cmn01 with proper keyrings?
    // generate keyrings
    if(salt.testTarget(master, "( I@ceph:mon:keyring:mon or I@ceph:common:keyring:admin ) $extra_tgt")) {
        salt.enforceState([saltId: master, target: "( I@ceph:mon:keyring:mon or I@ceph:common:keyring:admin ) $extra_tgt", state: 'ceph.mon'])
        salt.runSaltProcessStep(master, "I@ceph:mon $extra_tgt", 'saltutil.sync_grains')
        salt.runSaltProcessStep(master, "( I@ceph:mon:keyring:mon or I@ceph:common:keyring:admin ) $extra_tgt", 'mine.update')

        // on target nodes mine is used to get pillar from 'ceph:common:keyring:admin' via grain.items
        // we need to refresh all pillar/grains to make data sharing work correctly
        salt.fullRefresh(master, "( I@ceph:mon:keyring:mon or I@ceph:common:keyring:admin ) $extra_tgt")

        sleep(5)
    }
    // install Ceph Mons
    salt.enforceState([saltId: master, target: "I@ceph:mon $extra_tgt", state: 'ceph.mon'])
    salt.enforceStateWithTest([saltId: master, target: "I@ceph:mgr $extra_tgt", state: 'ceph.mgr'])

    // update config
    salt.enforceState([saltId: master, target: "I@ceph:common $extra_tgt", state: 'ceph.common'])
}

/**
 * Install and configure osd daemons on target
 *
 * @param master Salt connection object
 * @param target Target specification, compliance to compound matcher in salt
 * @param setup Also apply ceph.setup (pools, keyrings, crush) when an I@ceph:setup node exists (optional, default true)
 * @param extra_tgt Extra targets for compound
 */
def installOsd(master, target="I@ceph:osd", setup=true, extra_tgt='') {
    def salt = new Salt()
    def orchestrate = new Orchestrate()

    // install Ceph OSDs
    salt.enforceState([saltId: master, target: target, state: ['linux.storage', 'ceph.osd']])
    salt.runSaltProcessStep(master, "I@ceph:osd $extra_tgt", 'saltutil.sync_grains')
    salt.enforceState([saltId: master, target: target, state: 'ceph.osd.custom'])
    salt.runSaltProcessStep(master, "I@ceph:osd $extra_tgt", 'saltutil.sync_grains')
    salt.runSaltProcessStep(master, "I@ceph:osd $extra_tgt", 'mine.update')

    // setup pools, keyrings and maybe crush
    if(salt.testTarget(master, "I@ceph:setup $extra_tgt") && setup) {
        orchestrate.installBackup(master, 'ceph')
        salt.enforceState([saltId: master, target: "I@ceph:setup $extra_tgt", state: 'ceph.setup'])
    }
}

/**
 * Install and configure rgw service on target
 *
 * @param master Salt connection object
 * @param target Target specification, compliance to compound matcher in salt
 * @param extra_tgt Extra targets for compound
 */
def installRgw(master, target="I@ceph:radosgw", extra_tgt='') {
    def salt = new Salt()

    if(salt.testTarget(master, "I@ceph:radosgw $extra_tgt")) {
        salt.fullRefresh(master, "I@ceph:radosgw $extra_tgt")
        salt.enforceState([saltId: master, target: "I@ceph:radosgw $extra_tgt", state: ['keepalived', 'haproxy', 'ceph.radosgw']])
    }
}

/**
 * Remove rgw daemons from target
 *
 * @param master Salt connection object
 * @param target Target specification, compliance to compound matcher in salt
 * @param extra_tgt Extra targets for compound
 */
def removeRgw(master, target, extra_tgt='') {
    def salt = new Salt()

    // TODO needs to be reviewed
    salt.fullRefresh(master, "I@ceph:radosgw $extra_tgt")
    salt.enforceState([saltId: master, target: "I@ceph:radosgw $extra_tgt", state: ['keepalived', 'haproxy', 'ceph.radosgw']])
}

/**
 * Remove osd daemons from target
 *
 * @param master Salt connection object
 * @param target Target specification, compliance to compound matcher in salt
 * @param osds List of OSD ids to remove
 * @param flags Collection of flags already set on the cluster, to be ignored by health checks
 * @param safeRemove Wait for data rebalance before removing the drive (optional, default true)
 * @param wipeDisks Fill removed data/block partitions with zeros (optional, default false)
 */
def removeOsd(master, target, osds, flags, safeRemove=true, wipeDisks=false) {
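    // usage sketch (hypothetical target and ids): stop and purge osd.12 and
    // osd.13 on osd004, waiting for rebalance first, with 'noout' set by the caller:
    //   removeOsd(pepperEnv, 'osd004*', ['12', '13'], ['noout'])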
    def common = new Common()
    def salt = new Salt()

    // manual equivalent (for reference):
    //   systemctl stop ceph-osd@0
    //   ceph osd purge 0 --yes-i-really-mean-it
    //   umount /dev/vdc1
    //   test -b /dev/vdc1 && dd if=/dev/zero of=/dev/vdc1 bs=1M
    //   test -b /dev/vdc2 && dd if=/dev/zero of=/dev/vdc2 bs=1M count=100
    //   sgdisk -d1 -d2 /dev/vdc; partprobe
    if(osds.isEmpty()) {
        common.warningMsg('List of OSDs was empty. Nothing will be removed from the cluster.')
        return
    }

    // `ceph osd out <id> <id>`
    cmdRun(master, 'ceph osd out ' + osds.join(' '), true, true)

    if(safeRemove) {
        waitForHealthy(master, flags)
    }

    for(osd in osds) {
        salt.runSaltProcessStep(master, target, 'service.stop', "ceph-osd@$osd", null, true)
        cmdRun(master, "ceph osd purge $osd --yes-i-really-mean-it", true, true)
    }

    for(osd in osds) {
        def lvm_enabled = getPillar(master, target, "ceph:osd:lvm_enabled")
        if(lvm_enabled) {
            // ceph-volume lvm zap --osd-id 1 --osd-fsid 55BD4219-16A7-4037-BC20-0F158EFCC83D --destroy
            def output = cmdRunOnTarget(master, target, "ceph-volume lvm zap --osd-id $osd --destroy >/dev/null && echo 'zapped'", false)
            if(output == 'zapped') { continue }
        }
        common.infoMsg("Removing legacy osd.")
        def dataDir = "/var/lib/ceph/osd/ceph-$osd"
        def journal_partition = cmdRunOnTarget(master, target,
            "test -f $dataDir/journal_uuid && readlink -f /dev/disk/by-partuuid/`cat $dataDir/journal_uuid`", false)
        def block_db_partition = cmdRunOnTarget(master, target,
            "test -f $dataDir/block.db_uuid && readlink -f /dev/disk/by-partuuid/`cat $dataDir/block.db_uuid`", false)
        def block_wal_partition = cmdRunOnTarget(master, target,
            "test -f $dataDir/block.wal_uuid && readlink -f /dev/disk/by-partuuid/`cat $dataDir/block.wal_uuid`", false)
        def block_partition = cmdRunOnTarget(master, target,
            "test -f $dataDir/block_uuid && readlink -f /dev/disk/by-partuuid/`cat $dataDir/block_uuid`", false)
        def data_partition = cmdRunOnTarget(master, target,
            "test -f $dataDir/fsid && readlink -f /dev/disk/by-partuuid/`cat $dataDir/fsid`", false)

        try {
            if(journal_partition.trim()) { removePartition(master, target, journal_partition) }
            if(block_db_partition.trim()) { removePartition(master, target, block_db_partition) }
            if(block_wal_partition.trim()) { removePartition(master, target, block_wal_partition) }
            if(block_partition.trim()) { removePartition(master, target, block_partition, 'block', wipeDisks) }
            if(data_partition.trim()) { removePartition(master, target, data_partition, 'data', wipeDisks) }
            else { common.warningMsg("Can't find data partition for osd.$osd") }
        }
        catch(Exception e) {
            // report but continue, as a problem on one osd can be sorted out afterwards
            common.errorMsg("Hit an issue while cleaning partitions for osd.$osd on $target")
            common.errorMsg(e)
            currentBuild.result = 'FAILURE'
        }

        cmdRunOnTarget(master, target, "partprobe", false)
    }
}

/**
 * Update monitoring for target hosts
 *
 * @param master Salt connection object
 * @param target Target specification, compliance to compound matcher in salt
 * @param extra_tgt Extra targets for compound
 */
def updateMonitoring(master, target="I@ceph:common", extra_tgt='') {
    def common = new Common()
    def salt = new Salt()

    def prometheusNodes = salt.getMinions(master, "I@prometheus:server $extra_tgt")
    if(!prometheusNodes.isEmpty()) {
        // collect grains
        salt.enforceState([saltId: master, target: "$target $extra_tgt", state: 'salt.minion.grains'])
        salt.runSaltProcessStep(master, "$target $extra_tgt", 'saltutil.refresh_modules')
        salt.runSaltProcessStep(master, "$target $extra_tgt", 'mine.update')
        sleep(5)
        salt.enforceState([saltId: master, target: "$target $extra_tgt", state: ['fluentd', 'telegraf', 'prometheus']])
        salt.enforceState([saltId: master, target: "I@prometheus:server $extra_tgt", state: 'prometheus'])
    }
    else {
        common.infoMsg('No Prometheus nodes in cluster. Nothing to do.')
    }
}

def connectCeph(master, extra_tgt='') {
    new Common().infoMsg("This method was renamed. Use method connectOS instead.")
    connectOS(master, extra_tgt)
}

/**
 * Enforce configuration and connect OpenStack clients
 *
 * @param master Salt connection object
 * @param extra_tgt Extra targets for compound
 */
def connectOS(master, extra_tgt='') {
    def salt = new Salt()

    // setup Keystone service and endpoints for Swift and/or S3
    salt.enforceStateWithTest([saltId: master, target: "I@keystone:client $extra_tgt", state: 'keystone.client'])

    // connect Ceph to the env
    if(salt.testTarget(master, "I@ceph:common and I@glance:server $extra_tgt")) {
        salt.enforceState([saltId: master, target: "I@ceph:common and I@glance:server $extra_tgt", state: ['glance']])
        salt.runSaltProcessStep(master, "I@ceph:common and I@glance:server $extra_tgt", 'service.restart', ['glance-api'])
    }
    if(salt.testTarget(master, "I@ceph:common and I@cinder:controller $extra_tgt")) {
        salt.enforceState([saltId: master, target: "I@ceph:common and I@cinder:controller $extra_tgt", state: ['cinder']])
        salt.runSaltProcessStep(master, "I@ceph:common and I@cinder:controller $extra_tgt", 'service.restart', ['cinder-volume'])
    }
    if(salt.testTarget(master, "I@ceph:common and I@nova:compute $extra_tgt")) {
        salt.enforceState([saltId: master, target: "I@ceph:common and I@nova:compute $extra_tgt", state: ['nova']])
        salt.runSaltProcessStep(master, "I@ceph:common and I@nova:compute $extra_tgt", 'service.restart', ['nova-compute'])
    }
    if(salt.testTarget(master, "I@ceph:common and I@gnocchi:server $extra_tgt")) {
        salt.enforceState([saltId: master, target: "I@ceph:common and I@gnocchi:server:role:primary $extra_tgt", state: 'gnocchi.server'])
        salt.enforceState([saltId: master, target: "I@ceph:common and I@gnocchi:server $extra_tgt", state: 'gnocchi.server'])
    }
}

/**
 * Remove vm from VCP
 *
 * @param master Salt connection object
 * @param target Target specification, compliance to compound matcher in salt
 */
def removeVm(master, target) {
    def common = new Common()
    def salt = new Salt()

    def fqdn = getGrain(master, target, 'id')
    def hostname = salt.stripDomainName(fqdn)
    def hypervisor = getPillar(master, "I@salt:control", "salt:control:cluster:internal:node:$hostname:provider")

    removeSalt(master, target)

    if(hypervisor?.trim()) {
        cmdRunOnTarget(master, hypervisor, "virsh destroy $fqdn")
        cmdRunOnTarget(master, hypervisor, "virsh undefine $fqdn")
    }
    else {
        common.errorMsg("There is no provider in pillar for $hostname")
    }
}

/**
 * Stop target salt minion, remove its key on master and definition in reclass
 *
 * @param master Salt connection object
 * @param target Target specification, compliance to compound matcher in salt
 */
def removeSalt(master, target) {
    def common = new Common()

    def fqdn = getGrain(master, target, 'id')
    try {
        cmdRunOnTarget(master, 'I@salt:master', "salt-key --include-accepted -r $fqdn -y")
    }
    catch(Exception e) {
        common.warningMsg(e)
    }
}

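/**
 * Remove all ceph auth entries matching the target's host
 *
 * @param master Salt connection object
 * @param target Target specification, compliance to compound matcher in salt
 * @param extra_tgt Extra targets for compound (currently unused)
 */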
def deleteKeyrings(master, target, extra_tgt='') {
    def host = getGrain(master, target, 'host')
    def keys = cmdRun(master, "ceph auth list 2>/dev/null | grep $host", false).tokenize('\n')
    if(keys.isEmpty()) {
        new Common().warningMsg("Nothing to do. There is no keyring for $host")
    }
    for(key in keys) {
        cmdRun(master, "ceph auth del $key")
    }
}
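/**
 * Build `ceph osd pg-upmap-items` commands that pin PGs back from newly 'up'
 * OSDs to their previous 'acting' OSDs; e.g. for pgid 2.1f with up=[3,7,9]
 * and acting=[3,5,9] it appends "ceph osd pg-upmap-items 2.1f 7 5"
 *
 * @param master Salt connection object
 * @param pgmap List of PG entries (maps with 'pgid', 'up' and 'acting' keys)
 * @param map List the generated commands are appended to
 */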
def generateMapping(master, pgmap, map) {
    def pg_new
    def pg_old
    for(pg in pgmap) {
        pg_new = pg["up"].minus(pg["acting"])
        pg_old = pg["acting"].minus(pg["up"])
        if(pg_old.isEmpty()) {
            // use standard rebalancing to just fill gaps with new osds
            unsetFlags(master, 'norebalance')
        }
        else {
            for(int i = 0; i < pg_new.size(); i++) {
                def string = "ceph osd pg-upmap-items ${pg["pgid"]} ${pg_new[i]} ${pg_old[i]}"
                map.add(string)
            }
        }
    }
}

/**
 * Run command on the first of available ceph monitors
 *
 * @param master Salt connection object
 * @param cmd Command to run
 * @param checkResponse Check response of command (optional, default true)
 * @param output Print output (optional, default false)
 */
def cmdRun(master, cmd, checkResponse=true, output=false) {
    def salt = new Salt()
    def cmn01 = salt.getFirstMinion(master, "I@ceph:mon")
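    // salt returns a structure like {'return': [{'<cmn01 minion id>': '<stdout>'}]};
    // index down to the raw output of that single mon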
    return salt.cmdRun(master, cmn01, cmd, checkResponse, null, output)['return'][0][cmn01]
}

/**
 * Run command on target host
 *
 * @param master Salt connection object
 * @param target Target specification, compliance to compound matcher in salt
 * @param cmd Command to run
 * @param checkResponse Check response of command (optional, default true)
 * @param output Print output (optional, default false)
 */
def cmdRunOnTarget(master, target, cmd, checkResponse=true, output=false) {
    def salt = new Salt()
    return salt.cmdRun(master, target, cmd, checkResponse, null, output)['return'][0].values()[0]
}

/**
 * Get the requested pillar from the first matched host
 *
 * @param master Salt connection object
 * @param target Target specification, compliance to compound matcher in salt
 * @param pillar Pillar to obtain
 */
def getPillar(master, target, pillar) {
    def common = new Common()
    def salt = new Salt()
    try {
        return salt.getPillar(master, target, pillar)['return'][0].values()[0]
    }
    catch(Exception e) {
        common.warningMsg('There was no pillar for the target.')
    }
}

/**
 * Get the requested grain from the first matched host
 *
 * @param master Salt connection object
 * @param target Target specification, compliance to compound matcher in salt
 * @param grain Grain to obtain
 */
def getGrain(master, target, grain) {
    def common = new Common()
    def salt = new Salt()
    try {
        return salt.getGrain(master, target, grain)['return'][0].values()[0].values()[0]
    }
    catch(Exception e) {
        common.warningMsg('There was no grain for the target.')
    }
}

/**
 * Set flags
 *
 * @param master Salt connection object
 * @param flags Collection of flags to set
 */
def setFlags(master, flags) {
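    // e.g. setFlags(master, ['noout', 'norebalance']) issues
    // `ceph osd set noout` and `ceph osd set norebalance` on the first mon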
    if(flags instanceof String) { flags = [flags] }
    for(flag in flags) {
        cmdRun(master, 'ceph osd set ' + flag)
    }
}

/**
 * Unset flags
 *
 * @param master Salt connection object
 * @param flags Collection of flags to unset (optional)
 */
def unsetFlags(master, flags=[]) {
    if(flags instanceof String) { flags = [flags] }
    for(flag in flags) {
        cmdRun(master, 'ceph osd unset ' + flag)
    }
}

/**
 * Wait for healthy cluster while ignoring flags which have been set
 *
 * @param master Salt connection object
 * @param flags Collection of flags to ignore in the OSDMAP_FLAGS health check
 * @param attempts Attempts before it pauses execution (optional, default 300)
 */
def waitForHealthy(master, flags, attempts=300) {
    def common = new Common()

    def count = 0
    def health = ''

    // warnings that can appear during the operation but are unrelated to data safety
    def acceptableWarnings = [
        'AUTH_INSECURE_GLOBAL_ID_RECLAIM',
        'AUTH_INSECURE_GLOBAL_ID_RECLAIM_ALLOWED',
        'MON_MSGR2_NOT_ENABLED'
    ]
    // wait for current ops to be reflected in the status
    sleep(5)

    while(count++ < attempts) {
        health = cmdRun(master, 'ceph health -f json', false)
        health = common.parseJSON(health)
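        // assumed (abridged) shape of `ceph health -f json` on Luminous and newer:
        //   {"status": "HEALTH_WARN",
        //    "checks": {"OSDMAP_FLAGS": {"summary": {"message": "noout,norebalance flag(s) set"}}}}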

        if(health['status'] == 'HEALTH_OK') { return }
        if(health['checks'].containsKey('OSDMAP_FLAGS')) {
            def unexpectedFlags = health['checks']['OSDMAP_FLAGS']['summary']['message'].tokenize(' ').getAt(0)?.tokenize(',')
            unexpectedFlags.removeAll(flags)
            if(unexpectedFlags.isEmpty()) {
                health['checks'].remove('OSDMAP_FLAGS')
            }
        }

        // ignore acceptable warnings
        for(w in acceptableWarnings) {
            if(health['checks'].containsKey(w)) {
                health['checks'].remove(w)
            }
        }

        if(health['checks'].isEmpty()) { return }

        common.warningMsg("Ceph cluster is still unhealthy: " + health['status'])
        for(check in health['checks']) {
            common.warningMsg(check.value['summary']['message'])
        }
        sleep(10)
    }
    // TODO: MissingMethodException
    input message: "After ${count} attempts cluster is still unhealthy."
    //throw new RuntimeException("After ${count} attempts cluster is still unhealthy. Can't proceed")
}
def waitForHealthy(master, String host, flags, attempts=300) {
    new Common().warningMsg('This method will be deprecated.')
    waitForHealthy(master, flags, attempts)
}

/**
 * Remove unused orphan partitions left after removed OSDs
 *
 * @param master Salt connection object
 * @param target Target specification, compliance to compound matcher in salt
 * @param wipePartitions Wipe each found partition completely (optional, default false)
 */
def removeOrphans(master, target, wipePartitions=false) {
    def common = new Common()

    // TODO: ceph-disk is available only in luminous
    def disks = cmdRunOnTarget(master, target, "ceph-disk list --format json 2>/dev/null", false)
    disks = "{\"disks\":$disks}" // common.parseJSON() can't parse a list of maps
    disks = common.parseJSON(disks)['disks']
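    // assumed (abridged) ceph-disk output: a top-level list of disks, e.g.
    //   [{"path": "/dev/vdb", "partitions": [{"type": "data", "state": "active",
    //     "path": "/dev/vdb1", ...}]}]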
    for(disk in disks) {
        for(partition in disk.get('partitions')) {
            def orphan = false
            if(partition.get('type') == 'block.db' && !partition.containsKey('block.db_for')) { orphan = true }
            else if(partition.get('type') == 'block' && !partition.containsKey('block_for')) { orphan = true }
            else if(partition.get('type') == 'data' && partition.get('state') != 'active') { orphan = true }
            // TODO: test for the rest of types

            if(orphan) {
                if(partition.get('path')) {
                    removePartition(master, target, partition['path'], partition['type'], wipePartitions)
                }
                else {
                    common.warningMsg("Found orphan partition on $target but could not determine its path to remove it.")
                }
            }
        }
    }
    cmdRunOnTarget(master, target, "partprobe", false)
}

/**
 * Ceph remove partition
 *
 * @param master Salt connection object
 * @param target Target specification, compliance to compound matcher in salt
 * @param partition Partition to remove on target host
 * @param type Type of partition. Some types need additional steps (optional, default empty string)
 * @param fullWipe Fill the entire partition with zeros (optional, default false)
 */
def removePartition(master, target, partition, type='', fullWipe=false) {
    def common = new Common()

    def partitionID = ''
    def disk = ''
    def wipeCmd = ''

    if(!partition?.trim()) {
        throw new Exception("Can't proceed without defined partition.")
    }
    cmdRunOnTarget(master, target, "test -b $partition")

    if(fullWipe) { wipeCmd = "dd if=/dev/zero of=$partition bs=1M 2>/dev/null" }
    else { wipeCmd = "dd if=/dev/zero of=$partition bs=1M count=100 2>/dev/null" }

    common.infoMsg("Removing from the cluster $type partition $partition on $target.")
    if(type == 'lockbox') {
        try {
            partition = cmdRunOnTarget(master, target, "lsblk -rp | grep -v mapper | grep $partition", false)
            cmdRunOnTarget(master, target, "umount $partition")
        }
        catch (Exception e) {
            common.warningMsg(e)
        }
    }
    else if(type == 'data') {
        cmdRunOnTarget(master, target, "umount $partition 2>/dev/null", false)
        cmdRunOnTarget(master, target, wipeCmd, false)
    }
    else if(type == 'block' || fullWipe) {
        cmdRunOnTarget(master, target, wipeCmd, false)
    }
    try {
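        // illustrative walk-through for a hypothetical /dev/vdc2:
        //   `lsblk -no MAJ:MIN` -> "252:34", /sys/dev/block/252:34/partition -> "2",
        //   `lsblk -no pkname` -> "vdc", so the cleanup below runs `sgdisk -d2 /dev/vdc`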
        partitionID = cmdRunOnTarget(master, target, "cat /sys/dev/block/`lsblk $partition -no MAJ:MIN | xargs`/partition", false)
        disk = cmdRunOnTarget(master, target, "lsblk $partition -no pkname", false)
    }
    catch (Exception e) {
        common.errorMsg("Couldn't get disk name or partition number for $partition")
        common.warningMsg(e)
    }
    try {
        cmdRunOnTarget(master, target, "sgdisk -d$partitionID /dev/$disk", true, true)
    }
    catch (Exception e) {
        common.warningMsg("Did not find any partition to delete.")
        common.warningMsg(e)
    }
    // try to remove the partition table if the disk has no partitions left - required by ceph-volume
    cmdRunOnTarget(master, target, "partprobe -d -s /dev/$disk | grep partitions\$ && sgdisk -Z /dev/$disk", false, true)
}