/**
 *
 * Replace failed disk with a new disk
 *
 * Required parameters:
 *  SALT_MASTER_URL                 URL of Salt master
 *  SALT_MASTER_CREDENTIALS         Credentials to the Salt API
 *
 *  HOST                            Host (minion id) where the failed disk(s) will be replaced
 *  ADMIN_HOST                      Host (minion id) with admin keyring and /etc/ceph/crushmap file present
 *  OSD                             Failed OSD ids to be replaced (comma-separated list - 1,2,3)
 *  DEVICE                          Comma-separated list of failed devices that will be replaced at HOST (/dev/sdb,/dev/sdc)
 *  JOURNAL_OR_BLOCKDB_PARTITION    Comma-separated list of partitions where journal or block_db for the failed devices on this HOST were stored (/dev/sdh2,/dev/sdh3)
 *  CLUSTER_FLAGS                   Comma-separated list of flags to apply to the cluster
 *  WAIT_FOR_HEALTHY                Wait for the cluster to rebalance (HEALTH_OK) before stopping the OSD daemons
 *
 */

common = new com.mirantis.mk.Common()
salt = new com.mirantis.mk.Salt()
def python = new com.mirantis.mk.Python()

def pepperEnv = "pepperEnv"
def flags = CLUSTER_FLAGS.tokenize(',')
def osds = OSD.tokenize(',')
def devices = DEVICE.tokenize(',')
def journals_blockdbs = JOURNAL_OR_BLOCKDB_PARTITION.tokenize(',')


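// helper that runs an arbitrary shell command on the given Salt minion via salt.cmdRun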
def runCephCommand(master, target, cmd) {
    return salt.cmdRun(master, target, cmd)
}

node("python") {

    // create connection to salt master
    python.setupPepperVirtualenv(pepperEnv, SALT_MASTER_URL, SALT_MASTER_CREDENTIALS)

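    // set any requested cluster flags (for example noout or norebalance) before the failed OSDs are touched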
    if (flags.size() > 0) {
        stage('Set cluster flags') {
            for (flag in flags) {
                runCephCommand(pepperEnv, ADMIN_HOST, 'ceph osd set ' + flag)
            }
        }
    }

    def osd_ids = []

    print("osds:")
    print(osds)

    // get list of osd disks of the host
    def ceph_disks = salt.getGrain(pepperEnv, HOST, 'ceph')['return'][0].values()[0].values()[0]['ceph_disk']
    common.prettyPrint(ceph_disks)

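    // collect the OSD ids on HOST that match the OSD parameter ('*' selects all of them)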
    for (i in ceph_disks) {
        def osd_id = i.getKey().toString()
        if (osd_id in osds || OSD == '*') {
            osd_ids.add('osd.' + osd_id)
            print("Will delete " + osd_id)
        } else {
            print("Skipping " + osd_id)
        }
    }

    // `ceph osd out <id> <id>`
    stage('Set OSDs out') {
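        // marking the OSDs out tells Ceph to start migrating their data to the remaining OSDs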
        runCephCommand(pepperEnv, ADMIN_HOST, 'ceph osd out ' + osd_ids.join(' '))
    }

    // wait for healthy cluster: poll `ceph health` until HEALTH_OK is reported
    if (WAIT_FOR_HEALTHY.toBoolean() == true) {
        stage('Waiting for healthy cluster') {
            sleep(5)
            while (true) {
                def health = runCephCommand(pepperEnv, ADMIN_HOST, 'ceph health')['return'][0].values()[0]
                if (health.contains('HEALTH_OK')) {
                    common.infoMsg('Cluster is healthy')
                    break;
                }
                sleep(10)
            }
        }
    }

    // stop osd daemons
    stage('Stop OSD daemons') {
        for (i in osd_ids) {
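            // the systemd unit is named ceph-osd@<id>, so strip the 'osd.' prefix before stopping the service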
            salt.runSaltProcessStep(pepperEnv, HOST, 'service.stop', ['ceph-osd@' + i.replaceAll('osd.', '')], null, true)
        }
    }
    /*
    // `ceph osd crush remove osd.2`
    stage('Remove OSDs from CRUSH') {
        for (i in osd_ids) {
            runCephCommand(pepperEnv, ADMIN_HOST, 'ceph osd crush remove ' + i)
        }
    }

    // wait for pgs to rebalance
    if (WAIT_FOR_PG_REBALANCE.toBoolean() == true) {
        stage('Waiting for pgs to rebalance') {
            while (true) {
                def status = runCephCommand(pepperEnv, ADMIN_HOST, 'ceph -s')['return'][0].values()[0]
                if (!status.contains('degraded')) {
                    common.infoMsg('PGs rebalanced')
                    break;
                }
                sleep(10)
            }
        }
    }
    */
    // remove keyring `ceph auth del osd.3`
    stage('Remove OSD keyrings from auth') {
        for (i in osd_ids) {
            runCephCommand(pepperEnv, ADMIN_HOST, 'ceph auth del ' + i)
        }
    }

    // remove osd `ceph osd rm osd.3`
    stage('Remove OSDs') {
        for (i in osd_ids) {
            runCephCommand(pepperEnv, ADMIN_HOST, 'ceph osd rm ' + i)
        }
    }

    // umount `umount /dev/sdi1`
    stage('Umount devices') {
        for (dev in devices) {
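            // NOTE: assumes the OSD data partition is the first partition on the failed device (e.g. /dev/sdb1)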
            runCephCommand(pepperEnv, HOST, 'umount ' + dev + '1')
        }
    }

    // zap disks `ceph-disk zap /dev/sdi`
    stage('Zap devices') {
        for (dev in devices) {
            runCephCommand(pepperEnv, HOST, 'ceph-disk zap ' + dev)
        }
    }

    // remove journal or block_db partition `parted /dev/sdj rm 3`
    stage('Remove journal / block_db partitions') {
        for (journal_blockdb in journals_blockdbs) {
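            // split an entry such as /dev/sdh2 into the device (/dev/sdh) and the partition number (2)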
            if (journal_blockdb?.trim()) {
                // dev = /dev/sdi
                def dev = journal_blockdb.replaceAll("[0-9]", "")
                // part_id = 2
                def part_id = journal_blockdb.substring(journal_blockdb.lastIndexOf("/")+1).replaceAll("[^0-9]", "")
                runCephCommand(pepperEnv, HOST, "parted ${dev} rm ${part_id}")
            }
        }
    }

    // Deploy failed Ceph OSD
    stage('Deploy Ceph OSD') {
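        // re-apply the ceph.osd Salt state on HOST; this is expected to redeploy OSDs on the replaced (now empty) devices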
        salt.enforceState(pepperEnv, HOST, 'ceph.osd', true)
    }

    // remove cluster flags
    if (flags.size() > 0) {
        stage('Unset cluster flags') {
            for (flag in flags) {
                common.infoMsg('Removing flag ' + flag)
                runCephCommand(pepperEnv, ADMIN_HOST, 'ceph osd unset ' + flag)
            }
        }
    }

    /*
    if (ENFORCE_CRUSHMAP.toBoolean() == true) {

        // enforce crushmap `crushtool -c /etc/ceph/crushmap -o /etc/ceph/crushmap.compiled; ceph osd setcrushmap -i /etc/ceph/crushmap.compiled`
        stage('Enforce crushmap') {

            stage('Ask for manual confirmation') {
                input message: "Are you sure that your ADMIN_HOST has correct /etc/ceph/crushmap file? Click proceed to compile and enforce crushmap."
            }
            runCephCommand(pepperEnv, ADMIN_HOST, 'crushtool -c /etc/ceph/crushmap -o /etc/ceph/crushmap.compiled')
            runCephCommand(pepperEnv, ADMIN_HOST, 'ceph osd setcrushmap -i /etc/ceph/crushmap.compiled')
        }
    }
    */
}