/**
 *
 * Replace failed disk with a new disk
 *
 * Required parameters:
 *  SALT_MASTER_URL                     URL of Salt master
 *  SALT_MASTER_CREDENTIALS             Credentials to the Salt API
 *
 *  HOST                                Host (minion id) where the failed disks will be replaced
 *  ADMIN_HOST                          Host (minion id) with admin keyring and /etc/crushmap file present
 *  OSD                                 Failed OSD ids to be replaced (comma-separated list - 1,2,3)
 *  DEVICE                              Comma-separated list of failed devices that will be replaced at HOST (/dev/sdb,/dev/sdc)
 *  JOURNAL_BLOCKDB_BLOCKWAL_PARTITION  Comma-separated list of partitions where journal, block_db or block_wal for the failed devices on this HOST were stored (/dev/sdh2,/dev/sdh3)
 *  CLUSTER_FLAGS                       Comma-separated list of flags to apply to the cluster
 *  WAIT_FOR_HEALTHY                    Wait for cluster rebalance before stopping daemons
 *  DMCRYPT                             Set to True if the OSDs being replaced are/were encrypted
 *
 */
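
/*
 * Example parameter set (hypothetical values, for illustration only; adjust to your environment):
 *   HOST = 'osd001*'
 *   ADMIN_HOST = 'cmn01*'
 *   OSD = '5,6'
 *   DEVICE = '/dev/sdb,/dev/sdc'
 *   JOURNAL_BLOCKDB_BLOCKWAL_PARTITION = '/dev/sdh2,/dev/sdh3'
 *   CLUSTER_FLAGS = 'noout'
 *   WAIT_FOR_HEALTHY = 'True'
 *   DMCRYPT = 'False'
 */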

common = new com.mirantis.mk.Common()
salt = new com.mirantis.mk.Salt()
def python = new com.mirantis.mk.Python()

def pepperEnv = "pepperEnv"
def flags = CLUSTER_FLAGS.tokenize(',')
def osds = OSD.tokenize(',')
def devices = DEVICE.tokenize(',')
def journals_blockdbs_blockwals = JOURNAL_BLOCKDB_BLOCKWAL_PARTITION.tokenize(',')


def runCephCommand(master, target, cmd) {
    return salt.cmdRun(master, target, cmd)
}
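
// Note: salt.cmdRun returns the raw Salt API response. An illustrative shape (assuming a
// single targeted minion) is ['return': [['osd001.domain.local': '<command output>']]],
// which is why callers below unwrap it with ['return'][0].values()[0].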

def waitForHealthy(master, count=0, attempts=300) {
    // wait for healthy cluster
    while (count < attempts) {
        def health = runCephCommand(master, ADMIN_HOST, 'ceph health')['return'][0].values()[0]
        if (health.contains('HEALTH_OK')) {
            common.infoMsg('Cluster is healthy')
            break;
        }
        count++
        sleep(10)
    }
}
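
// Usage: waitForHealthy(pepperEnv) polls `ceph health` on ADMIN_HOST every 10 seconds and
// returns as soon as HEALTH_OK is reported, or after 300 attempts (roughly 50 minutes).
// It does not fail the build on timeout; it simply stops waiting.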

node("python") {

    // create connection to salt master
    python.setupPepperVirtualenv(pepperEnv, SALT_MASTER_URL, SALT_MASTER_CREDENTIALS)

    def osd_ids = []

    for (osd_id in osds) {
        osd_ids.add('osd.' + osd_id)
        print("Will delete " + osd_id)
    }
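
    // e.g. OSD = '1,2,3' yields osd_ids = ['osd.1', 'osd.2', 'osd.3']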

    // `ceph osd out <id> <id>`
    stage('Set OSDs out') {
        runCephCommand(pepperEnv, ADMIN_HOST, 'ceph osd out ' + osd_ids.join(' '))
    }
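
    // e.g. with OSD = '1,2' the stage above runs: ceph osd out osd.1 osd.2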

    // wait for healthy cluster
    if (WAIT_FOR_HEALTHY.toBoolean() == true) {
        sleep(5)
        waitForHealthy(pepperEnv)
    }

    if (flags.size() > 0) {
        stage('Set cluster flags') {
            for (flag in flags) {
                runCephCommand(pepperEnv, ADMIN_HOST, 'ceph osd set ' + flag)
            }
        }
    }
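
    // e.g. CLUSTER_FLAGS = 'noout,norebalance' runs `ceph osd set noout` and
    // `ceph osd set norebalance` so the replacement does not trigger unwanted data movement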

    // stop osd daemons
    stage('Stop OSD daemons') {
        for (i in osd_ids) {
            salt.runSaltProcessStep(pepperEnv, HOST, 'service.stop', ['ceph-osd@' + i.replaceAll('osd.', '')], null, true)
        }
    }
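
    // e.g. osd.3 becomes service unit ceph-osd@3, stopped on HOST via Salt's service.stop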
    /*
    // `ceph osd crush remove osd.2`
    stage('Remove OSDs from CRUSH') {
        for (i in osd_ids) {
            runCephCommand(pepperEnv, ADMIN_HOST, 'ceph osd crush remove ' + i)
        }
    }

    // wait for pgs to rebalance
    if (WAIT_FOR_PG_REBALANCE.toBoolean() == true) {
        stage('Waiting for pgs to rebalance') {
            while (true) {
                def status = runCephCommand(pepperEnv, ADMIN_HOST, 'ceph -s')['return'][0].values()[0]
                if (!status.contains('degraded')) {
                    common.infoMsg('PGs rebalanced')
                    break;
                }
                sleep(10)
            }
        }
    }
    */
    // remove keyring `ceph auth del osd.3`
    stage('Remove OSD keyrings from auth') {
        for (i in osd_ids) {
            runCephCommand(pepperEnv, ADMIN_HOST, 'ceph auth del ' + i)
        }
    }

    // remove osd `ceph osd rm osd.3`
    stage('Remove OSDs') {
        for (i in osd_ids) {
            runCephCommand(pepperEnv, ADMIN_HOST, 'ceph osd rm ' + i)
        }
    }

    if (DMCRYPT.toBoolean() == true) {

        // remove partition tables
        stage('dd part tables') {
            for (dev in devices) {
                runCephCommand(pepperEnv, HOST, "dd if=/dev/zero of=${dev} bs=512 count=1 conv=notrunc")
            }
        }
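
        // Note: the dd above clears only the first 512 bytes of each device (the MBR /
        // protective MBR); the 'Zap devices' stage below wipes the remaining partition
        // metadata with ceph-disk zap.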

        // remove journal, block_db or block_wal partition `parted /dev/sdj rm 3`
        stage('Remove journal / block_db / block_wal partitions') {
            for (partition in journals_blockdbs_blockwals) {
                if (partition?.trim()) {
                    // dev = /dev/sdi
                    def dev = partition.replaceAll("[0-9]", "")
                    // part_id = 2
                    def part_id = partition.substring(partition.lastIndexOf("/")+1).replaceAll("[^0-9]", "")
                    try {
                        // answer parted's "busy device" prompt with "Ignore" so the removal proceeds
                        runCephCommand(pepperEnv, HOST, "echo Ignore | parted ${dev} rm ${part_id}")
                    } catch (Exception e) {
                        common.warningMsg(e)
                    }
                }
            }
        }
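
        // e.g. JOURNAL_BLOCKDB_BLOCKWAL_PARTITION = '/dev/sdh2' gives dev = '/dev/sdh',
        // part_id = '2', and the partition is removed with: parted /dev/sdh rm 2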

        // reboot
        stage('reboot and wait') {
            salt.runSaltProcessStep(pepperEnv, HOST, 'system.reboot', null, null, true, 5)
            salt.minionsReachable(pepperEnv, 'I@salt:master', HOST)
            sleep(10)
        }

        // zap disks `ceph-disk zap /dev/sdi`
        stage('Zap devices') {
            for (dev in devices) {
                // the first zap is allowed to fail (only logged as a warning);
                // the second call must succeed or the stage fails
                try {
                    runCephCommand(pepperEnv, HOST, 'ceph-disk zap ' + dev)
                } catch (Exception e) {
                    common.warningMsg(e)
                }
                runCephCommand(pepperEnv, HOST, 'ceph-disk zap ' + dev)
            }
        }

    } else {

        // umount `umount /dev/sdi1`
        stage('Umount devices') {
            for (dev in devices) {
                runCephCommand(pepperEnv, HOST, 'umount ' + dev + '1')
            }
        }
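
        // e.g. DEVICE = '/dev/sdb' unmounts /dev/sdb1, which ceph-disk typically uses as
        // the OSD data partition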

        // zap disks `ceph-disk zap /dev/sdi`
        stage('Zap devices') {
            for (dev in devices) {
                runCephCommand(pepperEnv, HOST, 'ceph-disk zap ' + dev)
            }
        }

        // remove journal, block_db or block_wal partition `parted /dev/sdj rm 3`
        stage('Remove journal / block_db / block_wal partitions') {
            for (partition in journals_blockdbs_blockwals) {
                if (partition?.trim()) {
                    // dev = /dev/sdi
                    def dev = partition.replaceAll("[0-9]", "")
                    // part_id = 2
                    def part_id = partition.substring(partition.lastIndexOf("/")+1).replaceAll("[^0-9]", "")
                    try {
                        runCephCommand(pepperEnv, HOST, "parted ${dev} rm ${part_id}")
                    } catch (Exception e) {
                        common.warningMsg(e)
                    }
                }
            }
        }
    }

    // Deploy failed Ceph OSD
    stage('Deploy Ceph OSD') {
        salt.enforceState(pepperEnv, HOST, 'ceph.osd', true)
    }
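
    // salt.enforceState applies the ceph.osd Salt state on HOST; assuming the node's pillar
    // still describes the replaced devices, this re-creates the OSDs on the freshly zapped disks.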

    // remove cluster flags
    if (flags.size() > 0) {
        stage('Unset cluster flags') {
            for (flag in flags) {
                common.infoMsg('Removing flag ' + flag)
                runCephCommand(pepperEnv, ADMIN_HOST, 'ceph osd unset ' + flag)
            }
        }
    }

    /*
    if (ENFORCE_CRUSHMAP.toBoolean() == true) {

        // enforce crushmap `crushtool -c /etc/ceph/crushmap -o /etc/ceph/crushmap.compiled; ceph osd setcrushmap -i /etc/ceph/crushmap.compiled`
        stage('Enforce crushmap') {

            stage('Ask for manual confirmation') {
                input message: "Are you sure that your ADMIN_HOST has correct /etc/ceph/crushmap file? Click proceed to compile and enforce crushmap."
            }
            runCephCommand(pepperEnv, ADMIN_HOST, 'crushtool -c /etc/ceph/crushmap -o /etc/ceph/crushmap.compiled')
            runCephCommand(pepperEnv, ADMIN_HOST, 'ceph osd setcrushmap -i /etc/ceph/crushmap.compiled')
        }
    }
    */
}