/**
 *
 * Replace failed disk with a new disk
 *
 * Required parameters:
 *  SALT_MASTER_URL                      URL of Salt master
 *  SALT_MASTER_CREDENTIALS              Credentials to the Salt API
 *
 *  HOST                                 Host (minion id) where the failed disk(s) will be replaced
 *  ADMIN_HOST                           Host (minion id) with admin keyring and /etc/ceph/crushmap file present
 *  OSD                                  Failed OSD ids to be replaced (comma-separated list - 1,2,3)
 *  DEVICE                               Comma-separated list of failed devices that will be replaced at HOST (/dev/sdb,/dev/sdc)
 *  JOURNAL_BLOCKDB_BLOCKWAL_PARTITION   Comma-separated list of partitions where journal or block_db or block_wal for the failed devices on this HOST were stored (/dev/sdh2,/dev/sdh3)
 *  CLUSTER_FLAGS                        Comma-separated list of flags to apply to the cluster
 *  WAIT_FOR_HEALTHY                     Wait for cluster rebalance before stopping daemons
 *
 */

common = new com.mirantis.mk.Common()
salt = new com.mirantis.mk.Salt()
def python = new com.mirantis.mk.Python()

def pepperEnv = "pepperEnv"
def flags = CLUSTER_FLAGS.tokenize(',')
def osds = OSD.tokenize(',')
def devices = DEVICE.tokenize(',')
def journals_blockdbs_blockwals = JOURNAL_BLOCKDB_BLOCKWAL_PARTITION.tokenize(',')

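// helper: run a shell command on the given target minion via salt.cmdRun and return the raw result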
def runCephCommand(master, target, cmd) {
    return salt.cmdRun(master, target, cmd)
}

def waitForHealthy(master, count=0, attempts=300) {
    // wait for healthy cluster
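    // polls 'ceph health' every 10 seconds and returns silently if HEALTH_OK is not reached within 'attempts' polls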
    while (count<attempts) {
        def health = runCephCommand(master, ADMIN_HOST, 'ceph health')['return'][0].values()[0]
        if (health.contains('HEALTH_OK')) {
            common.infoMsg('Cluster is healthy')
            break;
        }
        count++
        sleep(10)
    }
}

node("python") {

    // create connection to salt master
    python.setupPepperVirtualenv(pepperEnv, SALT_MASTER_URL, SALT_MASTER_CREDENTIALS)

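    // set any requested cluster flags (e.g. noout, norebalance) before the failed OSDs are taken out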
    if (flags.size() > 0) {
        stage('Set cluster flags') {
            for (flag in flags) {
                runCephCommand(pepperEnv, ADMIN_HOST, 'ceph osd set ' + flag)
            }
        }
    }

    def osd_ids = []

    print("osds:")
    print(osds)

    // get list of osd disks of the host
    def ceph_disks = salt.getGrain(pepperEnv, HOST, 'ceph')['return'][0].values()[0].values()[0]['ceph_disk']
    common.prettyPrint(ceph_disks)

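    // build the list of 'osd.<id>' names to act on; passing OSD='*' selects every OSD found on HOST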
    for (i in ceph_disks) {
        def osd_id = i.getKey().toString()
        if (osd_id in osds || OSD == '*') {
            osd_ids.add('osd.' + osd_id)
            print("Will delete " + osd_id)
        } else {
            print("Skipping " + osd_id)
        }
    }

    // `ceph osd out <id> <id>`
    stage('Set OSDs out') {
        runCephCommand(pepperEnv, ADMIN_HOST, 'ceph osd out ' + osd_ids.join(' '))
    }

    // wait for healthy cluster
    if (WAIT_FOR_HEALTHY.toBoolean() == true) {
        sleep(5)
        waitForHealthy(pepperEnv)
    }

    // stop osd daemons
    stage('Stop OSD daemons') {
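        // ceph-osd services are named 'ceph-osd@<numeric id>', so the 'osd.' prefix is stripped from each entry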
        for (i in osd_ids) {
            salt.runSaltProcessStep(pepperEnv, HOST, 'service.stop', ['ceph-osd@' + i.replaceAll('osd.', '')], null, true)
        }
    }
    /*
    // `ceph osd crush remove osd.2`
    stage('Remove OSDs from CRUSH') {
        for (i in osd_ids) {
            runCephCommand(pepperEnv, ADMIN_HOST, 'ceph osd crush remove ' + i)
        }
    }

    // wait for pgs to rebalance
    if (WAIT_FOR_PG_REBALANCE.toBoolean() == true) {
        stage('Waiting for pgs to rebalance') {
            while (true) {
                def status = runCephCommand(pepperEnv, ADMIN_HOST, 'ceph -s')['return'][0].values()[0]
                if (!status.contains('degraded')) {
                    common.infoMsg('PGs rebalanced')
                    break;
                }
                sleep(10)
            }
        }
    }
    */
    // remove keyring `ceph auth del osd.3`
    stage('Remove OSD keyrings from auth') {
        for (i in osd_ids) {
            runCephCommand(pepperEnv, ADMIN_HOST, 'ceph auth del ' + i)
        }
    }

    // remove osd `ceph osd rm osd.3`
    stage('Remove OSDs') {
        for (i in osd_ids) {
            runCephCommand(pepperEnv, ADMIN_HOST, 'ceph osd rm ' + i)
        }
    }

    // umount `umount /dev/sdi1`
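    // NOTE: assumes the OSD data partition is the first partition of each listed device (e.g. /dev/sdb1)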
    stage('Umount devices') {
        for (dev in devices) {
            runCephCommand(pepperEnv, HOST, 'umount ' + dev + '1')
        }
    }

    // zap disks `ceph-disk zap /dev/sdi`
    stage('Zap devices') {
        for (dev in devices) {
            runCephCommand(pepperEnv, HOST, 'ceph-disk zap ' + dev)
        }
    }

    // remove journal, block_db or block_wal partition `parted /dev/sdj rm 3`
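    // NOTE: the parent device is derived by stripping digits from the partition path, which assumes /dev/sdX-style names rather than NVMe-style paths such as /dev/nvme0n1p2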
    stage('Remove journal / block_db / block_wal partitions') {
        for (partition in journals_blockdbs_blockwals) {
            if (partition?.trim()) {
                // dev = /dev/sdi
                def dev = partition.replaceAll("[0-9]", "")
                // part_id = 2
                def part_id = partition.substring(partition.lastIndexOf("/")+1).replaceAll("[^0-9]", "")
                runCephCommand(pepperEnv, HOST, "parted ${dev} rm ${part_id}")
            }
        }
    }

    // redeploy the Ceph OSD on the replacement disk
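    // re-applies the ceph.osd Salt state on HOST so the new disk is prepared and activated from the host's pillar configuration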
    stage('Deploy Ceph OSD') {
        salt.enforceState(pepperEnv, HOST, 'ceph.osd', true)
    }

    // remove cluster flags
    if (flags.size() > 0) {
        stage('Unset cluster flags') {
            for (flag in flags) {
                common.infoMsg('Removing flag ' + flag)
                runCephCommand(pepperEnv, ADMIN_HOST, 'ceph osd unset ' + flag)
            }
        }
    }

    /*
    if (ENFORCE_CRUSHMAP.toBoolean() == true) {

        // enforce crushmap `crushtool -c /etc/ceph/crushmap -o /etc/ceph/crushmap.compiled; ceph osd setcrushmap -i /etc/ceph/crushmap.compiled`
        stage('Enforce crushmap') {

            stage('Ask for manual confirmation') {
                input message: "Are you sure that your ADMIN_HOST has correct /etc/ceph/crushmap file? Click proceed to compile and enforce crushmap."
            }
            runCephCommand(pepperEnv, ADMIN_HOST, 'crushtool -c /etc/ceph/crushmap -o /etc/ceph/crushmap.compiled')
            runCephCommand(pepperEnv, ADMIN_HOST, 'ceph osd setcrushmap -i /etc/ceph/crushmap.compiled')
        }
    }
    */
}