/**
 *
 * Filestore to Bluestore or vice versa backend migration
 *
 * Required parameters:
 *  SALT_MASTER_URL             URL of Salt master
 *  SALT_MASTER_CREDENTIALS     Credentials to the Salt API
 *
 *  ADMIN_HOST                  Host (minion id) with admin keyring and /etc/crushmap file present
 *  OSD                         OSD ids to be migrated if a single OSD host is targeted (comma-separated list - 1,2,3)
 *  TARGET                      Hosts (minion ids) to be targeted
 *  CLUSTER_FLAGS               Comma-separated list of flags to apply to the cluster
 *  WAIT_FOR_HEALTHY            Wait for cluster rebalance before stopping daemons
 *  ORIGIN_BACKEND              Ceph backend before migration
 *  PER_OSD_CONTROL             Set to true if Ceph status verification after every OSD disk migration is desired
 *  PER_OSD_HOST_CONTROL        Set to true if Ceph status verification after whole OSD host migration is desired
 *
 */
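/**
 * Illustrative parameter set (hypothetical values, not defaults taken from anywhere):
 *
 *  SALT_MASTER_URL         = http://sm.example.local:6969
 *  SALT_MASTER_CREDENTIALS = salt-credentials-id
 *  ADMIN_HOST              = cmn01*
 *  TARGET                  = osd001*
 *  OSD                     = 1,2,3            (or '*' to migrate every OSD on the targeted hosts)
 *  CLUSTER_FLAGS           = noout,norebalance
 *  WAIT_FOR_HEALTHY        = true
 *  ORIGIN_BACKEND          = filestore
 *  PER_OSD_CONTROL         = true
 *  PER_OSD_HOST_CONTROL    = true
 */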

common = new com.mirantis.mk.Common()
salt = new com.mirantis.mk.Salt()
def python = new com.mirantis.mk.Python()

MIGRATION_METHOD = "per-osd"
// TBD: per-host

def pepperEnv = "pepperEnv"
def flags = CLUSTER_FLAGS.tokenize(',')
def osds = OSD.tokenize(',')

def removePartition(master, target, partition_uuid) {
    def partition = ""
    try {
        // partition = /dev/sdi2
        partition = runCephCommand(master, target, "blkid | grep ${partition_uuid} ")['return'][0].values()[0].split("(?<=[0-9])")[0]
    } catch (Exception e) {
        common.warningMsg(e)
    }

    if (partition?.trim()) {
        // dev = /dev/sdi
        def dev = partition.replaceAll('\\d+$', "")
        // part_id = 2
        def part_id = partition.substring(partition.lastIndexOf("/")+1).replaceAll("[^0-9]", "")
        runCephCommand(master, target, "parted ${dev} rm ${part_id}")
    }
    return
}
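// Illustrative walk-through of removePartition (hypothetical device names):
// for partition_uuid 2c76f144-f412-481e-b150-4046212ca932, `blkid | grep <uuid>` may
// print "/dev/sdi2: PARTUUID=...", so partition = /dev/sdi2, dev = /dev/sdi,
// part_id = 2, and the partition is dropped with `parted /dev/sdi rm 2`.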

def removeJournalOrBlockPartitions(master, target, id) {

    // remove journal, block_db, block_wal partition `parted /dev/sdj rm 3`
    stage('Remove journal / block_db / block_wal partition') {
        def partition_uuid = ""
        def journal_partition_uuid = ""
        def block_db_partition_uuid = ""
        def block_wal_partition_uuid = ""
        try {
            journal_partition_uuid = runCephCommand(master, target, "ls -la /var/lib/ceph/osd/ceph-${id}/ | grep journal | grep partuuid")
            journal_partition_uuid = journal_partition_uuid.toString().trim().split("\n")[0].substring(journal_partition_uuid.toString().trim().lastIndexOf("/")+1)
        } catch (Exception e) {
            common.infoMsg(e)
        }
        try {
            block_db_partition_uuid = runCephCommand(master, target, "ls -la /var/lib/ceph/osd/ceph-${id}/ | grep 'block.db' | grep partuuid")
            block_db_partition_uuid = block_db_partition_uuid.toString().trim().split("\n")[0].substring(block_db_partition_uuid.toString().trim().lastIndexOf("/")+1)
        } catch (Exception e) {
            common.infoMsg(e)
        }

        try {
            block_wal_partition_uuid = runCephCommand(master, target, "ls -la /var/lib/ceph/osd/ceph-${id}/ | grep 'block.wal' | grep partuuid")
            block_wal_partition_uuid = block_wal_partition_uuid.toString().trim().split("\n")[0].substring(block_wal_partition_uuid.toString().trim().lastIndexOf("/")+1)
        } catch (Exception e) {
            common.infoMsg(e)
        }

        // set partition_uuid = 2c76f144-f412-481e-b150-4046212ca932
        if (journal_partition_uuid?.trim()) {
            partition_uuid = journal_partition_uuid
        } else if (block_db_partition_uuid?.trim()) {
            partition_uuid = block_db_partition_uuid
        }

        // if disk has journal, block_db or block_wal on different disk, then remove the partition
        if (partition_uuid?.trim()) {
            removePartition(master, target, partition_uuid)
        }
        if (block_wal_partition_uuid?.trim()) {
            removePartition(master, target, block_wal_partition_uuid)
        }
    }
    return
}

def runCephCommand(master, target, cmd) {
    return salt.cmdRun(master, target, cmd)
}

def waitForHealthy(master, count=0, attempts=300) {
    // wait for healthy cluster
    while (count<attempts) {
        def health = runCephCommand(master, ADMIN_HOST, 'ceph health')['return'][0].values()[0]
        if (health.contains('HEALTH_OK')) {
            common.infoMsg('Cluster is healthy')
            break;
        }
        count++
        sleep(10)
    }
}
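// Note: waitForHealthy polls `ceph health` on ADMIN_HOST every 10 seconds, by default
// up to 300 attempts (~50 minutes), and returns without failing the build if
// HEALTH_OK is never reached.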
timeout(time: 12, unit: 'HOURS') {
    node("python") {

        // create connection to salt master
        python.setupPepperVirtualenv(pepperEnv, SALT_MASTER_URL, SALT_MASTER_CREDENTIALS)

        if (MIGRATION_METHOD == 'per-osd') {

            if (flags.size() > 0) {
                stage('Set cluster flags') {
                    for (flag in flags) {
                        runCephCommand(pepperEnv, ADMIN_HOST, 'ceph osd set ' + flag)
                    }
                }
            }

            def target_hosts = salt.getMinions(pepperEnv, TARGET)

            for (tgt in target_hosts) {
                def osd_ids = []

                // get the list of osd disks of the target
                salt.runSaltProcessStep(pepperEnv, tgt, 'saltutil.sync_grains', [], null, true, 5)
                def ceph_disks = salt.getGrain(pepperEnv, tgt, 'ceph')['return'][0].values()[0].values()[0]['ceph_disk']

                for (i in ceph_disks) {
                    def osd_id = i.getKey().toString()
                    if (osd_id in osds || OSD == '*') {
                        osd_ids.add('osd.' + osd_id)
                        print("Will migrate " + osd_id)
                    } else {
                        print("Skipping " + osd_id)
                    }
                }
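                // Example (hypothetical grain data): with ceph_disk keys ['1', '2', '3'] and
                // OSD = '1,3', osd_ids becomes ['osd.1', 'osd.3']; OSD = '*' selects all of them.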

                for (osd_id in osd_ids) {

                    def id = osd_id.replaceAll('osd.', '')
                    def backend = runCephCommand(pepperEnv, ADMIN_HOST, "ceph osd metadata ${id} | grep osd_objectstore")['return'][0].values()[0]

                    if (backend.contains(ORIGIN_BACKEND.toLowerCase())) {

                        // wait for healthy cluster before manipulating the osds
                        if (WAIT_FOR_HEALTHY.toBoolean() == true) {
                            waitForHealthy(pepperEnv)
                        }

                        // `ceph osd out <id> <id>`
                        stage('Set OSDs out') {
                            runCephCommand(pepperEnv, ADMIN_HOST, "ceph osd out ${osd_id}")
                        }

                        if (WAIT_FOR_HEALTHY.toBoolean() == true) {
                            sleep(5)
                            waitForHealthy(pepperEnv)
                        }

                        // stop osd daemons
                        stage('Stop OSD daemons') {
                            salt.runSaltProcessStep(pepperEnv, tgt, 'service.stop', ['ceph-osd@' + osd_id.replaceAll('osd.', '')], null, true)
                        }

                        // remove keyring `ceph auth del osd.3`
                        stage('Remove OSD keyrings from auth') {
                            runCephCommand(pepperEnv, ADMIN_HOST, 'ceph auth del ' + osd_id)
                        }

                        // remove osd `ceph osd rm osd.3`
                        stage('Remove OSDs') {
                            runCephCommand(pepperEnv, ADMIN_HOST, 'ceph osd rm ' + osd_id)
                        }

                        def dmcrypt = ""
                        try {
                            dmcrypt = runCephCommand(pepperEnv, tgt, "ls -la /var/lib/ceph/osd/ceph-${id}/ | grep dmcrypt")['return'][0].values()[0]
                        } catch (Exception e) {
                            common.warningMsg(e)
                        }

                        if (dmcrypt?.trim()) {
                            def mount = runCephCommand(pepperEnv, tgt, "lsblk -rp | grep /var/lib/ceph/osd/ceph-${id} -B1")['return'][0].values()[0]
                            dev = mount.split()[0].replaceAll("[0-9]","")

                            // remove partition tables
                            stage('dd part tables') {
                                runCephCommand(pepperEnv, tgt, "dd if=/dev/zero of=${dev} bs=512 count=1 conv=notrunc")
                            }

                            // remove journal, block_db, block_wal partition `parted /dev/sdj rm 3`
                            removeJournalOrBlockPartitions(pepperEnv, tgt, id)

                            // reboot
                            stage('reboot and wait') {
                                salt.runSaltProcessStep(pepperEnv, tgt, 'system.reboot', null, null, true, 5)
                                salt.minionsReachable(pepperEnv, 'I@salt:master', tgt)
                                sleep(10)
                            }

                            // zap disks `ceph-disk zap /dev/sdi`
                            stage('Zap devices') {
                                try {
                                    runCephCommand(pepperEnv, tgt, 'ceph-disk zap ' + dev)
                                } catch (Exception e) {
                                    common.warningMsg(e)
                                }
                                // run zap a second time; the first attempt above is allowed to fail
                                runCephCommand(pepperEnv, tgt, 'ceph-disk zap ' + dev)
                            }

                        } else {

                            def mount = runCephCommand(pepperEnv, tgt, "mount | grep /var/lib/ceph/osd/ceph-${id}")['return'][0].values()[0]
                            dev = mount.split()[0].replaceAll("[0-9]","")

                            // remove journal, block_db, block_wal partition `parted /dev/sdj rm 3`
                            removeJournalOrBlockPartitions(pepperEnv, tgt, id)

                            // umount `umount /dev/sdi1`
                            stage('Umount devices') {
                                runCephCommand(pepperEnv, tgt, "umount /var/lib/ceph/osd/ceph-${id}")
                            }

                            // zap disks `ceph-disk zap /dev/sdi`
                            stage('Zap device') {
                                runCephCommand(pepperEnv, tgt, 'ceph-disk zap ' + dev)
                            }
                        }

                        // Deploy Ceph OSD
                        stage('Deploy Ceph OSD') {
                            salt.runSaltProcessStep(pepperEnv, tgt, 'saltutil.refresh_pillar', [], null, true, 5)
                            salt.enforceState(pepperEnv, tgt, 'ceph.osd', true)
                        }

                        if (PER_OSD_CONTROL.toBoolean() == true) {
                            stage("Verify backend version for osd.${id}") {
                                sleep(5)
                                runCephCommand(pepperEnv, tgt, "ceph osd metadata ${id} | grep osd_objectstore")
                                runCephCommand(pepperEnv, tgt, "ceph -s")
                            }

                            stage('Ask for manual confirmation') {
                                input message: "From the verification commands above, please check the backend version of osd.${id} and the ceph status. If they are correct, do you want to continue and migrate the next OSD?"
                            }
                        }
                    }
                }
                if (PER_OSD_HOST_CONTROL.toBoolean() == true) {
                    stage("Verify backend versions") {
                        sleep(5)
                        runCephCommand(pepperEnv, tgt, "ceph osd metadata | grep osd_objectstore -B2")
                        runCephCommand(pepperEnv, tgt, "ceph -s")
                    }

                    stage('Ask for manual confirmation') {
                        input message: "From the verification command above, please check the ceph status and the backend version of the OSDs on this host. If they are correct, do you want to continue and migrate the next OSD host?"
                    }
                }

            }
            // remove cluster flags
            if (flags.size() > 0) {
                stage('Unset cluster flags') {
                    for (flag in flags) {
                        common.infoMsg('Removing flag ' + flag)
                        runCephCommand(pepperEnv, ADMIN_HOST, 'ceph osd unset ' + flag)
                    }
                }
            }
        }
    }
}