/**
 *
 * Filestore to Bluestore or vice versa backend migration
 *
 * Required parameters:
 *  SALT_MASTER_URL                 URL of Salt master
 *  SALT_MASTER_CREDENTIALS         Credentials to the Salt API
 *
 *  ADMIN_HOST                      Host (minion id) with admin keyring and /etc/crushmap file present
 *  OSD                             OSD ids to be migrated if a single OSD host is targeted (comma-separated list - 1,2,3)
 *  TARGET                          Hosts (minion ids) to be targeted
 *  CLUSTER_FLAGS                   Comma-separated list of flags to apply to the cluster
 *  WAIT_FOR_HEALTHY                Wait for cluster rebalance before stopping daemons
 *  ORIGIN_BACKEND                  Ceph backend before upgrade
 *  PER_OSD_CONTROL                 Set to true if Ceph status verification after every OSD disk migration is desired
 *  PER_OSD_HOST_CONTROL            Set to true if Ceph status verification after whole OSD host migration is desired
 *
 */
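
// Illustrative example values for the parameters above (assumptions, adjust to the environment):
//   TARGET         = 'osd*'              - Salt minion id expression matching the OSD hosts
//   OSD            = '1,2,3' or '*'      - migrate the listed OSD ids, or every OSD on the host
//   CLUSTER_FLAGS  = 'noout,norebalance' - flags applied via `ceph osd set` for the duration of the migration
//   ORIGIN_BACKEND = 'filestore'         - when migrating Filestore OSDs to Bluestore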

common = new com.mirantis.mk.Common()
salt = new com.mirantis.mk.Salt()
def python = new com.mirantis.mk.Python()

MIGRATION_METHOD = "per-osd"
// TBD: per-host

def pepperEnv = "pepperEnv"
def flags = CLUSTER_FLAGS.tokenize(',')
def osds = OSD.tokenize(',')

def removePartition(master, target, partition_uuid) {
    def partition = ""
    try {
        // partition = /dev/sdi2
        partition = runCephCommand(master, target, "blkid | grep ${partition_uuid} ")['return'][0].values()[0].split("(?<=[0-9])")[0]
    } catch (Exception e) {
        common.warningMsg(e)
    }

    if (partition?.trim()) {
        // dev = /dev/sdi
        def dev = partition.replaceAll('\\d+$', "")
        // part_id = 2
        def part_id = partition.substring(partition.lastIndexOf("/")+1).replaceAll("[^0-9]", "")
        runCephCommand(master, target, "parted ${dev} rm ${part_id}")
    }
    return
}
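
// A minimal illustration of removePartition(), using the values from the inline comments above
// (not real output): blkid resolves the uuid to /dev/sdi2, dev becomes /dev/sdi, part_id becomes 2,
// and the partition is removed with `parted /dev/sdi rm 2`.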

def removeJournalOrBlockPartitions(master, target, id) {

    // remove journal, block_db, block_wal partition `parted /dev/sdj rm 3`
    stage('Remove journal / block_db / block_wal partition') {
        def partition_uuid = ""
        def journal_partition_uuid = ""
        def block_db_partition_uuid = ""
        def block_wal_partition_uuid = ""
        try {
            journal_partition_uuid = runCephCommand(master, target, "ls -la /var/lib/ceph/osd/ceph-${id}/ | grep journal | grep partuuid")
            journal_partition_uuid = journal_partition_uuid.toString().trim().split("\n")[0].substring(journal_partition_uuid.toString().trim().lastIndexOf("/")+1)
        } catch (Exception e) {
            common.infoMsg(e)
        }
        try {
            block_db_partition_uuid = runCephCommand(master, target, "ls -la /var/lib/ceph/osd/ceph-${id}/ | grep 'block.db' | grep partuuid")
            block_db_partition_uuid = block_db_partition_uuid.toString().trim().split("\n")[0].substring(block_db_partition_uuid.toString().trim().lastIndexOf("/")+1)
        } catch (Exception e) {
            common.infoMsg(e)
        }

        try {
            block_wal_partition_uuid = runCephCommand(master, target, "ls -la /var/lib/ceph/osd/ceph-${id}/ | grep 'block.wal' | grep partuuid")
            block_wal_partition_uuid = block_wal_partition_uuid.toString().trim().split("\n")[0].substring(block_wal_partition_uuid.toString().trim().lastIndexOf("/")+1)
        } catch (Exception e) {
            common.infoMsg(e)
        }

        // set partition_uuid = 2c76f144-f412-481e-b150-4046212ca932
        if (journal_partition_uuid?.trim()) {
            partition_uuid = journal_partition_uuid
        } else if (block_db_partition_uuid?.trim()) {
            partition_uuid = block_db_partition_uuid
        }

        // if the journal, block_db or block_wal partition lives on a different disk, remove that partition
        if (partition_uuid?.trim()) {
            removePartition(master, target, partition_uuid)
        }
        if (block_wal_partition_uuid?.trim()) {
            removePartition(master, target, block_wal_partition_uuid)
        }
    }
    return
}
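
// For reference: the journal / block.db / block.wal greps above rely on ceph-disk style symlinks in the
// OSD data directory, e.g. (illustrative) journal -> /dev/disk/by-partuuid/2c76f144-f412-481e-b150-4046212ca932,
// and the trailing path component is the partition uuid handed to removePartition().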

def runCephCommand(master, target, cmd) {
    return salt.cmdRun(master, target, cmd)
}
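
// Note: salt.cmdRun returns the full Salt API response; callers throughout this pipeline unwrap the
// actual command output with ['return'][0].values()[0].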

def waitForHealthy(master, count=0, attempts=300) {
    // wait for healthy cluster
    while (count<attempts) {
        def health = runCephCommand(master, ADMIN_HOST, 'ceph health')['return'][0].values()[0]
        if (health.contains('HEALTH_OK')) {
            common.infoMsg('Cluster is healthy')
            break;
        }
        count++
        sleep(10)
    }
}
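
// With the defaults (300 attempts, 10 s sleep) waitForHealthy polls `ceph health` for up to roughly
// 50 minutes; if HEALTH_OK is never reported, the loop simply ends and the pipeline continues.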

node("python") {

    // create connection to salt master
    python.setupPepperVirtualenv(pepperEnv, SALT_MASTER_URL, SALT_MASTER_CREDENTIALS)

    if (MIGRATION_METHOD == 'per-osd') {

        if (flags.size() > 0) {
            stage('Set cluster flags') {
                for (flag in flags) {
                    runCephCommand(pepperEnv, ADMIN_HOST, 'ceph osd set ' + flag)
                }
            }
        }

        def target_hosts = salt.getMinions(pepperEnv, TARGET)

        for (tgt in target_hosts) {
            def osd_ids = []

            // get the list of OSD disks on this target
            salt.runSaltProcessStep(pepperEnv, tgt, 'saltutil.sync_grains', [], null, true, 5)
            def ceph_disks = salt.getGrain(pepperEnv, tgt, 'ceph')['return'][0].values()[0].values()[0]['ceph_disk']

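            // The 'ceph' grain is assumed (as published by the ceph Salt formula) to expose a ceph_disk
            // mapping keyed by OSD id, e.g. (illustrative): ceph: {ceph_disk: {'1': {...}, '2': {...}}},
            // so i.getKey() below yields the numeric OSD id.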
            for (i in ceph_disks) {
                def osd_id = i.getKey().toString()
                if (osd_id in osds || OSD == '*') {
                    osd_ids.add('osd.' + osd_id)
                    print("Will migrate " + osd_id)
                } else {
                    print("Skipping " + osd_id)
                }
            }

            for (osd_id in osd_ids) {

                def id = osd_id.replaceAll('osd.', '')
                def backend = runCephCommand(pepperEnv, ADMIN_HOST, "ceph osd metadata ${id} | grep osd_objectstore")['return'][0].values()[0]

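                // `ceph osd metadata <id>` prints JSON metadata for the OSD; the grep keeps a line such as
                // (illustrative) "osd_objectstore": "filestore", which is what the contains() check below
                // matches against ORIGIN_BACKEND.toLowerCase().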
                if (backend.contains(ORIGIN_BACKEND.toLowerCase())) {

                    // wait for a healthy cluster before manipulating the OSDs
                    if (WAIT_FOR_HEALTHY.toBoolean() == true) {
                        waitForHealthy(pepperEnv)
                    }

                    // `ceph osd out <id> <id>`
                    stage('Set OSDs out') {
                        runCephCommand(pepperEnv, ADMIN_HOST, "ceph osd out ${osd_id}")
                    }

                    if (WAIT_FOR_HEALTHY.toBoolean() == true) {
                        sleep(5)
                        waitForHealthy(pepperEnv)
                    }

                    // stop osd daemons
                    stage('Stop OSD daemons') {
                        salt.runSaltProcessStep(pepperEnv, tgt, 'service.stop', ['ceph-osd@' + osd_id.replaceAll('osd.', '')], null, true)
                    }

                    // remove keyring `ceph auth del osd.3`
                    stage('Remove OSD keyrings from auth') {
                        runCephCommand(pepperEnv, ADMIN_HOST, 'ceph auth del ' + osd_id)
                    }

                    // remove osd `ceph osd rm osd.3`
                    stage('Remove OSDs') {
                        runCephCommand(pepperEnv, ADMIN_HOST, 'ceph osd rm ' + osd_id)
                    }

                    def dmcrypt = ""
                    try {
                        dmcrypt = runCephCommand(pepperEnv, tgt, "ls -la /var/lib/ceph/osd/ceph-${id}/ | grep dmcrypt")['return'][0].values()[0]
                    } catch (Exception e) {
                        common.warningMsg(e)
                    }

                    if (dmcrypt?.trim()) {
                        def mount = runCephCommand(pepperEnv, tgt, "lsblk -rp | grep /var/lib/ceph/osd/ceph-${id} -B1")['return'][0].values()[0]
                        dev = mount.split()[0].replaceAll("[0-9]","")

                        // remove partition tables
                        stage('dd part tables') {
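                            // Note: this only zeroes the first 512-byte sector (the MBR / protective MBR);
                            // the remaining partition metadata is wiped by the `ceph-disk zap` stage further below.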
                            runCephCommand(pepperEnv, tgt, "dd if=/dev/zero of=${dev} bs=512 count=1 conv=notrunc")
                        }

                        // remove journal, block_db, block_wal partition `parted /dev/sdj rm 3`
                        removeJournalOrBlockPartitions(pepperEnv, tgt, id)

                        // reboot
                        stage('reboot and wait') {
                            salt.runSaltProcessStep(pepperEnv, tgt, 'system.reboot', null, null, true, 5)
                            salt.minionsReachable(pepperEnv, 'I@salt:master', tgt)
                            sleep(10)
                        }

                        // zap disks `ceph-disk zap /dev/sdi`
                        stage('Zap devices') {
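                            // The first zap below is wrapped in try/catch, presumably because it may fail on a
                            // freshly rebooted dmcrypt host (e.g. device briefly busy); the second, unguarded
                            // run has to succeed or the build fails.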
                            try {
                                runCephCommand(pepperEnv, tgt, 'ceph-disk zap ' + dev)
                            } catch (Exception e) {
                                common.warningMsg(e)
                            }
                            runCephCommand(pepperEnv, tgt, 'ceph-disk zap ' + dev)
                        }

                    } else {

                        def mount = runCephCommand(pepperEnv, tgt, "mount | grep /var/lib/ceph/osd/ceph-${id}")['return'][0].values()[0]
                        dev = mount.split()[0].replaceAll("[0-9]","")

                        // remove journal, block_db, block_wal partition `parted /dev/sdj rm 3`
                        removeJournalOrBlockPartitions(pepperEnv, tgt, id)

                        // umount `umount /dev/sdi1`
                        stage('Umount devices') {
                            runCephCommand(pepperEnv, tgt, "umount /var/lib/ceph/osd/ceph-${id}")
                        }

                        // zap disks `ceph-disk zap /dev/sdi`
                        stage('Zap device') {
                            runCephCommand(pepperEnv, tgt, 'ceph-disk zap ' + dev)
                        }
                    }

                    // Deploy Ceph OSD
                    stage('Deploy Ceph OSD') {
                        salt.runSaltProcessStep(pepperEnv, tgt, 'saltutil.refresh_pillar', [], null, true, 5)
                        salt.enforceState(pepperEnv, tgt, 'ceph.osd', true)
                    }

                    if (PER_OSD_CONTROL.toBoolean() == true) {
                        stage("Verify backend version for osd.${id}") {
                            sleep(5)
                            runCephCommand(pepperEnv, tgt, "ceph osd metadata ${id} | grep osd_objectstore")
                            runCephCommand(pepperEnv, tgt, "ceph -s")
                        }

                        stage('Ask for manual confirmation') {
                            input message: "From the verification commands above, please check the backend version of osd.${id} and the ceph status. If they are correct, do you want to continue with the next OSD?"
                        }
                    }
                }
            }
            if (PER_OSD_HOST_CONTROL.toBoolean() == true) {
                stage("Verify backend versions") {
                    sleep(5)
                    runCephCommand(pepperEnv, tgt, "ceph osd metadata | grep osd_objectstore -B2")
                    runCephCommand(pepperEnv, tgt, "ceph -s")
                }

                stage('Ask for manual confirmation') {
                    input message: "From the verification commands above, please check the ceph status and the backend versions of the OSDs on this host. If they are correct, do you want to continue with the next OSD host?"
                }
            }

        }
        // remove cluster flags
        if (flags.size() > 0) {
            stage('Unset cluster flags') {
                for (flag in flags) {
                    common.infoMsg('Removing flag ' + flag)
                    runCephCommand(pepperEnv, ADMIN_HOST, 'ceph osd unset ' + flag)
                }
            }
        }
    }
}