/**
 *
 * Filestore to Bluestore or vice versa backend migration
 *
 * Required parameters:
 *  SALT_MASTER_URL                 URL of Salt master
 *  SALT_MASTER_CREDENTIALS         Credentials to the Salt API
 *
 *  ADMIN_HOST                      Host (minion id) with admin keyring and /etc/crushmap file present
 *  OSD                             OSD ids to be migrated if a single OSD host is targeted (comma-separated list - 1,2,3)
 *  TARGET                          Hosts (minion ids) to be targeted
 *  CLUSTER_FLAGS                   Comma-separated list of flags to apply to the cluster
 *  WAIT_FOR_HEALTHY                Wait for cluster rebalance before stopping daemons
 *  ORIGIN_BACKEND                  Ceph backend before upgrade
 *  PER_OSD_CONTROL                 Set to true if Ceph status verification after every OSD disk migration is desired
 *  PER_OSD_HOST_CONTROL            Set to true if Ceph status verification after whole OSD host migration is desired
 *
 */

common = new com.mirantis.mk.Common()
salt = new com.mirantis.mk.Salt()
def python = new com.mirantis.mk.Python()

MIGRATION_METHOD = "per-osd"
// TBD: per-host

def pepperEnv = "pepperEnv"
def flags = CLUSTER_FLAGS.tokenize(',')
def osds = OSD.tokenize(',')

def runCephCommand(master, target, cmd) {
    return salt.cmdRun(master, target, cmd)
}

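// Poll 'ceph health' on ADMIN_HOST every 10 seconds until HEALTH_OK is reported
// or the attempt limit is exhausted; the pipeline continues in either case.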
def waitForHealthy(master, count=0, attempts=300) {
    // wait for healthy cluster
    while (count < attempts) {
        def health = runCephCommand(master, ADMIN_HOST, 'ceph health')['return'][0].values()[0]
        if (health.contains('HEALTH_OK')) {
            common.infoMsg('Cluster is healthy')
            break
        }
        count++
        sleep(10)
    }
}

node("python") {

    // create connection to salt master
    python.setupPepperVirtualenv(pepperEnv, SALT_MASTER_URL, SALT_MASTER_CREDENTIALS)

    if (MIGRATION_METHOD == 'per-osd') {

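        // apply user-supplied cluster flags (e.g. noout) via 'ceph osd set' before any OSD is touched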
        if (flags.size() > 0) {
            stage('Set cluster flags') {
                for (flag in flags) {
                    runCephCommand(pepperEnv, ADMIN_HOST, 'ceph osd set ' + flag)
                }
            }
        }

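        // resolve the TARGET expression into the list of minion ids to migrate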
        def target_hosts = salt.getMinions(pepperEnv, TARGET)

        for (HOST in target_hosts) {
            def osd_ids = []

            // get list of osd disks of the host
            def ceph_disks = salt.getGrain(pepperEnv, HOST, 'ceph')['return'][0].values()[0].values()[0]['ceph_disk']

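            // build the list of OSDs to migrate on this host: those listed in OSD, or all of them when OSD == '*'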
            for (i in ceph_disks) {
                def osd_id = i.getKey().toString()
                if (osd_id in osds || OSD == '*') {
                    osd_ids.add('osd.' + osd_id)
                    print("Will migrate " + osd_id)
                } else {
                    print("Skipping " + osd_id)
                }
            }

            for (osd_id in osd_ids) {

                def id = osd_id.replaceAll('osd.', '')
                def backend = runCephCommand(pepperEnv, ADMIN_HOST, "ceph osd metadata ${id} | grep osd_objectstore")['return'][0].values()[0]

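                // only OSDs still reporting ORIGIN_BACKEND in their metadata are migrated; the rest are skipped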
                if (backend.contains(ORIGIN_BACKEND)) {

                    // wait for a healthy cluster before manipulating the osds
                    if (WAIT_FOR_HEALTHY.toBoolean() == true) {
                        waitForHealthy(pepperEnv)
                    }

                    // `ceph osd out <id> <id>`
                    stage('Set OSDs out') {
                        runCephCommand(pepperEnv, ADMIN_HOST, "ceph osd out ${osd_id}")
                    }

                    if (WAIT_FOR_HEALTHY.toBoolean() == true) {
                        sleep(5)
                        waitForHealthy(pepperEnv)
                    }

                    // stop osd daemons
                    stage('Stop OSD daemons') {
                        salt.runSaltProcessStep(pepperEnv, HOST, 'service.stop', ['ceph-osd@' + osd_id.replaceAll('osd.', '')], null, true)
                    }

                    // remove keyring `ceph auth del osd.3`
                    stage('Remove OSD keyrings from auth') {
                        runCephCommand(pepperEnv, ADMIN_HOST, 'ceph auth del ' + osd_id)
                    }

                    // remove osd `ceph osd rm osd.3`
                    stage('Remove OSDs') {
                        runCephCommand(pepperEnv, ADMIN_HOST, 'ceph osd rm ' + osd_id)
                    }

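                    // determine the data device backing this OSD from its mount point, e.g. /dev/sdi1 -> /dev/sdi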
                    def mount = runCephCommand(pepperEnv, HOST, "mount | grep /var/lib/ceph/osd/ceph-${id}")['return'][0].values()[0]
                    dev = mount.split()[0].replaceAll("[0-9]","")

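                    // the journal / block.db symlink in the OSD data dir is expected to point at a by-partuuid device path;
                    // take the partuuid from it so the partition can be located and removed below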
                    // remove journal or block_db partition `parted /dev/sdj rm 3`
                    stage('Remove journal / block_db partition') {
                        def partition_uuid = ""
                        def journal_partition_uuid = ""
                        def block_db_partition_uuid = ""
                        try {
                            journal_partition_uuid = runCephCommand(pepperEnv, HOST, "ls -la /var/lib/ceph/osd/ceph-${id}/ | grep journal | grep partuuid")
                            journal_partition_uuid = journal_partition_uuid.toString().trim().split("\n")[0].substring(journal_partition_uuid.toString().trim().lastIndexOf("/") + 1)
                        } catch (Exception e) {
                            common.infoMsg(e)
                        }
                        try {
                            block_db_partition_uuid = runCephCommand(pepperEnv, HOST, "ls -la /var/lib/ceph/osd/ceph-${id}/ | grep 'block.db' | grep partuuid")
                            block_db_partition_uuid = block_db_partition_uuid.toString().trim().split("\n")[0].substring(block_db_partition_uuid.toString().trim().lastIndexOf("/") + 1)
                        } catch (Exception e) {
                            common.infoMsg(e)
                        }

                        // set partition_uuid = 2c76f144-f412-481e-b150-4046212ca932
                        if (journal_partition_uuid?.trim()) {
                            partition_uuid = journal_partition_uuid
                        } else if (block_db_partition_uuid?.trim()) {
                            partition_uuid = block_db_partition_uuid
                        }

                        // if the osd had its block_db or journal on a different disk, remove that partition
                        if (partition_uuid?.trim()) {
                            def partition = ""
                            try {
                                // partition = /dev/sdi2
                                partition = runCephCommand(pepperEnv, HOST, "blkid | grep ${partition_uuid} ")['return'][0].values()[0].split("(?<=[0-9])")[0]
                            } catch (Exception e) {
                                common.warningMsg(e)
                            }

                            if (partition?.trim()) {
                                // dev = /dev/sdi
                                def dev = partition.replaceAll('\\d+$', "")
                                // part_id = 2
                                def part_id = partition.substring(partition.lastIndexOf("/") + 1).replaceAll("[^0-9]", "")
                                runCephCommand(pepperEnv, HOST, "parted ${dev} rm ${part_id}")
                            }
                        }
                    }

                    // umount `umount /dev/sdi1`
                    stage('Umount devices') {
                        runCephCommand(pepperEnv, HOST, "umount /var/lib/ceph/osd/ceph-${id}")
                    }

                    // zap disks `ceph-disk zap /dev/sdi`
                    stage('Zap device') {
                        runCephCommand(pepperEnv, HOST, 'ceph-disk zap ' + dev)
                    }

                    // redeploy the OSD: refresh pillar data and re-run the ceph.osd state to recreate it with the new backend
                    stage('Deploy Ceph OSD') {
                        salt.runSaltProcessStep(pepperEnv, HOST, 'saltutil.refresh_pillar', [], null, true, 5)
                        salt.enforceState(pepperEnv, HOST, 'ceph.osd', true)
                    }

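                    // optional per-OSD checkpoint: show the new backend and cluster status, then pause for operator confirmation before the next OSD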
                    if (PER_OSD_CONTROL.toBoolean() == true) {
                        stage("Verify backend version for osd.${id}") {
                            sleep(5)
                            runCephCommand(pepperEnv, HOST, "ceph osd metadata ${id} | grep osd_objectstore")
                            runCephCommand(pepperEnv, HOST, "ceph -s")
                        }

                        stage('Ask for manual confirmation') {
                            input message: "From the verification commands above, please check the backend version of osd.${id} and the ceph status. If they are correct, do you want to continue migrating the next OSD?"
                        }
                    }
                }
            }
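            // optional per-host checkpoint after all selected OSDs on this host have been migrated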
            if (PER_OSD_HOST_CONTROL.toBoolean() == true) {
                stage("Verify backend versions") {
                    sleep(5)
                    runCephCommand(pepperEnv, HOST, "ceph osd metadata | grep osd_objectstore -B2")
                    runCephCommand(pepperEnv, HOST, "ceph -s")
                }

                stage('Ask for manual confirmation') {
                    input message: "From the verification commands above, please check the ceph status and the backend version of the OSDs on this host. If they are correct, do you want to continue migrating the next OSD host?"
                }
            }

        }
        // remove cluster flags
        if (flags.size() > 0) {
            stage('Unset cluster flags') {
                for (flag in flags) {
                    common.infoMsg('Removing flag ' + flag)
                    runCephCommand(pepperEnv, ADMIN_HOST, 'ceph osd unset ' + flag)
                }
            }
        }
    }
}