/**
 *
 * Replace failed disk with a new disk
 *
 * Required parameters:
 *  SALT_MASTER_URL                 URL of Salt master
 *  SALT_MASTER_CREDENTIALS         Credentials to the Salt API
 *
 *  HOST                            Host (minion id) where the failed disk(s) will be replaced
 *  ADMIN_HOST                      Host (minion id) with admin keyring and /etc/ceph/crushmap file present
 *  OSD                             Failed OSD ids to be replaced (comma-separated list - 1,2,3 - or * for all OSDs found on HOST)
 *  DEVICE                          Comma-separated list of failed devices that will be replaced at HOST (/dev/sdb,/dev/sdc)
 *  JOURNAL_OR_BLOCKDB_PARTITION    Comma-separated list of partitions where journal or block_db for the failed devices on this HOST were stored (/dev/sdh2,/dev/sdh3)
 *  ENFORCE_CRUSHMAP                Set to true if the prepared crush map should be enforced
 *  WAIT_FOR_PG_REBALANCE           Wait for PGs to rebalance after the OSDs are removed from the crush map
 *  CLUSTER_FLAGS                   Comma-separated list of flags to apply to the cluster
 *  WAIT_FOR_HEALTHY                Wait for the cluster to become healthy before stopping the OSD daemons
 *
 */
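/*
 * Example job parameters, for illustration only. The minion id patterns and device paths
 * below are hypothetical and are not defaults of this pipeline:
 *
 *  HOST = 'osd001*'
 *  ADMIN_HOST = 'cmn01*'
 *  OSD = '23,24'
 *  DEVICE = '/dev/sdb,/dev/sdc'
 *  JOURNAL_OR_BLOCKDB_PARTITION = '/dev/sdh2,/dev/sdh3'
 *  CLUSTER_FLAGS = 'noout'
 */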

common = new com.mirantis.mk.Common()
salt = new com.mirantis.mk.Salt()
def python = new com.mirantis.mk.Python()

def pepperEnv = "pepperEnv"
def flags = CLUSTER_FLAGS.tokenize(',')
def osds = OSD.tokenize(',')
def devices = DEVICE.tokenize(',')
def journals_blockdbs = JOURNAL_OR_BLOCKDB_PARTITION.tokenize(',')


def runCephCommand(master, target, cmd) {
    return salt.cmdRun(master, target, cmd)
}
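
// Note: salt.cmdRun returns the Salt API response as a nested structure; the stages below
// unwrap it with ['return'][0].values()[0] to get the command output of the single targeted
// minion. A minimal sketch of that unwrapping, assuming exactly one minion matches the target:
//
//   def out = runCephCommand(pepperEnv, ADMIN_HOST, 'ceph health')['return'][0].values()[0]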

node("python") {

    // create connection to salt master
    python.setupPepperVirtualenv(pepperEnv, SALT_MASTER_URL, SALT_MASTER_CREDENTIALS)

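    // Optionally set cluster-wide OSD flags for the duration of the replacement. Typical
    // values for CLUSTER_FLAGS are 'noout' or 'norebalance'; each entry is passed verbatim
    // to 'ceph osd set' here and to 'ceph osd unset' near the end of the pipeline.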
    if (flags.size() > 0) {
        stage('Set cluster flags') {
            for (flag in flags) {
                runCephCommand(pepperEnv, ADMIN_HOST, 'ceph osd set ' + flag)
            }
        }
    }

    def osd_ids = []

    print("osds:")
    print(osds)

    // get list of osd disks of the host
    def ceph_disks = salt.getGrain(pepperEnv, HOST, 'ceph')['return'][0].values()[0].values()[0]['ceph_disk']
    common.prettyPrint(ceph_disks)

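    // The 'ceph:ceph_disk' grain is expected to be a map keyed by OSD id; only the keys are
    // used below to select which OSDs to remove. A hypothetical shape, for illustration:
    //   ['1': [<disk details>], '2': [<disk details>]]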
    for (i in ceph_disks) {
        def osd_id = i.getKey().toString()
        if (osd_id in osds || OSD == '*') {
            osd_ids.add('osd.' + osd_id)
            print("Will delete " + osd_id)
        } else {
            print("Skipping " + osd_id)
        }
    }

    // `ceph osd out <id> <id>`
    stage('Set OSDs out') {
        runCephCommand(pepperEnv, ADMIN_HOST, 'ceph osd out ' + osd_ids.join(' '))
    }
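
    // 'ceph osd out' only marks the OSDs out so Ceph starts draining data off them;
    // the OSD daemons themselves keep running until they are stopped below.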

    // wait for healthy cluster
    if (common.validInputParam('WAIT_FOR_HEALTHY') && WAIT_FOR_HEALTHY.toBoolean()) {
        stage('Waiting for healthy cluster') {
            while (true) {
                def health = runCephCommand(pepperEnv, ADMIN_HOST, 'ceph health')['return'][0].values()[0]
                if (health.contains('HEALTH_OK')) {
                    common.infoMsg('Cluster is healthy')
                    break;
                }
                sleep(60)
            }
        }
    }
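
    // The wait above loops until 'ceph health' reports HEALTH_OK and has no upper bound.
    // A minimal sketch of a bounded variant (illustrative only, not used by this pipeline),
    // assuming a 90-minute budget is acceptable:
    //
    //   timeout(time: 90, unit: 'MINUTES') {
    //       waitUntil {
    //           def health = runCephCommand(pepperEnv, ADMIN_HOST, 'ceph health')['return'][0].values()[0]
    //           return health.contains('HEALTH_OK')
    //       }
    //   }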

    // stop osd daemons
    stage('Stop OSD daemons') {
        for (i in osd_ids) {
            salt.runSaltProcessStep(pepperEnv, HOST, 'service.stop', ['ceph-osd@' + i.replaceAll('osd.', '')], null, true)
        }
    }

    // `ceph osd crush remove osd.2`
    stage('Remove OSDs from CRUSH') {
        for (i in osd_ids) {
            runCephCommand(pepperEnv, ADMIN_HOST, 'ceph osd crush remove ' + i)
        }
    }
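
    // Removing the OSDs from the CRUSH map changes data placement, so the cluster starts
    // backfilling; the optional stage below waits for that to settle before the disks are wiped.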

    // wait for pgs to rebalance
    if (WAIT_FOR_PG_REBALANCE.toBoolean() == true) {
        stage('Waiting for pgs to rebalance') {
            while (true) {
                def status = runCephCommand(pepperEnv, ADMIN_HOST, 'ceph -s')['return'][0].values()[0]
                if (!status.contains('degraded')) {
                    common.infoMsg('PGs rebalanced')
                    break;
                }
                sleep(3)
            }
        }
    }

    // remove keyring `ceph auth del osd.3`
    stage('Remove OSD keyrings from auth') {
        for (i in osd_ids) {
            runCephCommand(pepperEnv, ADMIN_HOST, 'ceph auth del ' + i)
        }
    }

    // remove osd `ceph osd rm osd.3`
    stage('Remove OSDs') {
        for (i in osd_ids) {
            runCephCommand(pepperEnv, ADMIN_HOST, 'ceph osd rm ' + i)
        }
    }

    // remove cluster flags
    if (flags.size() > 0) {
        stage('Unset cluster flags') {
            for (flag in flags) {
                common.infoMsg('Removing flag ' + flag)
                runCephCommand(pepperEnv, ADMIN_HOST, 'ceph osd unset ' + flag)
            }
        }
    }

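    // The next two stages assume ceph-disk style partitioning, where the OSD data partition
    // is the first partition of each device (hence 'umount ${dev}1' before the disk is zapped).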
    // umount `umount /dev/sdi1`
    stage('Umount devices') {
        for (dev in devices) {
            runCephCommand(pepperEnv, HOST, 'umount ' + dev + '1')
        }
    }

    // zap disks `ceph-disk zap /dev/sdi`
    stage('Zap devices') {
        for (dev in devices) {
            runCephCommand(pepperEnv, HOST, 'ceph-disk zap ' + dev)
        }
    }

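    // Partition removal below parses names like /dev/sdh2 into a device (/dev/sdh) and a
    // partition number (2) by stripping/keeping digits, so it assumes classic sdXN naming;
    // NVMe-style names such as /dev/nvme0n1p2 would not be split correctly.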
    // remove journal or block_db partition `parted /dev/sdj rm 3`
    stage('Remove journal / block_db partitions') {
        for (journal_blockdb in journals_blockdbs) {
            if (journal_blockdb?.trim()) {
                // dev = /dev/sdi
                def dev = journal_blockdb.replaceAll("[0-9]", "")
                // part_id = 2
                def part_id = journal_blockdb.substring(journal_blockdb.lastIndexOf("/")+1).replaceAll("[^0-9]", "")
                runCephCommand(pepperEnv, HOST, "parted ${dev} rm ${part_id}")
            }
        }
    }

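    // Re-running the ceph.osd Salt state on HOST should re-provision OSDs on the replacement
    // disks according to the node's Salt model (assumption: the new disks appear under the
    // same device paths the model refers to).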
    // Deploy failed Ceph OSD
    stage('Deploy Ceph OSD') {
        salt.enforceState(pepperEnv, HOST, 'ceph.osd', true)
    }


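    // The optional crushmap enforcement below compiles the decompiled map kept at
    // /etc/ceph/crushmap on ADMIN_HOST and injects it with 'ceph osd setcrushmap'
    // (the assumption being that this file already reflects the desired placement
    // for the re-created OSDs).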
    if (ENFORCE_CRUSHMAP.toBoolean() == true) {

        // enforce crushmap `crushtool -c /etc/ceph/crushmap -o /etc/ceph/crushmap.compiled; ceph osd setcrushmap -i /etc/ceph/crushmap.compiled`
        stage('Enforce crushmap') {

            stage('Ask for manual confirmation') {
                input message: "Are you sure that your ADMIN_HOST has the correct /etc/ceph/crushmap file? Click Proceed to compile and enforce the crushmap."
            }
            runCephCommand(pepperEnv, ADMIN_HOST, 'crushtool -c /etc/ceph/crushmap -o /etc/ceph/crushmap.compiled')
            runCephCommand(pepperEnv, ADMIN_HOST, 'ceph osd setcrushmap -i /etc/ceph/crushmap.compiled')
        }
    }
}