blob: a50c25365c0bbaaac7a6a77f2eee5836ba7ba086 [file] [log] [blame]
/**
 *
 * Upgrade Ceph mon/mgr/osd/rgw/client
 *
 * Required parameters:
 *  SALT_MASTER_URL                 URL of Salt master
 *  SALT_MASTER_CREDENTIALS         Credentials to the Salt API
 *
 *  ADMIN_HOST                      Host (minion id) with admin keyring and /etc/crushmap file present
 *  CLUSTER_FLAGS                   Comma-separated list of flags to apply to the cluster
 *  WAIT_FOR_HEALTHY                Wait for cluster rebalance before stopping daemons
 *  ORIGIN_RELEASE                  Ceph release version before upgrade
 *  TARGET_RELEASE                  Ceph release version after upgrade
 *  STAGE_UPGRADE_MON               Set to True if Ceph mon nodes upgrade is desired
 *  STAGE_UPGRADE_MGR               Set to True if Ceph mgr nodes upgrade or new deploy is desired
 *  STAGE_UPGRADE_OSD               Set to True if Ceph osd nodes upgrade is desired
 *  STAGE_UPGRADE_RGW               Set to True if Ceph rgw nodes upgrade is desired
 *  STAGE_UPGRADE_CLIENT            Set to True if Ceph client nodes upgrade is desired (includes for example ctl/cmp nodes)
 *  STAGE_FINALIZE                  Set to True if configs recommended for TARGET_RELEASE should be set after upgrade is done
 *  BACKUP_ENABLED                  Select to copy the disks of Ceph VMs before upgrade and backup Ceph directories on OSD nodes
 *  BACKUP_DIR                      Select the target dir to backup to when BACKUP_ENABLED
 *
 */
24
// Assigned without `def`, so these are stored in the script binding and can be
// referenced from the functions defined below (which use `common` and `salt`).
common = new com.mirantis.mk.Common()
salt = new com.mirantis.mk.Salt()
// Declared with `def`: local to the script body, used only by the main pipeline block.
def python = new com.mirantis.mk.Python()

// Name handed to python.setupPepperVirtualenv and passed as `master` to every Salt call.
def pepperEnv = "pepperEnv"
// Split on commas AND spaces so "noout, norebalance" yields clean flag names;
// a leading space would break the "<flag> flag(s) set" match in waitForHealthy.
def flags = CLUSTER_FLAGS.tokenize(', ')
31
/**
 * Run a shell command on the given Salt target and hand back the raw Salt response.
 *
 * @param master  Salt connection object/name forwarded to salt.cmdRun
 * @param target  Salt target expression (minion id or compound matcher)
 * @param cmd     command line to execute
 * @return whatever salt.cmdRun returns for this call
 */
def runCephCommand(master, target, cmd) {
    def response = salt.cmdRun(master, target, cmd)
    return response
}
35
/**
 * Poll `ceph health` on ADMIN_HOST until the cluster is considered healthy.
 *
 * The cluster counts as healthy when the health output contains HEALTH_OK, or when
 * it reports "<flag> flag(s) set" for one of the given flags and does not mention
 * "down". The function sleeps 10 between polls (Jenkins `sleep` step).
 *
 * If the attempt limit is reached the function still returns normally (callers rely
 * on this best-effort behaviour), but a warning is logged so the timeout is visible.
 *
 * @param master    Salt connection forwarded to runCephCommand
 * @param flags     cluster flags that are expected to be set during the upgrade
 * @param count     starting value of the attempt counter
 * @param attempts  upper bound for the attempt counter
 */
def waitForHealthy(master, flags, count=0, attempts=300) {
    while (count < attempts) {
        def health = runCephCommand(master, ADMIN_HOST, 'ceph health')['return'][0].values()[0]
        if (health.contains('HEALTH_OK')) {
            common.infoMsg('Cluster is healthy')
            return
        }
        // A warning caused only by the flags this job set itself is acceptable,
        // as long as nothing is reported as down.
        for (flag in flags) {
            if (health.contains(flag + ' flag(s) set') && !health.contains('down')) {
                common.infoMsg('Cluster is healthy')
                return
            }
        }
        count++
        sleep(10)
    }
    // Previously the timeout was indistinguishable from success in the job log.
    common.warningMsg("Gave up waiting for a healthy cluster (attempt limit ${attempts} reached), continuing anyway")
}
55
/**
 * Back up the Ceph nodes of one role, inside a "backup <target>" stage.
 *
 * For 'osd' the ceph.backup state is enforced on I@ceph:osd and the
 * ceph-backup-runner-call.sh script is run there; any failure is logged and
 * re-raised as InterruptedException.
 *
 * For every other role each minion matching I@ceph:<target> is handled in turn:
 * its provider is looked up in the salt:control pillar, the VM is destroyed and its
 * system.qcow2 copied to BACKUP_DIR (both only when no backup file exists yet), the
 * VM is started again and the minion is awaited. Cluster health is awaited before
 * and after each VM. Failures of the individual virsh/cp steps only produce warnings.
 *
 * @param master  Salt connection
 * @param flags   cluster flags forwarded to waitForHealthy
 * @param target  Ceph role name used in the I@ceph:<target> matcher
 */
def backup(master, flags, target) {
    stage("backup ${target}") {

        if (target == 'osd') {
            def osdNodes = "I@ceph:${target}"
            try {
                salt.enforceState(master, osdNodes, "ceph.backup", true)
                runCephCommand(master, osdNodes, "su root -c '/usr/local/bin/ceph-backup-runner-call.sh'")
            } catch (Exception e) {
                common.errorMsg(e)
                common.errorMsg("Make sure Ceph backup on OSD nodes is enabled")
                throw new InterruptedException()
            }
        } else {
            // domain of the Salt master, used to build the libvirt domain names
            def domainGrain = salt.getGrain(master, 'I@salt:master', 'domain')
            def domain = domainGrain['return'][0].values()[0].values()[0]

            // first salt:control minion; its pillar is queried for the VM -> provider mapping
            def controlGrain = salt.getGrain(master, 'I@salt:control', 'id')
            def kvm01 = controlGrain['return'][0].values()[0].values()[0]

            def hostGrain = salt.getGrain(master, "I@ceph:${target}", 'host')
            def hostEntries = hostGrain['return'][0].values()
            for (entry in hostEntries) {
                def minion_name = entry.values()[0]
                def providerPillar = salt.getPillar(master, "${kvm01}", "salt:control:cluster:internal:node:${minion_name}:provider")
                def minionProvider = providerPillar['return'][0].values()[0]

                def providerTarget = "${minionProvider}"
                def vmName = "${minion_name}.${domain}"
                def backupFile = "${BACKUP_DIR}/${vmName}.qcow2.bak"

                waitForHealthy(master, flags)
                // stop the VM only when there is no backup yet
                try {
                    salt.cmdRun(master, providerTarget, "[ ! -f ${backupFile} ] && virsh destroy ${vmName}")
                } catch (Exception e) {
                    common.warningMsg('Backup already exists')
                }
                // copy the system disk only when there is no backup yet
                try {
                    salt.cmdRun(master, providerTarget, "[ ! -f ${backupFile} ] && cp /var/lib/libvirt/images/${vmName}/system.qcow2 ${backupFile}")
                } catch (Exception e) {
                    common.warningMsg('Backup already exists')
                }
                try {
                    salt.cmdRun(master, providerTarget, "virsh start ${vmName}")
                } catch (Exception e) {
                    common.warningMsg(e)
                }
                salt.minionsReachable(master, 'I@salt:master', "${minion_name}*")
                waitForHealthy(master, flags)
            }
        }
    }
    return
}
105
/**
 * Upgrade the Ceph packages of one role and restart its services.
 *
 * Steps:
 *  1. refresh pillar and enforce linux.system.repo on I@ceph:<target>
 *  2. for 'mgr', additionally enforce the ceph.mgr state
 *  3. for 'common', install ceph-common on all matching nodes at once
 *  4. for every other role, go node by node: install the package, restart the
 *     services (OSDs one by one), wait for cluster health, show service status
 *     and ask the operator for confirmation before the next node
 *  5. print `ceph versions` from ADMIN_HOST
 *
 * @param master  Salt connection
 * @param target  Ceph role: 'mon', 'mgr', 'osd', 'radosgw' or 'common'
 * @param flags   cluster flags forwarded to waitForHealthy
 */
def upgrade(master, target, flags) {

    stage("Change ${target} repos") {
        salt.runSaltProcessStep(master, "I@ceph:${target}", 'saltutil.refresh_pillar', [], null, true, 5)
        salt.enforceState(master, "I@ceph:${target}", 'linux.system.repo', true)
    }
    if (target == 'mgr') {
        stage('Run ceph mgr state') {
            salt.enforceState(master, "I@ceph:mgr", "ceph.mgr", true)
        }
    }
    if (target == 'common') {
        stage('Upgrade ceph-common pkgs') {
            runCephCommand(master, "I@ceph:${target}", "apt install ceph-${target} -y")
        }
    } else {
        // `def` keeps the list local instead of leaking it into the script binding
        def minions = salt.getMinions(master, "I@ceph:${target}")
        // the radosgw package is not prefixed with "ceph-", all other roles are
        def pkg = (target == 'radosgw') ? target : "ceph-${target}"

        for (minion in minions) {
            // upgrade pkgs on the current node only, so that the upgrade is really
            // rolling and the confirmation below gates the next node
            stage("Upgrade ${target} pkgs on ${minion}") {
                runCephCommand(master, "${minion}", "apt install ${pkg} -y")
            }
            // restart services
            stage("Restart ${target} services on ${minion}") {
                if (target == 'osd') {
                    // restart OSD daemons one at a time, waiting for health in between
                    def osds = salt.getGrain(master, "${minion}", 'ceph:ceph_disk').values()[0]
                    osds[0].values()[0].values()[0].each { osd, param ->
                        runCephCommand(master, "${minion}", "systemctl restart ceph-${target}@${osd}")
                        waitForHealthy(master, flags)
                    }
                } else {
                    runCephCommand(master, "${minion}", "systemctl restart ceph-${target}.target")
                    waitForHealthy(master, flags)
                }
            }

            stage("Verify services for ${minion}") {
                sleep(10)
                runCephCommand(master, "${minion}", "systemctl status ceph-${target}.target")
            }

            stage('Ask for manual confirmation') {
                runCephCommand(master, ADMIN_HOST, "ceph -s")
                input message: "From the verification command above, please check Ceph ${target} joined the cluster correctly. If so, Do you want to continue to upgrade next node?"
            }
        }
    }
    runCephCommand(master, ADMIN_HOST, "ceph versions")
    sleep(5)
    return
}
// Main pipeline body: validates the selected stages, optionally backs up the nodes,
// sets the cluster flags, upgrades the selected roles in the order
// mon -> mgr -> osd -> radosgw -> client, unsets the flags and optionally finalizes.
timeout(time: 12, unit: 'HOURS') {
    node("python") {

        // create connection to salt master
        python.setupPepperVirtualenv(pepperEnv, SALT_MASTER_URL, SALT_MASTER_CREDENTIALS)

        stage ('Check user choices') {
            if (STAGE_UPGRADE_RGW.toBoolean()) {
                // if rgw, check if other stuff has required version:
                // a daemon type that is not upgraded in this run must already report
                // TARGET_RELEASE only (no ORIGIN_RELEASE left) in `ceph <type> versions`
                def alreadyOnTarget = { daemon ->
                    def versions = runCephCommand(pepperEnv, ADMIN_HOST, "ceph ${daemon} versions")['return'][0].values()[0]
                    return versions.contains("${TARGET_RELEASE}") && !versions.contains("${ORIGIN_RELEASE}")
                }
                def mon_ok = STAGE_UPGRADE_MON.toBoolean() || alreadyOnTarget('mon')
                def mgr_ok = STAGE_UPGRADE_MGR.toBoolean() || alreadyOnTarget('mgr')
                def osd_ok = STAGE_UPGRADE_OSD.toBoolean() || alreadyOnTarget('osd')
                if (!mon_ok || !osd_ok || !mgr_ok) {
                    common.errorMsg('You may choose stages in any order, but RGW should be upgraded last')
                    throw new InterruptedException()
                }
            }
        }

        if (BACKUP_ENABLED.toBoolean()) {
            // backup() is declared as (master, flags, target); the flags argument
            // must be passed or the call fails with MissingMethodException
            if (STAGE_UPGRADE_MON.toBoolean()) {
                backup(pepperEnv, flags, 'mon')
            }
            if (STAGE_UPGRADE_RGW.toBoolean()) {
                backup(pepperEnv, flags, 'radosgw')
            }
            if (STAGE_UPGRADE_OSD.toBoolean()) {
                backup(pepperEnv, flags, 'osd')
            }
        }

        if (flags.size() > 0) {
            stage('Set cluster flags') {
                for (flag in flags) {
                    runCephCommand(pepperEnv, ADMIN_HOST, 'ceph osd set ' + flag)
                }
            }
        }

        if (STAGE_UPGRADE_MON.toBoolean()) {
            upgrade(pepperEnv, 'mon', flags)
        }

        if (STAGE_UPGRADE_MGR.toBoolean()) {
            upgrade(pepperEnv, 'mgr', flags)
        }

        if (STAGE_UPGRADE_OSD.toBoolean()) {
            upgrade(pepperEnv, 'osd', flags)
        }

        if (STAGE_UPGRADE_RGW.toBoolean()) {
            upgrade(pepperEnv, 'radosgw', flags)
        }

        if (STAGE_UPGRADE_CLIENT.toBoolean()) {
            upgrade(pepperEnv, 'common', flags)
        }

        // remove cluster flags
        if (flags.size() > 0) {
            stage('Unset cluster flags') {
                for (flag in flags) {
                    // sortbitwise is left set on purpose; every other flag is removed
                    if (!flag.contains('sortbitwise')) {
                        common.infoMsg('Removing flag ' + flag)
                        runCephCommand(pepperEnv, ADMIN_HOST, 'ceph osd unset ' + flag)
                    }
                }
            }
        }

        if (STAGE_FINALIZE.toBoolean()) {
            stage("Finalize ceph version upgrade") {
                runCephCommand(pepperEnv, ADMIN_HOST, "ceph osd require-osd-release ${TARGET_RELEASE}")
                // the two settings below are best effort: failures are only logged
                try {
                    runCephCommand(pepperEnv, ADMIN_HOST, "ceph osd set-require-min-compat-client ${ORIGIN_RELEASE}")
                } catch (Exception e) {
                    common.warningMsg(e)
                }
                try {
                    runCephCommand(pepperEnv, ADMIN_HOST, "ceph osd crush tunables optimal")
                } catch (Exception e) {
                    common.warningMsg(e)
                }
            }
        }

        // wait for healthy cluster
        if (WAIT_FOR_HEALTHY.toBoolean()) {
            waitForHealthy(pepperEnv, flags)
        }
    }
}