blob: 243bafff168cda8e7bc1aaa84684b732fe249261 [file] [log] [blame]
/**
 * Verify and restore Galera cluster
 *
 * Expected parameters:
 *   SALT_MASTER_CREDENTIALS   Credentials to the Salt API.
 *   SALT_MASTER_URL           Full Salt API address [http://10.10.10.1:8000].
 *   ASK_CONFIRMATION          Ask confirmation for restore
 *   VERIFICATION_RETRIES      Number of retries to verify the restoration.
 *   CHECK_TIME_SYNC           Set to true to check time synchronization across selected nodes.
 *   RESTORE_TYPE              Sets restoration method
 *
**/
13
// Helper objects from the shared pipeline library used throughout this script.
def common = new com.mirantis.mk.Common()
def salt = new com.mirantis.mk.Salt()
def galera = new com.mirantis.mk.Galera()
def python = new com.mirantis.mk.Python()

// Identifier handed to python.setupPepperVirtualenv and to every salt/galera call.
def pepperEnv = "pepperEnv"
// Initial status code; nothing in this block changes it.
def resultCode = 99
// Requested action; may be null when the RESTORE_TYPE parameter is not set.
def restoreType = env.RESTORE_TYPE

// Both switches fall back to true when the parameter is unset or empty.
askConfirmation = (env.getProperty('ASK_CONFIRMATION') ?: true).toBoolean()
checkTimeSync = (env.getProperty('CHECK_TIME_SYNC') ?: true).toBoolean()

// Use the requested retry count only when it is a valid integer, otherwise 5.
if (common.validInputParam(VERIFICATION_RETRIES) && VERIFICATION_RETRIES.isInteger()) {
    verificationRetries = VERIFICATION_RETRIES.toInteger()
} else {
    verificationRetries = 5
}

// Derive the action flags from RESTORE_TYPE. Groovy's `==` is null-safe, so an
// unset RESTORE_TYPE leaves all three flags false instead of raising a
// NullPointerException the way `restoreType.equals(...)` did.
def runRestoreDb = (restoreType == "BACKUP_AND_RESTORE" || restoreType == "ONLY_RESTORE")
def runBackupDb = (restoreType == "BACKUP_AND_RESTORE")
def restartCluster = (restoreType == "RESTART_CLUSTER")
Martin Polreich0d538262019-02-01 14:46:10 +010042
// Main pipeline: verify the Galera cluster, optionally back it up, restore or
// restart it, and finally verify the result. The whole run is capped at 12 hours.
timeout(time: 12, unit: 'HOURS') {
    node() {
        stage('Setup virtualenv for Pepper') {
            python.setupPepperVirtualenv(pepperEnv, SALT_MASTER_URL, SALT_MASTER_CREDENTIALS)
        }

        def galeraStatus = [:]
        // A `return` inside a stage closure only leaves that closure, not the
        // pipeline. This flag is set only when the verification stage decides
        // that the requested action may go ahead; it is checked after the stage.
        def proceed = false
        stage('Verify status') {
            def sysstatTargets = 'I@xtrabackup:client or I@xtrabackup:server'
            def sysstatTargetsNodes = salt.getMinions(pepperEnv, sysstatTargets)
            // Make sure the 'sysstat' package is present on the targeted nodes.
            try {
                if (!salt.isPackageInstalled(['saltId': pepperEnv, 'target': sysstatTargets, 'packageName': 'sysstat', 'output': false])) {
                    if (askConfirmation) {
                        input message: "Do you want to install 'sysstat' package on targeted nodes: ${sysstatTargetsNodes}? Click to confirm"
                    }
                    salt.runSaltProcessStep(pepperEnv, sysstatTargets, 'pkg.install', ['sysstat'])
                }
            } catch (Exception e) {
                // Best effort: report the problem and let the operator decide.
                common.errorMsg("Unable to determine status of sysstat package on target nodes: ${sysstatTargetsNodes}.")
                common.errorMsg(e.getMessage())
                if (askConfirmation) {
                    input message: "Do you want to continue? Click to confirm"
                }
            }
            // Both xtrabackup pillars must be non-empty on the Galera nodes.
            try {
                common.infoMsg('Checking required xtrabackup pillars...')
                def xtrabackupRestoreFrom = salt.getPillar(pepperEnv, 'I@galera:master or I@galera:slave', 'xtrabackup:client:restore_from')
                def xtrabackupRestoreLatest = salt.getPillar(pepperEnv, 'I@galera:master or I@galera:slave', 'xtrabackup:client:restore_full_latest')
                if ('' in xtrabackupRestoreFrom['return'][0].values() || '' in xtrabackupRestoreLatest['return'][0].values()) {
                    throw new Exception('Pillars xtrabackup:client:restore_from or xtrabackup:client:restore_full_latest are missed for \'I@galera:master or I@galera:slave\' nodes.')
                }
            } catch (Exception e) {
                common.errorMsg(e.getMessage())
                common.errorMsg('Please fix your pillar data. For more information check docs: https://docs.mirantis.com/mcp/latest/mcp-operations-guide/backup-restore/openstack/database/xtrabackup-restore-database.html')
                // Stop here and mark the run as failed, like the other fatal branches below.
                currentBuild.result = "FAILURE"
                return
            }
            galeraStatus = galera.verifyGaleraStatus(pepperEnv, checkTimeSync)

            // Branches that `return` leave `proceed` false and end the pipeline;
            // branches that `break` fall through to `proceed = true`.
            switch (galeraStatus.error) {
                case 128:
                    common.errorMsg("Unable to obtain Galera members minions list. Without fixing this issue, pipeline cannot continue in verification, backup and restoration. This may be caused by wrong Galera configuration or corrupted pillar data.")
                    currentBuild.result = "FAILURE"
                    return
                case 130:
                    common.errorMsg("Neither master or slaves are reachable. Without fixing this issue, pipeline cannot continue in verification, backup and restoration. Is at least one member of the Galera cluster up and running?")
                    currentBuild.result = "FAILURE"
                    return
                case 131:
                    common.errorMsg("Time desynced - Please fix this issue and rerun the pipeline.")
                    currentBuild.result = "FAILURE"
                    return
                case 140..141:
                    common.errorMsg("Disk utilization check failed - Please fix this issue and rerun the pipeline.")
                    currentBuild.result = "FAILURE"
                    return
                case 1:
                    if (askConfirmation) {
                        input message: "There was a problem with parsing the status output or with determining it. Do you want to run a next action: ${restoreType}?"
                    } else {
                        common.warningMsg("There was a problem with parsing the status output or with determining it. Trying to perform action: ${restoreType}.")
                    }
                    break
                case 0:
                    if (askConfirmation) {
                        input message: "There seems to be everything alright with the cluster, do you still want to continue with next action: ${restoreType}?"
                        break
                    } else {
                        common.warningMsg("There seems to be everything alright with the cluster, no backup and no restoration will be done.")
                        currentBuild.result = "SUCCESS"
                        return
                    }
                default:
                    if (askConfirmation) {
                        input message: "There's something wrong with the cluster, do you want to continue with action: ${restoreType}?"
                    } else {
                        common.warningMsg("There's something wrong with the cluster, trying to perform action: ${restoreType}")
                    }
                    break
            }
            proceed = true
        }
        if (!proceed) {
            // Verification ended the run (fatal error, or a healthy cluster with
            // no confirmation requested): skip backup, restore and restart.
            return
        }
        if (runBackupDb) {
            if (askConfirmation) {
                input message: "Are you sure you want to run a backup? Click to confirm"
            }
            stage('Backup') {
                build(job: 'galera_backup_database', parameters: [
                        [$class: 'StringParameterValue', name: 'SALT_MASTER_URL', value: SALT_MASTER_URL],
                        [$class: 'StringParameterValue', name: 'SALT_MASTER_CREDENTIALS', value: SALT_MASTER_CREDENTIALS],
                        [$class: 'StringParameterValue', name: 'OVERRIDE_BACKUP_NODE', value: "none"],
                    ]
                )
            }
        }
        if (runRestoreDb || restartCluster) {
            if (runRestoreDb) {
                stage('Restore') {
                    if (askConfirmation) {
                        input message: "Are you sure you want to run a restore? Click to confirm"
                    }
                    // The former guard compared a status code that was never updated,
                    // so it was always true; the `proceed` check above now covers the
                    // "healthy cluster, no confirmation" case it was meant for.
                    try {
                        galera.restoreGaleraCluster(pepperEnv, galeraStatus)
                    } catch (Exception e) {
                        // Failure is only reported; the verification stage below still runs.
                        common.errorMsg("Restoration process has failed.")
                        common.errorMsg(e.getMessage())
                    }
                }
            }
            if (restartCluster) {
                stage('Restart cluster') {
                    if (askConfirmation) {
                        input message: "Are you sure you want to run a restart? Click to confirm"
                    }
                    try {
                        // Same library call as the restore, with an extra `false` argument.
                        galera.restoreGaleraCluster(pepperEnv, galeraStatus, false)
                    } catch (Exception e) {
                        // Failure is only reported; the verification stage below still runs.
                        common.errorMsg("Restart process has failed.")
                        common.errorMsg(e.getMessage())
                    }
                }
            }
            // Delay handed to common.retry between verification attempts. It is also
            // interpolated into the error text, which used to say "5 seconds".
            // NOTE(review): the unit is assumed to be seconds, as the message states - confirm.
            def verificationDelay = 15
            stage('Verify restoration result') {
                common.retry(verificationRetries, verificationDelay) {
                    def status = galera.verifyGaleraStatus(pepperEnv, false)
                    if (status.error >= 1) {
                        error("Verification attempt finished with an error. This may be caused by cluster not having enough time to come up or to sync. Next verification attempt in ${verificationDelay} seconds.")
                    } else {
                        common.infoMsg("Restoration procedure seems to be successful. See verification report to be sure.")
                        currentBuild.result = "SUCCESS"
                    }
                }
            }
        }
    }
}