blob: 6f2531560d75dfec131ac2f1ca8fa28e423d9fcb [file] [log] [blame]
Martin Polreich8f0f3ac2019-02-15 10:03:33 +01001package com.mirantis.mk
2
3/**
4 *
5 * Galera functions
6 *
7 */
8
9
/**
 * Returns parameters from mysql.status output on given target node
 *
 * @param env Salt Connection object or pepperEnv
 * @param target Targeted node
 * @param parameters Parameters to be returned (String or list of Strings). If no parameters are provided or it is set to '[]', all of them are returned.
 * @param print Boolean value to enable pretty-printing of the whole mysql.status output
 * @return result Map of parameters with their values
 */
18
def getWsrepParameters(env, target, parameters=[], print=false) {
    // Runs mysql.status on `target` through Salt and returns either the whole
    // key-sorted status map or only the requested keys.
    def salt = new com.mirantis.mk.Salt()
    def common = new com.mirantis.mk.Common()
    // Working variables are declared with `def` so they stay local instead of
    // being written into the script binding.
    def result = [:]
    def out = salt.runSaltProcessStep(env, "${target}", "mysql.status", [], null, false)
    def outlist = out['return'][0]
    // Only the response stored under the first key of the return map is used.
    def resultYaml = outlist.get(outlist.keySet()[0]).sort()
    if (print) {
        common.prettyPrint(resultYaml)
    }
    // Accept any single string (String or GString); a bare GString would
    // otherwise be iterated character by character by the loop below.
    if (parameters instanceof CharSequence) {
        parameters = [parameters.toString()]
    }
    // null, [] and [''] all mean "return everything".
    if (!parameters || parameters == ['']) {
        result = resultYaml
    } else {
        for (String param in parameters) {
            def value = resultYaml[param]
            // Numeric strings are converted so callers can compare them as numbers.
            if (value instanceof String && value.isBigDecimal()) {
                value = value.toBigDecimal()
            }
            // Keys missing from the status output are returned with a null value.
            result[param] = value
        }
    }
    return result
}
45
/**
 * Verifies Galera database
 *
 * This function checks for Galera master, tests connection and if reachable, it obtains the result
 * of Salt mysql.status function. The result is then parsed, validated and output to the user.
 *
 * @param env Salt Connection object or pepperEnv
 * @param slave Boolean value to enable slave checking (if master is unreachable)
 * @param checkTimeSync Boolean value to enable time sync check
 * @return resultCode int values used to determine exit status in the calling function
 */
def verifyGaleraStatus(env, slave=false, checkTimeSync=false) {
    // Return codes produced below:
    //   0    status validated as OK
    //   1    status output could not be parsed, or status stayed "unknown"
    //   2    validation reported errors
    //   128  Galera master not reachable
    //   129  list of Galera slaves could not be obtained
    //   130  no node selected for testing
    //   131  time sync check failed
    //   256  mysql.status call failed
    //   1024 mysql.status response empty
    def salt = new com.mirantis.mk.Salt()
    def common = new com.mirantis.mk.Common()
    def out = ""
    def status = "unknown"
    def testNode = ""
    if (!slave) {
        try {
            // Declared locally so the value does not leak into the script binding.
            def galeraMaster = salt.getMinions(env, "I@galera:master")
            common.infoMsg("Current Galera master is: ${galeraMaster}")
            salt.minionsReachable(env, "I@salt:master", "I@galera:master")
            testNode = "I@galera:master"
        } catch (Exception e) {
            common.errorMsg('Galera master is not reachable.')
            return 128
        }
    } else {
        // Declared before the try block because the loop below reads it.
        def galeraSlaves = []
        try {
            galeraSlaves = salt.getMinions(env, "I@galera:slave")
            common.infoMsg("Testing Galera slave minions: ${galeraSlaves}")
        } catch (Exception e) {
            common.errorMsg("Cannot obtain Galera slave minions list.")
            return 129
        }
        // Use the first slave that answers the reachability check.
        for (minion in galeraSlaves) {
            try {
                salt.minionsReachable(env, "I@salt:master", minion)
                testNode = minion
                break
            } catch (Exception e) {
                common.warningMsg("Slave '${minion}' is not reachable.")
            }
        }
    }
    if (!testNode) {
        common.errorMsg("No Galera slave was reachable.")
        return 130
    }
    if (checkTimeSync && !salt.checkClusterTimeSync(env, "I@galera:master or I@galera:slave")) {
        common.errorMsg("Time in cluster is desynchronized or it couldn't be detemined. You should fix this issue manually before proceeding.")
        return 131
    }
    try {
        out = salt.runSaltProcessStep(env, "${testNode}", "mysql.status", [], null, false)
    } catch (Exception e) {
        common.errorMsg('Could not determine mysql status.')
        return 256
    }
    if (out) {
        try {
            status = validateAndPrintGaleraStatusReport(env, out, testNode)
        } catch (Exception e) {
            common.errorMsg('Could not parse the mysql status output. Check it manually.')
            return 1
        }
    } else {
        common.errorMsg("Mysql status response unrecognized or is empty. Response: ${out}")
        return 1024
    }
    if (status == "OK") {
        common.infoMsg("No errors found - MySQL status is ${status}.")
        return 0
    } else if (status == "unknown") {
        common.warningMsg('MySQL status cannot be detemined')
        return 1
    } else {
        common.errorMsg("Errors found.")
        return 2
    }
}
127
/** Validates and prints result of verifyGaleraStatus function
@param env Salt Connection object or pepperEnv
@param out Output of the mysql.status Salt function
@param minion Salt target the status was obtained from ("I@galera:master" or a slave minion)
@return status "OK" or "ERROR" depending on result of validation
*/
133
def validateAndPrintGaleraStatusReport(env, out, minion) {
    // Checks selected wsrep_* values from a mysql.status response against expected
    // values/thresholds, prints a report and returns "OK" or "ERROR".
    def salt = new com.mirantis.mk.Salt()
    def common = new com.mirantis.mk.Common()
    // Any target other than the master compound is treated as a slave.
    def role = (minion == "I@galera:master") ? "master" : "slave"
    // Expected cluster size is the number of members configured in the pillar.
    def sizeOut = salt.getReturnValues(salt.getPillar(env, minion, "galera:${role}:members"))
    def expected_cluster_size = sizeOut.size()
    def outlist = out['return'][0]
    // Only the response stored under the first key of the return map is used.
    def resultYaml = outlist.get(outlist.keySet()[0]).sort()
    common.prettyPrint(resultYaml)
    def parameters = [
        wsrep_cluster_status: [title: 'Cluster status', expectedValues: ['Primary'], description: ''],
        wsrep_cluster_size: [title: 'Current cluster size', expectedValues: [expected_cluster_size], description: ''],
        wsrep_ready: [title: 'Node status', expectedValues: ['ON', true], description: ''],
        wsrep_local_state_comment: [title: 'Node status comment', expectedValues: ['Joining', 'Waiting on SST', 'Joined', 'Synced', 'Donor'], description: ''],
        wsrep_connected: [title: 'Node connectivity', expectedValues: ['ON', true], description: ''],
        wsrep_local_recv_queue_avg: [title: 'Average size of local reveived queue', expectedThreshold: [warn: 0.5, error: 1.0], description: '(Value above 0 means that the node cannot apply write-sets as fast as it receives them, which can lead to replication throttling)'],
        wsrep_local_send_queue_avg: [title: 'Average size of local send queue', expectedThreshold: [warn: 0.5, error: 1.0], description: '(Value above 0 indicate replication throttling or network throughput issues, such as a bottleneck on the network link.)']
    ]
    // Attach the actual value to every checked parameter; numeric strings become BigDecimal.
    for (key in parameters.keySet()) {
        def value = resultYaml[key]
        if (value instanceof String && value.isBigDecimal()) {
            value = value.toBigDecimal()
        }
        parameters.get(key) << [actualValue: value]
    }
    // Classify every parameter as 'ok', 'warn' or 'error'.
    for (key in parameters.keySet()) {
        def param = parameters.get(key)
        def actual = param.get('actualValue')
        if (param.containsKey('expectedThreshold')) {
            def threshold = param.get('expectedThreshold')
            // A missing or non-numeric value is reported as an error instead of
            // failing on the numeric comparison.
            if (!(actual instanceof Number) || actual > threshold.get('error')) {
                param << [match: 'error']
            } else if (actual > threshold.get('warn')) {
                param << [match: 'warn']
            } else {
                param << [match: 'ok']
            }
        } else {
            // Groovy '==' is used on purpose so that e.g. Integer 3 matches BigDecimal 3.
            def match = 'error'
            for (expValue in param.get('expectedValues')) {
                if (expValue == actual) {
                    match = 'ok'
                    break
                }
            }
            param << [match: match]
        }
    }
    def cluster_info_report = []
    def cluster_warning_report = []
    def cluster_error_report = []
    for (key in parameters.keySet()) {
        def param = parameters.get(key)
        def expValues
        if (param.containsKey('expectedThreshold')) {
            expValues = "below ${param.get('expectedThreshold').get('warn')}"
        } else {
            expValues = param.get('expectedValues').join(' or ')
        }
        def reportString = "${param.title}: ${param.actualValue} (Expected: ${expValues}) ${param.description}"
        if (param.get('match').equals('ok')) {
            cluster_info_report.add("[OK ] ${reportString}")
        } else if (param.get('match').equals('warn')) {
            cluster_warning_report.add("[WARNING] ${reportString}")
        } else {
            // No trailing ')' here: the report string is already complete.
            cluster_error_report.add("[ ERROR] ${reportString}")
        }
    }
    common.infoMsg("CLUSTER STATUS REPORT: ${cluster_info_report.size()} expected values, ${cluster_warning_report.size()} warnings and ${cluster_error_report.size()} error found:")
    if (cluster_info_report.size() > 0) {
        common.infoMsg(cluster_info_report.join('\n'))
    }
    if (cluster_warning_report.size() > 0) {
        common.warningMsg(cluster_warning_report.join('\n'))
    }
    // Warnings alone do not change the result; only errors do.
    if (cluster_error_report.size() > 0) {
        common.errorMsg(cluster_error_report.join('\n'))
        return "ERROR"
    } else {
        return "OK"
    }
}
221
/** Returns last shutdown node of Galera cluster
@param env Salt Connection object or pepperEnv
@param nodes List of nodes to check only (defaults to []). If not provided, it will check all nodes.
 Use this parameter if the cluster splits to several components and you only want to check one of them.
@return Salt target of the last shutdown node ("S@<host>"), or "I@galera:master" if it cannot be determined
*/
228
def getGaleraLastShutdownNode(env, nodes = []) {
    // Reads 'seqno' from /var/lib/mysql/grastate.dat on every member and returns a
    // Salt target for the member with the highest value; falls back to
    // "I@galera:master" when nothing usable is found.
    def salt = new com.mirantis.mk.Salt()
    def common = new com.mirantis.mk.Common()
    def members = []
    // -2 is the sentinel for "no usable seqno seen yet".
    def lastNode = [ip: '', seqno: -2]
    try {
        if (nodes) {
            nodes = salt.getIPAddressesForNodenames(env, nodes)
            for (node in nodes) {
                // The member map is wrapped in a list on purpose: `[host: x] + members`
                // would be Map.plus(...) and collapse everything into one single map
                // instead of building a list of members.
                // NOTE(review): each entry is presumably a one-key map (node name -> address) - confirm.
                members = [[host: "${node.get(node.keySet()[0])}"]] + members
            }
        } else {
            members = salt.getReturnValues(salt.getPillar(env, "I@galera:master", "galera:master:members"))
        }
    } catch (Exception er) {
        common.errorMsg('Could not retrieve members list')
        return 'I@galera:master'
    }
    if (members) {
        for (member in members) {
            try {
                salt.minionsReachable(env, 'I@salt:master', "S@${member.host}")
                def out = salt.getReturnValues(salt.cmdRun(env, "S@${member.host}", 'cat /var/lib/mysql/grastate.dat | grep "seqno" | cut -d ":" -f2', true, null, false))
                def rawSeqno = out.tokenize('\n')[0].trim()
                // Parsed as long so that values above the int range are still accepted;
                // anything that is not a whole number counts as "unknown" (-2).
                def seqno = rawSeqno.isLong() ? rawSeqno.toLong() : -2
                // Strict '>' keeps the first member seen when several share the highest seqno.
                if (seqno > lastNode.get('seqno')) {
                    lastNode << [ip: "${member.host}", seqno: seqno]
                }
            } catch (Exception er) {
                common.warningMsg("Could not determine 'seqno' value for node ${member.host} ")
            }
        }
    }
    if (lastNode.get('ip') != '') {
        return "S@${lastNode.ip}"
    } else {
        return "I@galera:master"
    }
}
273
274/**
Martin Polreich9044fe42019-03-21 16:00:23 +0100275 * Restores Galera cluster
276 * @param env Salt Connection object or pepperEnv
277 * @param runRestoreDb Boolean to determine if the restoration of DB should be run as well
Martin Polreich8f0f3ac2019-02-15 10:03:33 +0100278 * @return output of salt commands
279 */
def restoreGaleraCluster(env, runRestoreDb=true) {
    // Stops mysql on all Galera nodes, moves the data of the last shutdown node to
    // /root/mysql/mysql.bak, re-applies the 'galera' state on that node and then
    // starts mysql on the remaining nodes.
    // The preparatory steps are best-effort: any failure is only logged with the
    // warning shown, whatever the actual cause was.
    def salt = new com.mirantis.mk.Salt()
    def common = new com.mirantis.mk.Common()
    try {
        salt.runSaltProcessStep(env, 'I@galera:slave', 'service.stop', ['mysql'])
    } catch (Exception er) {
        common.warningMsg('Mysql service already stopped')
    }
    try {
        salt.runSaltProcessStep(env, 'I@galera:master', 'service.stop', ['mysql'])
    } catch (Exception er) {
        common.warningMsg('Mysql service already stopped')
    }
    // Must run before grastate.dat is removed below, because the lookup reads that file.
    // Declared locally so the target does not leak into the script binding.
    def lastNodeTarget = getGaleraLastShutdownNode(env)
    try {
        salt.cmdRun(env, 'I@galera:slave', "rm /var/lib/mysql/ib_logfile*")
    } catch (Exception er) {
        common.warningMsg('Files are not present')
    }
    try {
        salt.cmdRun(env, 'I@galera:slave', "rm /var/lib/mysql/grastate.dat")
    } catch (Exception er) {
        common.warningMsg('Files are not present')
    }
    // Prepare an empty backup directory on the last shutdown node and move its data there.
    try {
        salt.cmdRun(env, lastNodeTarget, "mkdir /root/mysql/mysql.bak")
    } catch (Exception er) {
        common.warningMsg('Directory already exists')
    }
    try {
        salt.cmdRun(env, lastNodeTarget, "rm -rf /root/mysql/mysql.bak/*")
    } catch (Exception er) {
        common.warningMsg('Directory already empty')
    }
    try {
        salt.cmdRun(env, lastNodeTarget, "mv /var/lib/mysql/* /root/mysql/mysql.bak")
    } catch (Exception er) {
        common.warningMsg('Files were already moved')
    }
    try {
        salt.runSaltProcessStep(env, lastNodeTarget, 'file.remove', ["/var/lib/mysql/.galera_bootstrap"])
    } catch (Exception er) {
        common.warningMsg('File is not present')
    }

    // Replace the line containing 'gcomm' in my.cnf with an empty cluster address.
    salt.cmdRun(env, lastNodeTarget, "sed -i '/gcomm/c\\wsrep_cluster_address=\"gcomm://\"' /etc/mysql/my.cnf")

    if (runRestoreDb) {
        restoreGaleraDb(env, lastNodeTarget)
    }

    salt.enforceState(env, lastNodeTarget, 'galera')

    // wait until mysql service on the last shutdown node is up
    try {
        salt.commandStatus(env, lastNodeTarget, 'service mysql status', 'running')
    } catch (Exception er) {
        input message: "Database is not running please fix it first and only then click on PROCEED."
    }

    // Start the remaining nodes; the result of the last call is the return value.
    salt.runSaltProcessStep(env, "I@galera:master and not ${lastNodeTarget}", 'service.start', ['mysql'])
    salt.runSaltProcessStep(env, "I@galera:slave and not ${lastNodeTarget}", 'service.start', ['mysql'])
}
Martin Polreich9044fe42019-03-21 16:00:23 +0100343
344/**
345 * Restores Galera database
346 * @param env Salt Connection object or pepperEnv
347 * @param targetNode Node to be targeted
348 */
def restoreGaleraDb(env, targetNode) {
    // Removes the 'dbrestored' marker file from the backup directory on targetNode
    // and re-applies the xtrabackup state on the xtrabackup clients.
    // `salt` has to be created here; it is not defined anywhere else in this scope.
    def salt = new com.mirantis.mk.Salt()
    def backup_dir = salt.getReturnValues(salt.getPillar(env, targetNode, 'xtrabackup:client:backup_dir'))
    // Fall back to the default location when the pillar does not define one.
    if (backup_dir == null || backup_dir.isEmpty()) {
        backup_dir = '/var/backups/mysql/xtrabackup'
    }
    salt.runSaltProcessStep(env, targetNode, 'file.remove', ["${backup_dir}/dbrestored"])
    // Note: the state is applied on 'I@xtrabackup:client', not only on targetNode.
    salt.cmdRun(env, 'I@xtrabackup:client', "su root -c 'salt-call state.sls xtrabackup'")
}
355
def restoreGaleraDb(env) {
    // Backward-compatibility shim: the one-argument form used to restore the whole
    // cluster, so it warns and delegates to restoreGaleraCluster.
    // `common` has to be created here; it is not defined anywhere else in this scope.
    def common = new com.mirantis.mk.Common()
    common.warningMsg("This method was renamed to 'restoreGaleraCluster'. Please change your pipeline to use this call instead! If you think that you really wanted to call 'restoreGaleraDb' you may be missing 'targetNode' parameter in you call.")
    return restoreGaleraCluster(env)
}
359}