blob: 3023bce52fb77f35df24ec6657220607a3384fc9 [file] [log] [blame]
Martin Polreich8f0f3ac2019-02-15 10:03:33 +01001package com.mirantis.mk
2
3/**
4 *
5 * Galera functions
6 *
7 */
8
9
/**
 * Returns parameters from mysql.status output on given target node
 *
 * @param env          Salt Connection object or pepperEnv
 * @param target       Targeted node
 * @param parameters   Parameters to be returned (String or list of Strings). If no parameters are
 *                     provided, or it is set to '[]', '' or null, all of them are returned.
 * @param print        Boolean value; when true the complete (sorted) status map is pretty-printed
 *                     before any filtering takes place
 * @return result      Map of parameter names and their values. When specific parameters are
 *                     requested, String values that look like numbers are converted to BigDecimal;
 *                     when everything is returned, the values are passed through unchanged.
 */
def getWsrepParameters(env, target, parameters=[], print=false) {
    def salt = new com.mirantis.mk.Salt()
    def common = new com.mirantis.mk.Common()
    // All working variables are declared with 'def' so they stay local to this call
    // instead of being stored in (and shared through) the script binding.
    def result = [:]
    def out = salt.runSaltProcessStep(env, "${target}", "mysql.status", [], null, false)
    def outlist = out['return'][0]
    // The response is keyed by minion id; only the first minion's status map is used.
    def resultYaml = outlist.get(outlist.keySet()[0]).sort()
    if (print) {
        common.prettyPrint(resultYaml)
    }
    // Accept any CharSequence (plain String as well as GString) as a single parameter name.
    if (parameters instanceof CharSequence) {
        parameters = [parameters.toString()]
    }
    if (!parameters || parameters == ['']) {
        result = resultYaml
    } else {
        for (String param in parameters) {
            def value = resultYaml[param]
            if (value instanceof String && value.isBigDecimal()) {
                value = value.toBigDecimal()
            }
            result[param] = value
        }
    }
    return result
}
45
/**
 * Verifies Galera database
 *
 * This function checks for Galera master, tests connection and if reachable, it obtains the result
 * of Salt mysql.status function. The result is then parsed, validated and output to the user.
 *
 * @param env           Salt Connection object or pepperEnv
 * @param slave         Boolean value to enable slave checking (if master is unreachable)
 * @param checkTimeSync Boolean value to enable time sync check
 * @return resultCode   int value used to determine exit status in the calling function:
 *                        0    - status validated, no errors found
 *                        1    - status output could not be parsed, or status stayed "unknown"
 *                        2    - validation reported errors
 *                        128  - Galera master could not be listed or is not reachable
 *                        129  - list of Galera slave minions could not be obtained
 *                        130  - no node to test was found (no slave reachable)
 *                        131  - cluster time is desynchronized or could not be determined
 *                        256  - mysql.status call failed
 *                        1024 - mysql.status returned an empty response
 *                      NOTE(review): 256 and 1024 do not fit an 8-bit process exit status;
 *                      confirm callers compare the raw value and do not pass it to a shell exit.
 */
def verifyGaleraStatus(env, slave=false, checkTimeSync=false) {
    def salt = new com.mirantis.mk.Salt()
    def common = new com.mirantis.mk.Common()
    def out = ""
    def status = "unknown"
    def testNode = ""
    if (!slave) {
        try {
            // Declared locally so the value does not leak into the script binding.
            def galeraMaster = salt.getMinions(env, "I@galera:master")
            common.infoMsg("Current Galera master is: ${galeraMaster}")
            salt.minionsReachable(env, "I@salt:master", "I@galera:master")
            testNode = "I@galera:master"
        } catch (Exception e) {
            common.errorMsg('Galera master is not reachable.')
            common.errorMsg(e.getMessage())
            return 128
        }
    } else {
        // Declared outside the try block because the list is iterated after it.
        def galeraSlaves = []
        try {
            galeraSlaves = salt.getMinions(env, "I@galera:slave")
            common.infoMsg("Testing Galera slave minions: ${galeraSlaves}")
        } catch (Exception e) {
            common.errorMsg("Cannot obtain Galera slave minions list.")
            common.errorMsg(e.getMessage())
            return 129
        }
        // Use the first slave that answers; unreachable ones only produce a warning.
        for (minion in galeraSlaves) {
            try {
                salt.minionsReachable(env, "I@salt:master", minion)
                testNode = minion
                break
            } catch (Exception e) {
                common.warningMsg("Slave '${minion}' is not reachable.")
            }
        }
    }
    if (!testNode) {
        common.errorMsg("No Galera slave was reachable.")
        return 130
    }
    if (checkTimeSync && !salt.checkClusterTimeSync(env, "I@galera:master or I@galera:slave")) {
        common.errorMsg("Time in cluster is desynchronized or it couldn't be detemined. You should fix this issue manually before proceeding.")
        return 131
    }
    try {
        out = salt.runSaltProcessStep(env, "${testNode}", "mysql.status", [], null, false)
    } catch (Exception e) {
        common.errorMsg('Could not determine mysql status.')
        common.errorMsg(e.getMessage())
        return 256
    }
    if (out) {
        try {
            status = validateAndPrintGaleraStatusReport(env, out, testNode)
        } catch (Exception e) {
            common.errorMsg('Could not parse the mysql status output. Check it manually.')
            common.errorMsg(e.getMessage())
            return 1
        }
    } else {
        common.errorMsg("Mysql status response unrecognized or is empty. Response: ${out}")
        return 1024
    }
    if (status == "OK") {
        common.infoMsg("No errors found - MySQL status is ${status}.")
        return 0
    } else if (status == "unknown") {
        common.warningMsg('MySQL status cannot be detemined')
        return 1
    } else {
        common.errorMsg("Errors found.")
        return 2
    }
}
131
/**
 * Validates and prints result of verifyGaleraStatus function
 *
 * The expected cluster size is the number of entries in the 'galera:<role>:members' pillar of the
 * tested node. Every checked wsrep_* parameter is classified as ok / warn / error and the three
 * groups are printed as info, warning and error messages respectively.
 *
 * @param env     Salt Connection object or pepperEnv
 * @param out     Output of the mysql.status Salt function
 * @param minion  Target the status was taken from; the literal "I@galera:master" selects the
 *                master pillar, anything else is treated as a slave
 * @return status "ERROR" when at least one parameter failed validation, "OK" otherwise
 */
def validateAndPrintGaleraStatusReport(env, out, minion) {
    def salt = new com.mirantis.mk.Salt()
    def common = new com.mirantis.mk.Common()
    // Every working variable is declared locally so nothing leaks into the script binding.
    def role = (minion == "I@galera:master") ? "master" : "slave"
    def sizeOut = salt.getReturnValues(salt.getPillar(env, minion, "galera:${role}:members"))
    def expected_cluster_size = sizeOut.size()
    def outlist = out['return'][0]
    // The response is keyed by minion id; only the first minion's status map is used.
    def resultYaml = outlist.get(outlist.keySet()[0]).sort()
    common.prettyPrint(resultYaml)
    def parameters = [
        wsrep_cluster_status: [title: 'Cluster status', expectedValues: ['Primary'], description: ''],
        wsrep_cluster_size: [title: 'Current cluster size', expectedValues: [expected_cluster_size], description: ''],
        wsrep_ready: [title: 'Node status', expectedValues: ['ON', true], description: ''],
        wsrep_local_state_comment: [title: 'Node status comment', expectedValues: ['Joining', 'Waiting on SST', 'Joined', 'Synced', 'Donor'], description: ''],
        wsrep_connected: [title: 'Node connectivity', expectedValues: ['ON', true], description: ''],
        wsrep_local_recv_queue_avg: [title: 'Average size of local reveived queue', expectedThreshold: [warn: 0.5, error: 1.0], description: '(Value above 0 means that the node cannot apply write-sets as fast as it receives them, which can lead to replication throttling)'],
        wsrep_local_send_queue_avg: [title: 'Average size of local send queue', expectedThreshold: [warn: 0.5, error: 1.0], description: '(Value above 0 indicate replication throttling or network throughput issues, such as a bottleneck on the network link.)']
    ]

    // Pass 1: attach the actual value (numeric strings become BigDecimal) to every parameter.
    for (key in parameters.keySet()) {
        def value = resultYaml[key]
        if (value instanceof String && value.isBigDecimal()) {
            value = value.toBigDecimal()
        }
        parameters.get(key) << [actualValue: value]
    }

    // Pass 2: classify every parameter as 'ok', 'warn' or 'error'.
    for (key in parameters.keySet()) {
        def param = parameters.get(key)
        def actual = param.get('actualValue')
        if (param.containsKey('expectedThreshold')) {
            def threshold = param.get('expectedThreshold')
            // A missing or non-numeric value cannot be compared against the thresholds;
            // report it as an error instead of letting the comparison throw.
            if (!(actual instanceof Number) || actual > threshold.get('error')) {
                param << [match: 'error']
            } else if (actual > threshold.get('warn')) {
                param << [match: 'warn']
            } else {
                param << [match: 'ok']
            }
        } else {
            // Groovy '==' is used on purpose (not List.contains) so that e.g. BigDecimal 3
            // matches Integer 3 for the cluster size.
            def verdict = 'error'
            for (expValue in param.get('expectedValues')) {
                if (expValue == actual) {
                    verdict = 'ok'
                    break
                }
            }
            param << [match: verdict]
        }
    }

    // Pass 3: build the three report sections.
    def cluster_info_report = []
    def cluster_warning_report = []
    def cluster_error_report = []
    for (key in parameters.keySet()) {
        def param = parameters.get(key)
        def expValues
        if (param.containsKey('expectedThreshold')) {
            expValues = "below ${param.get('expectedThreshold').get('warn')}"
        } else {
            expValues = param.get('expectedValues').join(' or ')
        }
        def reportString = "${param.title}: ${param.actualValue} (Expected: ${expValues}) ${param.description}"
        if (param.get('match').equals('ok')) {
            cluster_info_report.add("[OK     ] ${reportString}")
        } else if (param.get('match').equals('warn')) {
            cluster_warning_report.add("[WARNING] ${reportString}")
        } else {
            // The original appended a stray ')' here, leaving an unbalanced parenthesis in the report.
            cluster_error_report.add("[  ERROR] ${reportString}")
        }
    }
    common.infoMsg("CLUSTER STATUS REPORT: ${cluster_info_report.size()} expected values, ${cluster_warning_report.size()} warnings and ${cluster_error_report.size()} error found:")
    if (cluster_info_report.size() > 0) {
        common.infoMsg(cluster_info_report.join('\n'))
    }
    if (cluster_warning_report.size() > 0) {
        common.warningMsg(cluster_warning_report.join('\n'))
    }
    if (cluster_error_report.size() > 0) {
        common.errorMsg(cluster_error_report.join('\n'))
        return "ERROR"
    } else {
        return "OK"
    }
}
225
/**
 * Returns last shutdown node of Galera cluster
 *
 * The node is chosen by reading 'seqno' from /var/lib/mysql/grastate.dat on every reachable member
 * and picking the highest value. Members that are unreachable or have no readable seqno are skipped
 * with a warning.
 *
 * @param env    Salt Connection object or pepperEnv
 * @param nodes  List of nodes to check only (defaults to []). If not provided, it will check all nodes
 *               listed in the 'galera:master:members' pillar.
 *               Use this parameter if the cluster splits to several components and you only want to
 *               check one of them.
 * @return       Salt target of the last shutdown node ("S@<ip or hostname>"), or "I@galera:master"
 *               when the member list cannot be retrieved or no seqno could be determined
 */
def getGaleraLastShutdownNode(env, nodes = []) {
    def salt = new com.mirantis.mk.Salt()
    def common = new com.mirantis.mk.Common()
    // Declared locally so nothing leaks into the script binding.
    def members = []
    def lastNode = [ip: '', seqno: -2]
    try {
        if (nodes) {
            nodes = salt.getIPAddressesForNodenames(env, nodes)
            for (node in nodes) {
                // Wrap the member map in a list before concatenating: 'Map + Collection' yields a
                // Map, which collapsed all members into a single map in the original code.
                // Each 'node' is read as a map whose first value is the address.
                members = [[host: "${node.get(node.keySet()[0])}"]] + members
            }
        } else {
            members = salt.getReturnValues(salt.getPillar(env, "I@galera:master", "galera:master:members"))
        }
    } catch (Exception e) {
        common.errorMsg('Could not retrieve members list')
        common.errorMsg(e.getMessage())
        return 'I@galera:master'
    }
    if (members) {
        for (member in members) {
            try {
                salt.minionsReachable(env, 'I@salt:master', "S@${member.host}")
                def out = salt.getReturnValues(salt.cmdRun(env, "S@${member.host}", 'cat /var/lib/mysql/grastate.dat | grep "seqno" | cut -d ":" -f2', true, null, false))
                def seqnoText = out.tokenize('\n')[0].trim()
                // Parsed as Long so large sequence numbers do not overflow an Integer;
                // anything unparsable is ranked lowest (-2), same as the initial value.
                def seqno = seqnoText.isLong() ? seqnoText.toLong() : -2
                def highestSeqno = lastNode.get('seqno')
                if (seqno > highestSeqno) {
                    lastNode << [ip: "${member.host}", seqno: seqno]
                }
            } catch (Exception e) {
                common.warningMsg("Could not determine 'seqno' value for node ${member.host} ")
                common.warningMsg(e.getMessage())
            }
        }
    }
    if (lastNode.get('ip') != '') {
        return "S@${lastNode.ip}"
    } else {
        return "I@galera:master"
    }
}
279
/**
 * Restores Galera cluster
 *
 * Stops mysql on all Galera nodes, picks the last shutdown node, moves its data directory to
 * /root/mysql/mysql.bak, resets its wsrep_cluster_address to "gcomm://", optionally restores the
 * database, enforces the 'galera' state on it and finally starts mysql on the remaining nodes.
 * The clean-up steps are best-effort: a failure only produces a warning.
 *
 * @param env           Salt Connection object or pepperEnv
 * @param runRestoreDb  Boolean to determine if the restoration of DB should be run as well
 * @return              result of the last Salt call (service.start on the remaining slaves)
 */
def restoreGaleraCluster(env, runRestoreDb=true) {
    def salt = new com.mirantis.mk.Salt()
    def common = new com.mirantis.mk.Common()
    // Each best-effort step below keeps its original warning and additionally logs the real
    // exception message, because the original warning is only a guess at the cause.
    try {
        salt.runSaltProcessStep(env, 'I@galera:slave', 'service.stop', ['mysql'])
    } catch (Exception er) {
        common.warningMsg('Mysql service already stopped')
        common.warningMsg(er.getMessage())
    }
    try {
        salt.runSaltProcessStep(env, 'I@galera:master', 'service.stop', ['mysql'])
    } catch (Exception er) {
        common.warningMsg('Mysql service already stopped')
        common.warningMsg(er.getMessage())
    }
    // Declared locally so the target does not leak into the script binding.
    def lastNodeTarget = getGaleraLastShutdownNode(env)
    try {
        salt.cmdRun(env, 'I@galera:slave', "rm /var/lib/mysql/ib_logfile*")
    } catch (Exception er) {
        common.warningMsg('Files are not present')
        common.warningMsg(er.getMessage())
    }
    try {
        salt.cmdRun(env, 'I@galera:slave', "rm /var/lib/mysql/grastate.dat")
    } catch (Exception er) {
        common.warningMsg('Files are not present')
        common.warningMsg(er.getMessage())
    }
    try {
        salt.cmdRun(env, lastNodeTarget, "mkdir /root/mysql/mysql.bak")
    } catch (Exception er) {
        common.warningMsg('Directory already exists')
        common.warningMsg(er.getMessage())
    }
    try {
        salt.cmdRun(env, lastNodeTarget, "rm -rf /root/mysql/mysql.bak/*")
    } catch (Exception er) {
        common.warningMsg('Directory already empty')
        common.warningMsg(er.getMessage())
    }
    try {
        salt.cmdRun(env, lastNodeTarget, "mv /var/lib/mysql/* /root/mysql/mysql.bak")
    } catch (Exception er) {
        common.warningMsg('Files were already moved')
        common.warningMsg(er.getMessage())
    }
    try {
        salt.runSaltProcessStep(env, lastNodeTarget, 'file.remove', ["/var/lib/mysql/.galera_bootstrap"])
    } catch (Exception er) {
        common.warningMsg('File is not present')
        common.warningMsg(er.getMessage())
    }

    // Replace the line containing 'gcomm' with an empty cluster address on the chosen node.
    salt.cmdRun(env, lastNodeTarget, "sed -i '/gcomm/c\\wsrep_cluster_address=\"gcomm://\"' /etc/mysql/my.cnf")

    if (runRestoreDb) {
        restoreGaleraDb(env, lastNodeTarget)
    }

    salt.enforceState(env, lastNodeTarget, 'galera')

    // wait until mysql service on galera master is up
    try {
        salt.commandStatus(env, lastNodeTarget, 'service mysql status', 'running')
    } catch (Exception er) {
        input message: "Database is not running please fix it first and only then click on PROCEED."
    }

    salt.runSaltProcessStep(env, "I@galera:master and not ${lastNodeTarget}", 'service.start', ['mysql'])
    salt.runSaltProcessStep(env, "I@galera:slave and not ${lastNodeTarget}", 'service.start', ['mysql'])
}
Martin Polreich9044fe42019-03-21 16:00:23 +0100349
/**
 * Restores Galera database
 *
 * Removes the '<backup_dir>/dbrestored' file on the target node and then runs the 'xtrabackup'
 * state on all 'I@xtrabackup:client' minions. The backup directory is read from the
 * 'xtrabackup:client:backup_dir' pillar and falls back to '/var/backups/mysql/xtrabackup'.
 *
 * @param env        Salt Connection object or pepperEnv
 * @param targetNode Node to be targeted
 */
def restoreGaleraDb(env, targetNode) {
    // 'salt' was never defined in this method; without this line the first call fails
    // with a missing-property error.
    def salt = new com.mirantis.mk.Salt()
    def backup_dir = salt.getReturnValues(salt.getPillar(env, targetNode, 'xtrabackup:client:backup_dir'))
    if (backup_dir == null || backup_dir.isEmpty()) {
        backup_dir = '/var/backups/mysql/xtrabackup'
    }
    salt.runSaltProcessStep(env, targetNode, 'file.remove', ["${backup_dir}/dbrestored"])
    salt.cmdRun(env, 'I@xtrabackup:client', "su root -c 'salt-call state.sls xtrabackup'")
}
361
/**
 * Backward-compatibility wrapper: the former 'restoreGaleraDb(env)' was renamed to
 * 'restoreGaleraCluster'. Logs a warning and delegates to the new method.
 *
 * @param env Salt Connection object or pepperEnv
 * @return    whatever restoreGaleraCluster(env) returns
 */
def restoreGaleraDb(env) {
    // 'common' was never defined in this method; without this line the warning itself fails.
    def common = new com.mirantis.mk.Common()
    common.warningMsg("This method was renamed to 'restoreGaleraCluster'. Please change your pipeline to use this call instead! If you think that you really wanted to call 'restoreGaleraDb' you may be missing 'targetNode' parameter in you call.")
    return restoreGaleraCluster(env)
}
365}