blob: 09b9f1a3722976e6555b06362f8ff2c204e27ed7 [file] [log] [blame]
Martin Polreich8f0f3ac2019-02-15 10:03:33 +01001package com.mirantis.mk
2
3/**
4 *
5 * Galera functions
6 *
7 */
8
9
/**
 * Returns parameters from mysql.status output on given target node
 *
 * @param env Salt Connection object or pepperEnv
 * @param target Targeted node
 * @param parameters Parameters to be returned (String or list of Strings). If no parameters are provided or is set to '[]', it returns all of them.
 * @param print Boolean value; when true, the whole sorted mysql.status output is pretty-printed before filtering
 * @return result Map of parameter names and their values (numeric strings are converted to BigDecimal when specific parameters are requested)
 */
def getWsrepParameters(env, target, parameters=[], print=false) {
    def salt = new com.mirantis.mk.Salt()
    def common = new com.mirantis.mk.Common()
    // All working variables are declared with 'def' so they stay local to this call
    // instead of being stored in the script binding.
    def result = [:]
    def out = salt.runSaltProcessStep(env, "${target}", "mysql.status", [], null, false)
    def outlist = out['return'][0]
    // Only the response of the first minion in the reply is used
    def resultYaml = outlist.get(outlist.keySet()[0]).sort()
    if (print) {
        common.prettyPrint(resultYaml)
    }
    // Accept a single parameter name as well as a list of names
    if (parameters instanceof String) {
        parameters = [parameters]
    }
    if (parameters == [] || parameters == ['']) {
        result = resultYaml
    } else {
        for (String param in parameters) {
            def value = resultYaml[param]
            if (value instanceof String && value.isBigDecimal()) {
                value = value.toBigDecimal()
            }
            result[param] = value
        }
    }
    return result
}
45
/**
 * Verifies Galera database
 *
 * This function checks for Galera master, tests connection and if reachable, it obtains the result
 * of Salt mysql.status function. The result is then parsed, validated and output to the user.
 *
 * Return codes:
 *   0    - MySQL status is OK
 *   1    - status could not be determined or the mysql.status output could not be parsed
 *   2    - validation found errors
 *   128  - Galera master is not reachable
 *   129  - list of Galera slave minions could not be obtained
 *   130  - no node to test was reachable
 *   131  - time sync check failed (only when checkTimeSync is enabled)
 *   140  - iostat call returned an empty response
 *   141  - disk i/o utilization is not a number below 0.5
 *   256  - mysql.status call failed
 *   1024 - mysql.status response is empty
 *
 * @param env Salt Connection object or pepperEnv
 * @param slave Boolean value to enable slave checking (if master in unreachable)
 * @param checkTimeSync Boolean value to enable time sync check
 * @return resultCode int values used to determine exit status in the calling function
 */
def verifyGaleraStatus(env, slave=false, checkTimeSync=false) {
    def salt = new com.mirantis.mk.Salt()
    def common = new com.mirantis.mk.Common()
    def out = ""
    def status = "unknown"
    def testNode = ""
    if (!slave) {
        try {
            // Declared locally so the value does not end up in the script binding
            def galeraMaster = salt.getMinions(env, "I@galera:master")
            common.infoMsg("Current Galera master is: ${galeraMaster}")
            salt.minionsReachable(env, "I@salt:master", "I@galera:master")
            testNode = "I@galera:master"
        } catch (Exception e) {
            common.errorMsg('Galera master is not reachable.')
            common.errorMsg(e.getMessage())
            return 128
        }
    } else {
        def galeraSlaves = []
        try {
            galeraSlaves = salt.getMinions(env, "I@galera:slave")
            common.infoMsg("Testing Galera slave minions: ${galeraSlaves}")
        } catch (Exception e) {
            common.errorMsg("Cannot obtain Galera slave minions list.")
            common.errorMsg(e.getMessage())
            return 129
        }
        // The first reachable slave becomes the node the status is read from
        for (minion in galeraSlaves) {
            try {
                salt.minionsReachable(env, "I@salt:master", minion)
                testNode = minion
                break
            } catch (Exception e) {
                common.warningMsg("Slave '${minion}' is not reachable.")
            }
        }
    }
    if (!testNode) {
        common.errorMsg("No Galera slave was reachable.")
        return 130
    }
    def checkTargets = salt.getMinions(env, "I@salt:master or I@salt:minion")
    for (checkTarget in checkTargets) {
        def iostatRes = salt.getIostatValues(['saltId': env, 'target': checkTarget, 'parameterName': "%util", 'output': true])
        if (iostatRes == [:]) {
            common.errorMsg("Recevived empty response from iostat call on ${checkTarget}. Maybe 'sysstat' package is not installed?")
            return 140
        }
        for (diskKey in iostatRes.keySet()) {
            def utilization = iostatRes[diskKey]
            // A value that is not numeric is treated the same way as a value over the limit
            def withinLimit = utilization.toString().isBigDecimal() && (utilization.toBigDecimal() < 0.5)
            if (!withinLimit) {
                common.errorMsg("Disk ${diskKey} has to high i/o utilization. Maximum value is 0.5 and current value is ${iostatRes[diskKey]}.")
                return 141
            }
        }
    }
    common.infoMsg("Disk i/o utilization was checked and everything seems to be in order.")
    if (checkTimeSync && !salt.checkClusterTimeSync(env, "I@galera:master or I@galera:slave")) {
        common.errorMsg("Time in cluster is desynchronized or it couldn't be detemined. You should fix this issue manually before proceeding.")
        return 131
    }
    try {
        out = salt.runSaltProcessStep(env, "${testNode}", "mysql.status", [], null, false)
    } catch (Exception e) {
        common.errorMsg('Could not determine mysql status.')
        common.errorMsg(e.getMessage())
        return 256
    }
    if (out) {
        try {
            status = validateAndPrintGaleraStatusReport(env, out, testNode)
        } catch (Exception e) {
            common.errorMsg('Could not parse the mysql status output. Check it manually.')
            common.errorMsg(e.getMessage())
            return 1
        }
    } else {
        common.errorMsg("Mysql status response unrecognized or is empty. Response: ${out}")
        return 1024
    }
    if (status == "OK") {
        common.infoMsg("No errors found - MySQL status is ${status}.")
        return 0
    } else if (status == "unknown") {
        common.warningMsg('MySQL status cannot be detemined')
        return 1
    } else {
        common.errorMsg("Errors found.")
        return 2
    }
}
147
/** Validates and prints result of verifyGaleraStatus function
@param env Salt Connection object or pepperEnv
@param out Output of the mysql.status Salt function
@param minion Target the status was read from; "I@galera:master" selects the master pillar, anything else the slave pillar
@return status "OK" when no checked parameter is in error state, "ERROR" otherwise
*/

def validateAndPrintGaleraStatusReport(env, out, minion) {
    def salt = new com.mirantis.mk.Salt()
    def common = new com.mirantis.mk.Common()
    // All working variables are declared with 'def' so they stay local to this call
    // instead of being stored in the script binding.
    def role = (minion == "I@galera:master") ? "master" : "slave"
    // Expected cluster size is the number of members listed in the galera pillar
    def sizeOut = salt.getReturnValues(salt.getPillar(env, minion, "galera:${role}:members"))
    def expected_cluster_size = sizeOut.size()
    def outlist = out['return'][0]
    def resultYaml = outlist.get(outlist.keySet()[0]).sort()
    common.prettyPrint(resultYaml)
    def parameters = [
        wsrep_cluster_status: [title: 'Cluster status', expectedValues: ['Primary'], description: ''],
        wsrep_cluster_size: [title: 'Current cluster size', expectedValues: [expected_cluster_size], description: ''],
        wsrep_ready: [title: 'Node status', expectedValues: ['ON', true], description: ''],
        wsrep_local_state_comment: [title: 'Node status comment', expectedValues: ['Joining', 'Waiting on SST', 'Joined', 'Synced', 'Donor'], description: ''],
        wsrep_connected: [title: 'Node connectivity', expectedValues: ['ON', true], description: ''],
        wsrep_local_recv_queue_avg: [title: 'Average size of local reveived queue', expectedThreshold: [warn: 0.5, error: 1.0], description: '(Value above 0 means that the node cannot apply write-sets as fast as it receives them, which can lead to replication throttling)'],
        wsrep_local_send_queue_avg: [title: 'Average size of local send queue', expectedThreshold: [warn: 0.5, error: 1.0], description: '(Value above 0 indicate replication throttling or network throughput issues, such as a bottleneck on the network link.)']
    ]
    // Step 1: attach the actual value reported by mysql.status to every checked parameter
    for (key in parameters.keySet()) {
        def value = resultYaml[key]
        if (value instanceof String && value.isBigDecimal()) {
            value = value.toBigDecimal()
        }
        parameters.get(key) << [actualValue: value]
    }
    // Step 2: classify every parameter as 'ok', 'warn' or 'error'
    for (key in parameters.keySet()) {
        def param = parameters.get(key)
        if (key == 'wsrep_local_recv_queue_avg' || key == 'wsrep_local_send_queue_avg') {
            // Threshold based parameters; a missing value counts as an error
            if (param.get('actualValue') == null || (param.get('actualValue') > param.get('expectedThreshold').get('error'))) {
                param << [match: 'error']
            } else if (param.get('actualValue') > param.get('expectedThreshold').get('warn')) {
                param << [match: 'warn']
            } else {
                param << [match: 'ok']
            }
        } else {
            // Enumerated parameters; the value has to equal one of the expected values
            for (expValue in param.get('expectedValues')) {
                if (expValue == param.get('actualValue')) {
                    param << [match: 'ok']
                    break
                } else {
                    param << [match: 'error']
                }
            }
        }
    }
    // Step 3: build the report lines grouped by severity
    def cluster_info_report = []
    def cluster_warning_report = []
    def cluster_error_report = []
    for (key in parameters.keySet()) {
        def param = parameters.get(key)
        def expValues
        if (param.containsKey('expectedThreshold')) {
            expValues = "below ${param.get('expectedThreshold').get('warn')}"
        } else {
            if (param.get('expectedValues').size() > 1) {
                expValues = param.get('expectedValues').join(' or ')
            } else {
                expValues = param.get('expectedValues')[0]
            }
        }
        def reportString = "${param.title}: ${param.actualValue} (Expected: ${expValues}) ${param.description}"
        if (param.get('match').equals('ok')) {
            cluster_info_report.add("[OK     ] ${reportString}")
        } else if (param.get('match').equals('warn')) {
            cluster_warning_report.add("[WARNING] ${reportString}")
        } else {
            // The original line ended with a stray ')' that was printed after every error entry
            cluster_error_report.add("[  ERROR] ${reportString}")
        }
    }
    common.infoMsg("CLUSTER STATUS REPORT: ${cluster_info_report.size()} expected values, ${cluster_warning_report.size()} warnings and ${cluster_error_report.size()} error found:")
    if (cluster_info_report.size() > 0) {
        common.infoMsg(cluster_info_report.join('\n'))
    }
    if (cluster_warning_report.size() > 0) {
        common.warningMsg(cluster_warning_report.join('\n'))
    }
    if (cluster_error_report.size() > 0) {
        common.errorMsg(cluster_error_report.join('\n'))
        return "ERROR"
    } else {
        return "OK"
    }
}
241
/** Returns last shutdown node of Galera cluster
@param env Salt Connection object or pepperEnv
@param nodes List of nodes to check only (defaults to []). If not provided, it will check all nodes.
             Use this parameter if the cluster splits to several components and you only want to check one of them.
@return Salt target of the last shutdown node in the form "S@<ip address or hostname>", or "I@galera:master"
        when the members list cannot be retrieved or no node reports a usable seqno
*/

def getGaleraLastShutdownNode(env, nodes = []) {
    def salt = new com.mirantis.mk.Salt()
    def common = new com.mirantis.mk.Common()
    // All working variables are declared with 'def' so they stay local to this call
    // instead of being stored in the script binding.
    def members = []
    def lastNode = [ip: '', seqno: -2]
    try {
        if (nodes) {
            // assumes each returned item is a map whose first value is the address - TODO confirm against Salt.getIPAddressesForNodenames
            def resolvedNodes = salt.getIPAddressesForNodenames(env, nodes)
            for (node in resolvedNodes) {
                // The member map is wrapped in a list: '[host: x] + members' would be Map.plus and
                // produce a single Map instead of a list of member maps.
                members = [[host: "${node.get(node.keySet()[0])}"]] + members
            }
        } else {
            members = salt.getReturnValues(salt.getPillar(env, "I@galera:master", "galera:master:members"))
        }
    } catch (Exception e) {
        common.errorMsg('Could not retrieve members list')
        common.errorMsg(e.getMessage())
        return 'I@galera:master'
    }
    if (members) {
        for (member in members) {
            try {
                salt.minionsReachable(env, 'I@salt:master', "S@${member.host}")
                def out = salt.getReturnValues(salt.cmdRun(env, "S@${member.host}", 'cat /var/lib/mysql/grastate.dat | grep "seqno" | cut -d ":" -f2', true, null, false))
                def seqnoText = out.tokenize('\n')[0].trim()
                // isLong()/toLong() accept exactly the same inputs, so values that are not whole numbers
                // fall back to -2 instead of throwing, and values above the Integer range are still compared.
                def seqno = seqnoText.isLong() ? seqnoText.toLong() : -2
                // Strict comparison: on equal seqno the node checked first is kept
                if (seqno > lastNode.get('seqno')) {
                    lastNode << [ip: "${member.host}", seqno: seqno]
                }
            } catch (Exception e) {
                common.warningMsg("Could not determine 'seqno' value for node ${member.host} ")
                common.warningMsg(e.getMessage())
            }
        }
    }
    if (lastNode.get('ip') != '') {
        return "S@${lastNode.ip}"
    } else {
        return "I@galera:master"
    }
}
295
/**
 * Restores Galera cluster
 *
 * Stops mysql on all Galera nodes, picks the last shutdown node, removes the state files on the
 * slaves, moves the data directory of the picked node to /root/mysql/mysql.bak, resets its
 * wsrep_cluster_address, optionally restores the database, applies the 'galera' state on it and
 * finally starts mysql on the remaining nodes.
 *
 * The preparation steps are best-effort: a failure is reported as a warning and the procedure continues.
 *
 * @param env Salt Connection object or pepperEnv
 * @param runRestoreDb Boolean to determine if the restoration of DB should be run as well
 * @return output of salt commands
 */
def restoreGaleraCluster(env, runRestoreDb=true) {
    def salt = new com.mirantis.mk.Salt()
    def common = new com.mirantis.mk.Common()
    try {
        salt.runSaltProcessStep(env, 'I@galera:slave', 'service.stop', ['mysql'])
    } catch (Exception er) {
        common.warningMsg('Mysql service already stopped')
        // The warning above is only the assumed cause, so the real message is logged as well
        common.warningMsg(er.getMessage())
    }
    try {
        salt.runSaltProcessStep(env, 'I@galera:master', 'service.stop', ['mysql'])
    } catch (Exception er) {
        common.warningMsg('Mysql service already stopped')
        common.warningMsg(er.getMessage())
    }
    // Declared locally so the value does not end up in the script binding
    def lastNodeTarget = getGaleraLastShutdownNode(env)
    try {
        salt.cmdRun(env, 'I@galera:slave', "rm /var/lib/mysql/ib_logfile*")
    } catch (Exception er) {
        common.warningMsg('Files are not present')
        common.warningMsg(er.getMessage())
    }
    try {
        salt.cmdRun(env, 'I@galera:slave', "rm /var/lib/mysql/grastate.dat")
    } catch (Exception er) {
        common.warningMsg('Files are not present')
        common.warningMsg(er.getMessage())
    }
    try {
        salt.cmdRun(env, lastNodeTarget, "mkdir /root/mysql/mysql.bak")
    } catch (Exception er) {
        common.warningMsg('Directory already exists')
        common.warningMsg(er.getMessage())
    }
    try {
        salt.cmdRun(env, lastNodeTarget, "rm -rf /root/mysql/mysql.bak/*")
    } catch (Exception er) {
        common.warningMsg('Directory already empty')
        common.warningMsg(er.getMessage())
    }
    try {
        salt.cmdRun(env, lastNodeTarget, "mv /var/lib/mysql/* /root/mysql/mysql.bak")
    } catch (Exception er) {
        common.warningMsg('Files were already moved')
        common.warningMsg(er.getMessage())
    }
    try {
        salt.runSaltProcessStep(env, lastNodeTarget, 'file.remove', ["/var/lib/mysql/.galera_bootstrap"])
    } catch (Exception er) {
        common.warningMsg('File is not present')
        common.warningMsg(er.getMessage())
    }

    // Replace the line containing 'gcomm' in my.cnf with an empty cluster address
    salt.cmdRun(env, lastNodeTarget, "sed -i '/gcomm/c\\wsrep_cluster_address=\"gcomm://\"' /etc/mysql/my.cnf")

    if (runRestoreDb) {
        restoreGaleraDb(env, lastNodeTarget)
    }

    salt.enforceState(env, lastNodeTarget, 'galera')

    // wait until mysql service on galera master is up
    try {
        salt.commandStatus(env, lastNodeTarget, 'service mysql status', 'running')
    } catch (Exception er) {
        input message: "Database is not running please fix it first and only then click on PROCEED."
    }

    salt.runSaltProcessStep(env, "I@galera:master and not ${lastNodeTarget}", 'service.start', ['mysql'])
    salt.runSaltProcessStep(env, "I@galera:slave and not ${lastNodeTarget}", 'service.start', ['mysql'])
}
Martin Polreich9044fe42019-03-21 16:00:23 +0100365
/**
 * Restores Galera database
 *
 * Removes the 'dbrestored' marker file from the backup directory on the target node and applies
 * the 'xtrabackup' state on the xtrabackup clients.
 *
 * @param env Salt Connection object or pepperEnv
 * @param targetNode Node to be targeted
 */
def restoreGaleraDb(env, targetNode) {
    // 'salt' was used here without being defined in this function
    def salt = new com.mirantis.mk.Salt()
    def backup_dir = salt.getReturnValues(salt.getPillar(env, targetNode, 'xtrabackup:client:backup_dir'))
    // Fall back to a fixed location when the pillar does not provide the backup directory
    if (backup_dir == null || backup_dir.isEmpty()) {
        backup_dir = '/var/backups/mysql/xtrabackup'
    }
    salt.runSaltProcessStep(env, targetNode, 'file.remove', ["${backup_dir}/dbrestored"])
    salt.cmdRun(env, 'I@xtrabackup:client', "su root -c 'salt-call state.sls xtrabackup'")
}
377
/**
 * Backward-compatible entry point kept for pipelines calling 'restoreGaleraDb' with a single argument.
 * It prints a warning and runs restoreGaleraCluster instead.
 *
 * @param env Salt Connection object or pepperEnv
 * @return result of restoreGaleraCluster(env)
 */
def restoreGaleraDb(env) {
    // 'common' was used here without being defined in this function
    def common = new com.mirantis.mk.Common()
    common.warningMsg("This method was renamed to 'restoreGaleraCluster'. Please change your pipeline to use this call instead! If you think that you really wanted to call 'restoreGaleraDb' you may be missing 'targetNode' parameter in you call.")
    return restoreGaleraCluster(env)
}