blob: 3a10a1c7ed211658d4eacfafb9a341a8ef8b2298 [file] [log] [blame]
Martin Polreichf89f9b42019-05-07 15:37:13 +02001package com.mirantis.mk
2
3/**
4 *
5 * Galera functions
6 *
7 */
8
9
/**
 * Returns parameters from mysql.status output on given target node
 *
 * @param env         Salt Connection object or pepperEnv
 * @param target      Targeted node
 * @param parameters  Parameters to be returned (String or list of Strings). If no parameters are provided
 *                    or it is set to '[]' (or ['']), all of them are returned.
 * @param print       When true, the whole mysql.status result is pretty-printed before filtering
 * @return result     Map of parameter names to their values. When specific parameters are requested,
 *                    values that are numeric strings are converted to BigDecimal.
 */
def getWsrepParameters(env, target, parameters=[], print=false) {
    def salt = new com.mirantis.mk.Salt()
    def common = new com.mirantis.mk.Common()
    // All working variables are declared with 'def' so they stay local instead of
    // being written to the script binding (where they would act as shared globals).
    def out = salt.runSaltProcessStep(env, "${target}", "mysql.status", [], null, false)
    def outlist = out['return'][0]
    // Only the entry of the first key in the response is evaluated
    def resultYaml = outlist.get(outlist.keySet()[0]).sort()
    if (print) {
        common.prettyPrint(resultYaml)
    }
    // Accept a single name given as String or GString; toString() keeps map lookups working for GStrings
    if (parameters instanceof CharSequence) {
        parameters = [parameters.toString()]
    }
    if (parameters == [] || parameters == ['']) {
        return resultYaml
    }
    def result = [:]
    for (String param in parameters) {
        def value = resultYaml[param]
        if (value instanceof String && value.isBigDecimal()) {
            value = value.toBigDecimal()
        }
        result[param] = value
    }
    return result
}
45
/**
 * Verifies Galera database
 *
 * This function checks for Galera master, tests connection and if reachable, it obtains the result
 * of Salt mysql.status function. The result is then parsed, validated and output to the user.
 * Before mysql.status is queried, disk i/o utilization of the xtrabackup client/server nodes is checked
 * and, optionally, the time synchronization of the Galera nodes.
 *
 * @param env           Salt Connection object or pepperEnv
 * @param slave         Boolean value to enable slave checking (if master is unreachable)
 * @param checkTimeSync Boolean value to enable time sync check
 * @return resultCode   int value used to determine exit status in the calling function:
 *                      0    - status is OK
 *                      1    - status could not be parsed or determined
 *                      2    - errors found in the status report
 *                      128  - Galera master is not reachable
 *                      129  - list of Galera slaves could not be obtained
 *                      130  - no node was reachable for testing
 *                      131  - time sync check failed
 *                      140  - empty iostat response
 *                      141  - disk i/o utilization is not a number below 50
 *                      256  - mysql.status call failed
 *                      1024 - mysql.status response is empty
 */
def verifyGaleraStatus(env, slave=false, checkTimeSync=false) {
    def salt = new com.mirantis.mk.Salt()
    def common = new com.mirantis.mk.Common()
    def out = ""
    def status = "unknown"
    def testNode = ""
    if (!slave) {
        try {
            // declared locally so it does not leak into the script binding
            def galeraMaster = salt.getMinions(env, "I@galera:master")
            common.infoMsg("Current Galera master is: ${galeraMaster}")
            salt.minionsReachable(env, "I@salt:master", "I@galera:master")
            testNode = "I@galera:master"
        } catch (Exception e) {
            common.errorMsg('Galera master is not reachable.')
            common.errorMsg(e.getMessage())
            return 128
        }
    } else {
        // declared outside of the try block because it is iterated after it
        def galeraSlaves = []
        try {
            galeraSlaves = salt.getMinions(env, "I@galera:slave")
            common.infoMsg("Testing Galera slave minions: ${galeraSlaves}")
        } catch (Exception e) {
            common.errorMsg("Cannot obtain Galera slave minions list.")
            common.errorMsg(e.getMessage())
            return 129
        }
        // the first reachable slave is used for the status check
        for (minion in galeraSlaves) {
            try {
                salt.minionsReachable(env, "I@salt:master", minion)
                testNode = minion
                break
            } catch (Exception e) {
                common.warningMsg("Slave '${minion}' is not reachable.")
            }
        }
    }
    if (!testNode) {
        common.errorMsg("No Galera slave was reachable.")
        return 130
    }
    def checkTargets = salt.getMinions(env, "I@xtrabackup:client or I@xtrabackup:server")
    for (checkTarget in checkTargets) {
        def nodeStatus = salt.minionsReachable(env, 'I@salt:master', checkTarget, null, 10, 5)
        if (nodeStatus != null) {
            def iostatRes = salt.getIostatValues(['saltId': env, 'target': checkTarget, 'parameterName': "%util", 'output': true])
            // covers both an empty map and a null response
            if (!iostatRes) {
                common.errorMsg("Recevived empty response from iostat call on ${checkTarget}. Maybe 'sysstat' package is not installed?")
                return 140
            }
            for (diskKey in iostatRes.keySet()) {
                def utilization = iostatRes[diskKey].toString()
                // anything that is not a number below 50 is treated as a failure
                if (!(utilization.isBigDecimal() && (utilization.toBigDecimal() < 50))) {
                    common.errorMsg("Disk ${diskKey} has to high i/o utilization. Maximum value is 50 and current value is ${iostatRes[diskKey]}.")
                    return 141
                }
            }
        }
    }
    common.infoMsg("Disk i/o utilization was checked and everything seems to be in order.")
    if (checkTimeSync && !salt.checkClusterTimeSync(env, "I@galera:master or I@galera:slave")) {
        common.errorMsg("Time in cluster is desynchronized or it couldn't be detemined. You should fix this issue manually before proceeding.")
        return 131
    }
    try {
        out = salt.runSaltProcessStep(env, "${testNode}", "mysql.status", [], null, false)
    } catch (Exception e) {
        common.errorMsg('Could not determine mysql status.')
        common.errorMsg(e.getMessage())
        return 256
    }
    if (out) {
        try {
            status = validateAndPrintGaleraStatusReport(env, out, testNode)
        } catch (Exception e) {
            common.errorMsg('Could not parse the mysql status output. Check it manually.')
            common.errorMsg(e.getMessage())
            return 1
        }
    } else {
        common.errorMsg("Mysql status response unrecognized or is empty. Response: ${out}")
        return 1024
    }
    if (status == "OK") {
        common.infoMsg("No errors found - MySQL status is ${status}.")
        return 0
    } else if (status == "unknown") {
        common.warningMsg('MySQL status cannot be detemined')
        return 1
    } else {
        common.errorMsg("Errors found.")
        return 2
    }
}
150
/**
 * Validates and prints result of verifyGaleraStatus function
 *
 * @param env    Salt Connection object or pepperEnv
 * @param out    Output of the mysql.status Salt function
 * @param minion Salt target the status was taken from. "I@galera:master" selects the
 *               'galera:master:members' pillar, any other value the 'galera:slave:members' pillar.
 * @return status "OK" when no checked parameter ended in error state, "ERROR" otherwise
 */
def validateAndPrintGaleraStatusReport(env, out, minion) {
    def salt = new com.mirantis.mk.Salt()
    def common = new com.mirantis.mk.Common()
    // Every working variable is declared locally so nothing leaks into the script binding
    def role = (minion == "I@galera:master") ? "master" : "slave"
    def sizeOut = salt.getReturnValues(salt.getPillar(env, minion, "galera:${role}:members"))
    // expected cluster size is the number of members defined in pillar
    def expected_cluster_size = sizeOut.size()
    def outlist = out['return'][0]
    def resultYaml = outlist.get(outlist.keySet()[0]).sort()
    common.prettyPrint(resultYaml)
    def parameters = [
        wsrep_cluster_status: [title: 'Cluster status', expectedValues: ['Primary'], description: ''],
        wsrep_cluster_size: [title: 'Current cluster size', expectedValues: [expected_cluster_size], description: ''],
        wsrep_ready: [title: 'Node status', expectedValues: ['ON', true], description: ''],
        wsrep_local_state_comment: [title: 'Node status comment', expectedValues: ['Joining', 'Waiting on SST', 'Joined', 'Synced', 'Donor'], description: ''],
        wsrep_connected: [title: 'Node connectivity', expectedValues: ['ON', true], description: ''],
        wsrep_local_recv_queue_avg: [title: 'Average size of local reveived queue', expectedThreshold: [warn: 0.5, error: 1.0], description: '(Value above 0 means that the node cannot apply write-sets as fast as it receives them, which can lead to replication throttling)'],
        wsrep_local_send_queue_avg: [title: 'Average size of local send queue', expectedThreshold: [warn: 0.5, error: 1.0], description: '(Value above 0 indicate replication throttling or network throughput issues, such as a bottleneck on the network link.)']
    ]
    // Attach the actual value (numeric strings converted to BigDecimal) to every checked parameter
    for (key in parameters.keySet()) {
        def value = resultYaml[key]
        if (value instanceof String && value.isBigDecimal()) {
            value = value.toBigDecimal()
        }
        parameters.get(key) << [actualValue: value]
    }
    // Classify every parameter as 'ok', 'warn' or 'error'
    for (key in parameters.keySet()) {
        def param = parameters.get(key)
        if (key == 'wsrep_local_recv_queue_avg' || key == 'wsrep_local_send_queue_avg') {
            // threshold based parameters; a missing value counts as an error
            if (param.get('actualValue') == null || (param.get('actualValue') > param.get('expectedThreshold').get('error'))) {
                param << [match: 'error']
            } else if (param.get('actualValue') > param.get('expectedThreshold').get('warn')) {
                param << [match: 'warn']
            } else {
                param << [match: 'ok']
            }
        } else {
            // value based parameters; the first matching expected value wins
            for (expValue in param.get('expectedValues')) {
                if (expValue == param.get('actualValue')) {
                    param << [match: 'ok']
                    break
                } else {
                    param << [match: 'error']
                }
            }
        }
    }
    def cluster_info_report = []
    def cluster_warning_report = []
    def cluster_error_report = []
    for (key in parameters.keySet()) {
        def param = parameters.get(key)
        def expValues
        if (param.containsKey('expectedThreshold')) {
            expValues = "below ${param.get('expectedThreshold').get('warn')}"
        } else {
            if (param.get('expectedValues').size() > 1) {
                expValues = param.get('expectedValues').join(' or ')
            } else {
                expValues = param.get('expectedValues')[0]
            }
        }
        def reportString = "${param.title}: ${param.actualValue} (Expected: ${expValues}) ${param.description}"
        if (param.get('match').equals('ok')) {
            cluster_info_report.add("[OK ] ${reportString}")
        } else if (param.get('match').equals('warn')) {
            cluster_warning_report.add("[WARNING] ${reportString}")
        } else {
            // the original appended an unbalanced ')' to every error line
            cluster_error_report.add("[ ERROR] ${reportString}")
        }
    }
    common.infoMsg("CLUSTER STATUS REPORT: ${cluster_info_report.size()} expected values, ${cluster_warning_report.size()} warnings and ${cluster_error_report.size()} error found:")
    if (cluster_info_report.size() > 0) {
        common.infoMsg(cluster_info_report.join('\n'))
    }
    if (cluster_warning_report.size() > 0) {
        common.warningMsg(cluster_warning_report.join('\n'))
    }
    if (cluster_error_report.size() > 0) {
        common.errorMsg(cluster_error_report.join('\n'))
        return "ERROR"
    } else {
        return "OK"
    }
}
244
/**
 * Returns last shutdown node of Galera cluster
 *
 * The node is selected by the highest 'seqno' value found in /var/lib/mysql/grastate.dat.
 * A node whose seqno cannot be read or parsed is evaluated with seqno 0.
 *
 * @param env   Salt Connection object or pepperEnv
 * @param nodes List of nodes to check only (defaults to []). If not provided, it will check all nodes
 *              from the 'galera:master:members' pillar.
 *              Use this parameter if the cluster splits to several components and you only want to check one of them.
 * @return Salt target of the last shutdown node in form "S@<host>", or "I@galera:master" when
 *         the members list could not be retrieved or no node was selected
 */
def getGaleraLastShutdownNode(env, nodes = []) {
    def salt = new com.mirantis.mk.Salt()
    def common = new com.mirantis.mk.Common()
    // Working variables are declared locally so they do not leak into the script binding
    def members = []
    def lastNode = [ip: '', seqno: -2]
    try {
        if (nodes) {
            nodes = salt.getIPAddressesForNodenames(env, nodes)
            for (node in nodes) {
                // Prepend a member map to the list. The former '[host: ...] + members' was a
                // Map + Collection operation, which yields one merged Map instead of a list of members.
                members.add(0, [host: "${node.get(node.keySet()[0])}".toString()])
            }
        } else {
            members = salt.getReturnValues(salt.getPillar(env, "I@galera:master", "galera:master:members"))
        }
    } catch (Exception e) {
        common.errorMsg('Could not retrieve members list')
        common.errorMsg(e.getMessage())
        return 'I@galera:master'
    }
    if (members) {
        for (member in members) {
            def seqno = 0
            try {
                salt.minionsReachable(env, 'I@salt:master', "S@${member.host}")
                def out = salt.getReturnValues(salt.cmdRun(env, "S@${member.host}", 'cat /var/lib/mysql/grastate.dat | grep "seqno" | cut -d ":" -f2', true, null, false))
                def rawSeqno = out.tokenize('\n')[0].trim()
                if (rawSeqno.isLong()) {
                    // parsed as long: toInteger() throws on values beyond the int range,
                    // which silently turned large sequence numbers into 0
                    seqno = rawSeqno.toLong()
                } else {
                    // in case if /var/lib/mysql/grastate.dat has no any seqno - set it to 0
                    // thus node will be recovered if no other failed found
                    seqno = 0
                }
            } catch (Exception e) {
                common.warningMsg("Could not determine 'seqno' value for node ${member.host} ")
                common.warningMsg(e.getMessage())
                seqno = 0
            }
            // strict comparison: on equal seqno the node evaluated first is kept
            if (seqno > lastNode.get('seqno')) {
                lastNode << [ip: "${member.host}", seqno: seqno]
            }
        }
    }
    if (lastNode.get('ip') != '') {
        return "S@${lastNode.ip}"
    } else {
        return "I@galera:master"
    }
}
301
/**
 * Wrapper around Mysql systemd service
 *
 * Runs the requested Salt 'service.<action>' function for the 'mysql' service and optionally
 * verifies the service state afterwards. When the verification fails, the pipeline is paused
 * with an input prompt so the operator can fix the database before proceeding.
 *
 * @param env         Salt Connection object or pepperEnv
 * @param targetNode  Node to apply changes
 * @param action      Salt service function to run (used as "service.${action}"), e.g. 'start', 'stop' or 'restart'
 * @param checkStatus Whether to check status of Mysql
 * @param checkState  State of service to check
 */
def manageServiceMysql(env, targetNode, action, checkStatus=true, checkState='running') {
    def salt = new com.mirantis.mk.Salt()
    // Use the 'targetNode' argument. The body used to read 'lastNodeTarget', a variable that only
    // exists in the script binding after restoreGaleraCluster has run, so the parameter was ignored.
    salt.runSaltProcessStep(env, targetNode, "service.${action}", ['mysql'])
    if (checkStatus) {
        try {
            salt.commandStatus(env, targetNode, 'service mysql status', checkState)
        } catch (Exception er) {
            // failure is handed over to the operator instead of failing the build
            input message: "Database is not running please fix it first and only then click on PROCEED."
        }
    }
}
320
/**
 * Restores Galera cluster
 *
 * Stops mysql on the last shutdown node, optionally moves the current data directory content
 * to /root/mysql/mysql.bak, resets the gcomm address, optionally restores the database from backup,
 * starts mysql, applies the 'galera' state and restarts mysql.
 *
 * @param env          Salt Connection object or pepperEnv
 * @param runRestoreDb Boolean to determine if the restoration of DB should be run as well
 * @return result of the final mysql restart step
 */
def restoreGaleraCluster(env, runRestoreDb=true) {
    def salt = new com.mirantis.mk.Salt()
    def common = new com.mirantis.mk.Common()
    // NOTE(review): assigned without 'def', so the value is stored in the script binding;
    // other code may read it from there - confirm before turning it into a local variable.
    lastNodeTarget = getGaleraLastShutdownNode(env)
    manageServiceMysql(env, lastNodeTarget, 'stop', false)

    // shell commands executed on the node before the (optional) DB restore, in this order
    def preparationCommands = []
    if (runRestoreDb) {
        // back up the current data directory content
        preparationCommands << "mkdir -p /root/mysql/mysql.bak"
        preparationCommands << "rm -rf /root/mysql/mysql.bak/*"
        preparationCommands << "mv /var/lib/mysql/* /root/mysql/mysql.bak"
    }
    preparationCommands << "rm -f /var/lib/mysql/.galera_bootstrap"
    // make sure that gcom parameter is empty
    preparationCommands << "sed -i '/gcomm/c\\wsrep_cluster_address=\"gcomm://\"' /etc/mysql/my.cnf"
    for (command in preparationCommands) {
        salt.cmdRun(env, lastNodeTarget, command)
    }

    // run restore of DB
    if (runRestoreDb) {
        restoreGaleraDb(env, lastNodeTarget)
    }

    manageServiceMysql(env, lastNodeTarget, 'start')

    // apply any changes in configuration and return value to gcom parameter and then restart mysql to catch
    salt.enforceState(['saltId': env, 'target': lastNodeTarget, 'state': 'galera'])
    manageServiceMysql(env, lastNodeTarget, 'restart')
}
Martin Polreiche48741b2019-03-21 16:00:23 +0100353
/**
 * Restores Galera database
 *
 * Removes the 'dbrestored' file from the backup directory and applies the
 * 'xtrabackup.client' and 'xtrabackup.client.restore' states on the node.
 *
 * @param env        Salt Connection object or pepperEnv
 * @param targetNode Node to be targeted
 */
def restoreGaleraDb(env, targetNode) {
    def salt = new com.mirantis.mk.Salt()
    def backupDirPillar = salt.getPillar(env, targetNode, 'xtrabackup:client:backup_dir')
    def backupDir = salt.getReturnValues(backupDirPillar)
    // fall back to the default location when the pillar value is missing or empty
    if (backupDir == null || backupDir.isEmpty()) {
        backupDir = '/var/backups/mysql/xtrabackup'
    }
    salt.runSaltProcessStep(env, targetNode, 'file.remove', ["${backupDir}/dbrestored"])
    salt.enforceState(['saltId': env, 'target': targetNode, 'state': 'xtrabackup.client'])
    salt.enforceState(['saltId': env, 'target': targetNode, 'state': 'xtrabackup.client.restore'])
}
367
/**
 * Backward compatible entry point kept after the rename to 'restoreGaleraCluster'.
 * Prints a warning and delegates to restoreGaleraCluster.
 *
 * @param env Salt Connection object or pepperEnv
 * @return result of restoreGaleraCluster(env)
 */
def restoreGaleraDb(env) {
    def logger = new com.mirantis.mk.Common()
    def renameNotice = "This method was renamed to 'restoreGaleraCluster'. Please change your pipeline to use this call instead! If you think that you really wanted to call 'restoreGaleraDb' you may be missing 'targetNode' parameter in you call."
    logger.warningMsg(renameNotice)
    return restoreGaleraCluster(env)
}
373
/**
 * Start first node in mysql cluster. Cluster members stay removed in mysql config, additional service restart will be needed once all nodes are up.
 * https://docs.mirantis.com/mcp/q4-18/mcp-operations-guide/tshooting/
 * tshoot-mcp-openstack/tshoot-galera/restore-galera-cluster/
 * restore-galera-manually.html#restore-galera-manually
 *
 * @param env    Salt Connection object or pepperEnv
 * @param target last stopped Galera node
 * @return result of the final retry call
 */
def startFirstNode(env, target) {
    def salt = new com.mirantis.mk.Salt()
    def common = new com.mirantis.mk.Common()

    // make sure that gcom parameter is empty:
    // 1) prefix every line containing 'wsrep_cluster_address' with '#'
    salt.cmdRun(env, target, "sed -i '/wsrep_cluster_address/ s/^#*/#/' /etc/mysql/my.cnf")
    // 2) append an empty gcomm address after each line containing 'wsrep_cluster_address'
    salt.cmdRun(env, target, "sed -i '/wsrep_cluster_address/a wsrep_cluster_address=\"gcomm://\"' /etc/mysql/my.cnf")

    // start mysql service on the last node
    salt.runSaltProcessStep(env, target, 'service.start', ['mysql'])

    // wait until mysql service on the last node is up
    // (presumably 30 attempts with a 10 second delay - confirm against Common.retry)
    common.retry(30, 10) {
        // declared with 'def' so the value stays local to the closure instead of leaking into the script binding
        def wsrepState = getWsrepParameters(env, target, 'wsrep_evs_state')
        if (wsrepState['wsrep_evs_state'] == 'OPERATIONAL') {
            common.infoMsg('WSREP state: OPERATIONAL')
        } else {
            throw new Exception("Mysql service is not running please fix it.")
        }
    }
}
Martin Polreiche48741b2019-03-21 16:00:23 +0100405}