package com.mirantis.mk

/**
 *
 * Galera functions
 *
 */


/**
 * Returns parameters from mysql.status output on given target node
 *
 * @param env Salt Connection object or pepperEnv
 * @param target Targeted node
 * @param parameters Parameters to be returned (String or list of Strings). If no parameters are provided or the list is empty, all of them are returned.
 * @param print Boolean value to enable printing of the retrieved parameters
 * @return result Map of parameter names to their values
 */

def getWsrepParameters(env, target, parameters=[], print=false) {
    def salt = new com.mirantis.mk.Salt()
    def common = new com.mirantis.mk.Common()
    result = [:]
    out = salt.runSaltProcessStep(env, "${target}", "mysql.status", [], null, false)
    outlist = out['return'][0]
    resultYaml = outlist.get(outlist.keySet()[0]).sort()
    if (print) {
        common.prettyPrint(resultYaml)
    }
    if (parameters instanceof String) {
        // wrap a single parameter name into a list so both cases share one code path
        parameters = [parameters]
    }
    if (parameters == [] || parameters == null) {
        result = resultYaml
    } else {
        for (key in parameters) {
            value = resultYaml[key]
            if (value instanceof String && value.isBigDecimal()) {
                value = value.toBigDecimal()
            }
            result << [(key): value]
        }
    }
    return result
}
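
// Usage sketch (illustrative, not part of the library API): fetch selected
// wsrep counters from the Galera master. 'pepperEnv' is assumed to be an
// initialized Salt connection; the parameter names are standard wsrep status
// variables.
//
//   def params = getWsrepParameters(pepperEnv, "I@galera:master",
//       ["wsrep_cluster_size", "wsrep_flow_control_paused"])
//   if (params["wsrep_cluster_size"] < 3) {
//       common.warningMsg("Cluster is running below the expected size")
//   }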

/**
 * Verifies Galera database
 *
 * This function checks for the Galera master, tests the connection and, if it is reachable,
 * obtains the result of the Salt mysql.status function. The result is then parsed, validated
 * and output to the user.
 *
 * @param env Salt Connection object or pepperEnv
 * @param slave Boolean value to enable slave checking (if the master is unreachable)
 * @param checkTimeSync Boolean value to enable time sync check
 * @return resultCode int value used to determine exit status in the calling function
 */
def verifyGaleraStatus(env, slave=false, checkTimeSync=false) {
    def salt = new com.mirantis.mk.Salt()
    def common = new com.mirantis.mk.Common()
    def out = ""
    def status = "unknown"
    def testNode = ""
    if (!slave) {
        try {
            galeraMaster = salt.getMinions(env, "I@galera:master")
            common.infoMsg("Current Galera master is: ${galeraMaster}")
            salt.minionsReachable(env, "I@salt:master", "I@galera:master")
            testNode = "I@galera:master"
        } catch (Exception e) {
            common.errorMsg('Galera master is not reachable.')
            common.errorMsg(e.getMessage())
            return 128
        }
    } else {
        try {
            galeraSlaves = salt.getMinions(env, "I@galera:slave")
            common.infoMsg("Testing Galera slave minions: ${galeraSlaves}")
        } catch (Exception e) {
            common.errorMsg("Cannot obtain Galera slave minions list.")
            common.errorMsg(e.getMessage())
            return 129
        }
        for (minion in galeraSlaves) {
            try {
                salt.minionsReachable(env, "I@salt:master", minion)
                testNode = minion
                break
            } catch (Exception e) {
                common.warningMsg("Slave '${minion}' is not reachable.")
            }
        }
    }
    if (!testNode) {
        common.errorMsg("No Galera slave was reachable.")
        return 130
    }
    if (checkTimeSync && !salt.checkClusterTimeSync(env, "I@galera:master or I@galera:slave")) {
        common.errorMsg("Time in cluster is desynchronized or it couldn't be determined. You should fix this issue manually before proceeding.")
        return 131
    }
    try {
        out = salt.runSaltProcessStep(env, "${testNode}", "mysql.status", [], null, false)
    } catch (Exception e) {
        common.errorMsg('Could not determine mysql status.')
        common.errorMsg(e.getMessage())
        return 256
    }
    if (out) {
        try {
            status = validateAndPrintGaleraStatusReport(env, out, testNode)
        } catch (Exception e) {
            common.errorMsg('Could not parse the mysql status output. Check it manually.')
            common.errorMsg(e.getMessage())
            return 1
        }
    } else {
        common.errorMsg("Mysql status response is unrecognized or empty. Response: ${out}")
        return 1024
    }
    if (status == "OK") {
        common.infoMsg("No errors found - MySQL status is ${status}.")
        return 0
    } else if (status == "unknown") {
        common.warningMsg('MySQL status cannot be determined')
        return 1
    } else {
        common.errorMsg("Errors found.")
        return 2
    }
}
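
// Usage sketch (illustrative): react to the documented return codes from a
// pipeline stage. 'pepperEnv' is assumed to be an initialized Salt connection.
//
//   def rc = verifyGaleraStatus(pepperEnv, false, true)
//   if (rc == 0) {
//       common.infoMsg("Galera cluster is healthy")
//   } else if (rc >= 128 && rc <= 131) {
//       error("Galera pre-check failed (code ${rc}) - node unreachable or cluster time desynchronized")
//   } else {
//       error("Galera status validation failed (code ${rc})")
//   }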

/**
 * Validates and prints result of verifyGaleraStatus function
 *
 * @param env Salt Connection object or pepperEnv
 * @param out Output of the mysql.status Salt function
 * @param minion Target node the status was obtained from
 * @return status "OK", "ERROR" or "unknown" depending on the result of validation
 */

def validateAndPrintGaleraStatusReport(env, out, minion) {
    def salt = new com.mirantis.mk.Salt()
    def common = new com.mirantis.mk.Common()
    if (minion == "I@galera:master") {
        role = "master"
    } else {
        role = "slave"
    }
    sizeOut = salt.getReturnValues(salt.getPillar(env, minion, "galera:${role}:members"))
    expected_cluster_size = sizeOut.size()
    outlist = out['return'][0]
    resultYaml = outlist.get(outlist.keySet()[0]).sort()
    common.prettyPrint(resultYaml)
    parameters = [
        wsrep_cluster_status: [title: 'Cluster status', expectedValues: ['Primary'], description: ''],
        wsrep_cluster_size: [title: 'Current cluster size', expectedValues: [expected_cluster_size], description: ''],
        wsrep_ready: [title: 'Node status', expectedValues: ['ON', true], description: ''],
        wsrep_local_state_comment: [title: 'Node status comment', expectedValues: ['Joining', 'Waiting on SST', 'Joined', 'Synced', 'Donor'], description: ''],
        wsrep_connected: [title: 'Node connectivity', expectedValues: ['ON', true], description: ''],
        wsrep_local_recv_queue_avg: [title: 'Average size of local received queue', expectedThreshold: [warn: 0.5, error: 1.0], description: '(A value above 0 means that the node cannot apply write-sets as fast as it receives them, which can lead to replication throttling)'],
        wsrep_local_send_queue_avg: [title: 'Average size of local send queue', expectedThreshold: [warn: 0.5, error: 1.0], description: '(A value above 0 indicates replication throttling or network throughput issues, such as a bottleneck on the network link)']
    ]
    for (key in parameters.keySet()) {
        value = resultYaml[key]
        if (value instanceof String && value.isBigDecimal()) {
            value = value.toBigDecimal()
        }
        parameters.get(key) << [actualValue: value]
    }
    for (key in parameters.keySet()) {
        param = parameters.get(key)
        if (key == 'wsrep_local_recv_queue_avg' || key == 'wsrep_local_send_queue_avg') {
            if (param.get('actualValue') == null || (param.get('actualValue') > param.get('expectedThreshold').get('error'))) {
                param << [match: 'error']
            } else if (param.get('actualValue') > param.get('expectedThreshold').get('warn')) {
                param << [match: 'warn']
            } else {
                param << [match: 'ok']
            }
        } else {
            for (expValue in param.get('expectedValues')) {
                if (expValue == param.get('actualValue')) {
                    param << [match: 'ok']
                    break
                } else {
                    param << [match: 'error']
                }
            }
        }
    }
    cluster_info_report = []
    cluster_warning_report = []
    cluster_error_report = []
    for (key in parameters.keySet()) {
        param = parameters.get(key)
        if (param.containsKey('expectedThreshold')) {
            expValues = "below ${param.get('expectedThreshold').get('warn')}"
        } else {
            if (param.get('expectedValues').size() > 1) {
                expValues = param.get('expectedValues').join(' or ')
            } else {
                expValues = param.get('expectedValues')[0]
            }
        }
        reportString = "${param.title}: ${param.actualValue} (Expected: ${expValues}) ${param.description}"
        if (param.get('match').equals('ok')) {
            cluster_info_report.add("[OK     ] ${reportString}")
        } else if (param.get('match').equals('warn')) {
            cluster_warning_report.add("[WARNING] ${reportString}")
        } else {
            cluster_error_report.add("[  ERROR] ${reportString}")
        }
    }
    common.infoMsg("CLUSTER STATUS REPORT: ${cluster_info_report.size()} expected values, ${cluster_warning_report.size()} warnings and ${cluster_error_report.size()} errors found:")
    if (cluster_info_report.size() > 0) {
        common.infoMsg(cluster_info_report.join('\n'))
    }
    if (cluster_warning_report.size() > 0) {
        common.warningMsg(cluster_warning_report.join('\n'))
    }
    if (cluster_error_report.size() > 0) {
        common.errorMsg(cluster_error_report.join('\n'))
        return "ERROR"
    } else {
        return "OK"
    }
}
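
// Usage sketch (illustrative): this helper is normally invoked from
// verifyGaleraStatus() with the raw mysql.status return structure.
//
//   out = salt.runSaltProcessStep(pepperEnv, "I@galera:master", "mysql.status", [], null, false)
//   def status = validateAndPrintGaleraStatusReport(pepperEnv, out, "I@galera:master")
//   // status is "OK" or "ERROR"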

/**
 * Returns the Galera member that was shut down last, determined by the highest
 * 'seqno' value found in /var/lib/mysql/grastate.dat on each member node.
 *
 * @param env Salt Connection object or pepperEnv
 * @return Salt target of the last shutdown node, or 'I@galera:master' as a fallback
 */
def getGaleraLastShutdownNode(env) {
    def salt = new com.mirantis.mk.Salt()
    def common = new com.mirantis.mk.Common()
    members = ''
    lastNode = [ip: '', seqno: -2]
    try {
        members = salt.getReturnValues(salt.getPillar(env, "I@galera:master", "galera:master:members"))
    } catch (Exception e) {
        common.errorMsg('Could not retrieve members list')
        common.errorMsg(e.getMessage())
        return 'I@galera:master'
    }
    if (members) {
        for (member in members) {
            try {
                salt.minionsReachable(env, 'I@salt:master', "S@${member.host}")
                out = salt.getReturnValues(salt.cmdRun(env, "S@${member.host}", 'cat /var/lib/mysql/grastate.dat | grep "seqno" | cut -d ":" -f2', true, null, false))
                seqno = out.tokenize('\n')[0].trim()
                if (seqno.isNumber()) {
                    seqno = seqno.toInteger()
                } else {
                    seqno = -2
                }
                highestSeqno = lastNode.get('seqno')
                if (seqno > highestSeqno) {
                    lastNode << [ip: "${member.host}", seqno: seqno]
                }
            } catch (Exception e) {
                common.warningMsg("Could not determine 'seqno' value for node ${member.host}")
                common.warningMsg(e.getMessage())
            }
        }
    }
    if (lastNode.get('ip') != '') {
        return "S@${lastNode.ip}"
    } else {
        return "I@galera:master"
    }
}
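
// For reference, a typical grastate.dat looks like the sketch below (values
// illustrative). The node with the highest seqno holds the most recent
// committed state and is therefore the safest candidate to bootstrap from:
//
//   # GALERA saved state
//   version: 2.1
//   uuid:    <cluster-state-UUID>
//   seqno:   1234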

/**
 * Restores Galera database
 *
 * @param env Salt Connection object or pepperEnv
 * @return output of salt commands
 */
def restoreGaleraDb(env) {
    def salt = new com.mirantis.mk.Salt()
    def common = new com.mirantis.mk.Common()
    // stop mysql on all Galera nodes
    try {
        salt.runSaltProcessStep(env, 'I@galera:slave', 'service.stop', ['mysql'])
    } catch (Exception er) {
        common.warningMsg('Mysql service already stopped')
    }
    try {
        salt.runSaltProcessStep(env, 'I@galera:master', 'service.stop', ['mysql'])
    } catch (Exception er) {
        common.warningMsg('Mysql service already stopped')
    }
    lastNodeTarget = getGaleraLastShutdownNode(env)
    // remove stale InnoDB logs and the saved Galera state from the slaves
    try {
        salt.cmdRun(env, 'I@galera:slave', "rm /var/lib/mysql/ib_logfile*")
    } catch (Exception er) {
        common.warningMsg('Files are not present')
    }
    try {
        salt.cmdRun(env, 'I@galera:slave', "rm /var/lib/mysql/grastate.dat")
    } catch (Exception er) {
        common.warningMsg('Files are not present')
    }
    // back up the current datadir of the restore node
    try {
        salt.cmdRun(env, lastNodeTarget, "mkdir /root/mysql/mysql.bak")
    } catch (Exception er) {
        common.warningMsg('Directory already exists')
    }
    try {
        salt.cmdRun(env, lastNodeTarget, "rm -rf /root/mysql/mysql.bak/*")
    } catch (Exception er) {
        common.warningMsg('Directory already empty')
    }
    try {
        salt.cmdRun(env, lastNodeTarget, "mv /var/lib/mysql/* /root/mysql/mysql.bak")
    } catch (Exception er) {
        common.warningMsg('Files were already moved')
    }
    try {
        salt.runSaltProcessStep(env, lastNodeTarget, 'file.remove', ["/var/lib/mysql/.galera_bootstrap"])
    } catch (Exception er) {
        common.warningMsg('File is not present')
    }
    // bootstrap a new cluster from the restore node and restore from xtrabackup
    salt.cmdRun(env, lastNodeTarget, "sed -i '/gcomm/c\\wsrep_cluster_address=\"gcomm://\"' /etc/mysql/my.cnf")
    def backup_dir = salt.getReturnValues(salt.getPillar(env, lastNodeTarget, 'xtrabackup:client:backup_dir'))
    if (backup_dir == null || backup_dir.isEmpty()) { backup_dir = '/var/backups/mysql/xtrabackup' }
    salt.runSaltProcessStep(env, lastNodeTarget, 'file.remove', ["${backup_dir}/dbrestored"])
    salt.cmdRun(env, 'I@xtrabackup:client', "su root -c 'salt-call state.sls xtrabackup'")
    salt.runSaltProcessStep(env, lastNodeTarget, 'service.start', ['mysql'])

    // wait until the mysql service on the restore node is up
    try {
        salt.commandStatus(env, lastNodeTarget, 'service mysql status', 'running')
    } catch (Exception er) {
        input message: "Database is not running. Please fix it first and only then click PROCEED."
    }

    // rejoin the remaining nodes to the restored cluster
    salt.runSaltProcessStep(env, "I@galera:master and not ${lastNodeTarget}", 'service.start', ['mysql'])
    salt.runSaltProcessStep(env, "I@galera:slave and not ${lastNodeTarget}", 'service.start', ['mysql'])
}
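
// Usage sketch (illustrative): a typical restore flow from a pipeline,
// verifying cluster health after the restore. 'pepperEnv' is assumed to be
// an initialized Salt connection.
//
//   restoreGaleraDb(pepperEnv)
//   if (verifyGaleraStatus(pepperEnv, false, false) != 0) {
//       error("Galera cluster did not recover after the restore")
//   }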