blob: 5a15823564e157278d97ee9dc6df5b69d918ff27 [file] [log] [blame]
Martin Polreich8f0f3ac2019-02-15 10:03:33 +01001package com.mirantis.mk
2
3/**
4 *
5 * Galera functions
6 *
7 */
8
9
/**
 * Returns parameters from mysql.status output on given target node
 *
 * @param env Salt Connection object or pepperEnv
 * @param target Targeted node
 * @param parameters Parameters to be returned (String or list of Strings). If no parameters are
 *                   provided or it is set to '[]' (or ['']), the whole sorted mysql.status output
 *                   is returned as-is (values are not converted in that case).
 * @param print Boolean value, when true the whole sorted mysql.status output is pretty-printed
 * @return result Map of the requested parameter names to their values; values that are numeric
 *                strings are converted to BigDecimal, parameters missing in the output map to null
 */
def getWsrepParameters(env, target, parameters=[], print=false) {
    def salt = new com.mirantis.mk.Salt()
    def common = new com.mirantis.mk.Common()
    // All working variables are declared locally so that nothing leaks into the script binding.
    def out = salt.runSaltProcessStep(env, "${target}", "mysql.status", [], null, false)
    def outlist = out['return'][0]
    // Only the output of the first minion in the response is used.
    def resultYaml = outlist.get(outlist.keySet()[0]).sort()
    if (print) {
        common.prettyPrint(resultYaml)
    }
    // CharSequence covers both String and GString; a GString would otherwise be
    // iterated character by character in the loop below.
    if (parameters instanceof CharSequence) {
        parameters = [parameters.toString()]
    }
    if (parameters == [] || parameters == ['']) {
        return resultYaml
    }
    def result = [:]
    for (String param in parameters) {
        def value = resultYaml[param]
        if (value instanceof String && value.isBigDecimal()) {
            value = value.toBigDecimal()
        }
        result[param] = value
    }
    return result
}
45
/**
 * Verifies Galera database
 *
 * This function checks for Galera master, tests connection and if reachable, it obtains the result
 * of Salt mysql.status function. The result is then parsed, validated and output to the user.
 *
 * Return codes produced by this function:
 *   0    - status is OK
 *   1    - status could not be parsed or remained "unknown"
 *   2    - validation reported errors
 *   128  - Galera master is not reachable (slave == false)
 *   129  - list of Galera slave minions could not be obtained (slave == true)
 *   130  - no node to test was found (no slave reachable)
 *   131  - time sync check was requested and failed
 *   256  - mysql.status call failed
 *   1024 - mysql.status response was empty
 *
 * @param env Salt Connection object or pepperEnv
 * @param slave Boolean value to enable slave checking (if master is unreachable)
 * @param checkTimeSync Boolean value to enable time sync check
 * @return resultCode int values used to determine exit status in the calling function
 */
def verifyGaleraStatus(env, slave=false, checkTimeSync=false) {
    def salt = new com.mirantis.mk.Salt()
    def common = new com.mirantis.mk.Common()
    def out = ""
    def status = "unknown"
    def testNode = ""
    if (!slave) {
        try {
            def galeraMaster = salt.getMinions(env, "I@galera:master")
            common.infoMsg("Current Galera master is: ${galeraMaster}")
            salt.minionsReachable(env, "I@salt:master", "I@galera:master")
            testNode = "I@galera:master"
        } catch (Exception e) {
            common.errorMsg('Galera master is not reachable.')
            return 128
        }
    } else {
        // Declared outside of the try block because the list is iterated after it;
        // kept local so it does not leak into the script binding.
        def galeraSlaves
        try {
            galeraSlaves = salt.getMinions(env, "I@galera:slave")
            common.infoMsg("Testing Galera slave minions: ${galeraSlaves}")
        } catch (Exception e) {
            common.errorMsg("Cannot obtain Galera slave minions list.")
            return 129
        }
        // The first reachable slave is used as the node to test.
        for (minion in galeraSlaves) {
            try {
                salt.minionsReachable(env, "I@salt:master", minion)
                testNode = minion
                break
            } catch (Exception e) {
                common.warningMsg("Slave '${minion}' is not reachable.")
            }
        }
    }
    if (!testNode) {
        common.errorMsg("No Galera slave was reachable.")
        return 130
    }
    if (checkTimeSync && !salt.checkClusterTimeSync(env, "I@galera:master or I@galera:slave")) {
        common.errorMsg("Time in cluster is desynchronized or it couldn't be detemined. You should fix this issue manually before proceeding.")
        return 131
    }
    try {
        out = salt.runSaltProcessStep(env, "${testNode}", "mysql.status", [], null, false)
    } catch (Exception e) {
        common.errorMsg('Could not determine mysql status.')
        return 256
    }
    if (out) {
        try {
            status = validateAndPrintGaleraStatusReport(env, out, testNode)
        } catch (Exception e) {
            common.errorMsg('Could not parse the mysql status output. Check it manually.')
            return 1
        }
    } else {
        common.errorMsg("Mysql status response unrecognized or is empty. Response: ${out}")
        return 1024
    }
    if (status == "OK") {
        common.infoMsg("No errors found - MySQL status is ${status}.")
        return 0
    } else if (status == "unknown") {
        common.warningMsg('MySQL status cannot be detemined')
        return 1
    } else {
        common.errorMsg("Errors found.")
        return 2
    }
}
127
/** Validates and prints result of verifyGaleraStatus function
@param env Salt Connection object or pepperEnv
@param out Output of the mysql.status Salt function
@param minion Salt target the output was obtained from; "I@galera:master" selects the master
              pillar (galera:master:members), any other value selects the slave pillar
@return status "OK" when no parameter is reported as error (warnings are allowed), "ERROR" otherwise
*/
def validateAndPrintGaleraStatusReport(env, out, minion) {
    def salt = new com.mirantis.mk.Salt()
    def common = new com.mirantis.mk.Common()
    // All working variables are declared locally so that nothing leaks into the script binding.
    def role = (minion == "I@galera:master") ? "master" : "slave"
    // Expected cluster size is the number of members configured in the pillar of the tested node.
    def sizeOut = salt.getReturnValues(salt.getPillar(env, minion, "galera:${role}:members"))
    def expected_cluster_size = sizeOut.size()
    def outlist = out['return'][0]
    // Only the output of the first minion in the response is evaluated.
    def resultYaml = outlist.get(outlist.keySet()[0]).sort()
    common.prettyPrint(resultYaml)
    def parameters = [
        wsrep_cluster_status: [title: 'Cluster status', expectedValues: ['Primary'], description: ''],
        wsrep_cluster_size: [title: 'Current cluster size', expectedValues: [expected_cluster_size], description: ''],
        wsrep_ready: [title: 'Node status', expectedValues: ['ON', true], description: ''],
        wsrep_local_state_comment: [title: 'Node status comment', expectedValues: ['Joining', 'Waiting on SST', 'Joined', 'Synced', 'Donor'], description: ''],
        wsrep_connected: [title: 'Node connectivity', expectedValues: ['ON', true], description: ''],
        wsrep_local_recv_queue_avg: [title: 'Average size of local reveived queue', expectedThreshold: [warn: 0.5, error: 1.0], description: '(Value above 0 means that the node cannot apply write-sets as fast as it receives them, which can lead to replication throttling)'],
        wsrep_local_send_queue_avg: [title: 'Average size of local send queue', expectedThreshold: [warn: 0.5, error: 1.0], description: '(Value above 0 indicate replication throttling or network throughput issues, such as a bottleneck on the network link.)']
    ]
    def cluster_info_report = []
    def cluster_warning_report = []
    def cluster_error_report = []
    for (key in parameters.keySet()) {
        def param = parameters.get(key)
        def actualValue = resultYaml[key]
        // Numeric strings are converted so that they compare numerically with expected values.
        if (actualValue instanceof String && actualValue.isBigDecimal()) {
            actualValue = actualValue.toBigDecimal()
        }
        def match
        def expValues
        if (param.containsKey('expectedThreshold')) {
            def threshold = param.get('expectedThreshold')
            // A missing value is treated as an error, same as a value above the error threshold.
            if (actualValue == null || actualValue > threshold.get('error')) {
                match = 'error'
            } else if (actualValue > threshold.get('warn')) {
                match = 'warn'
            } else {
                match = 'ok'
            }
            expValues = "below ${threshold.get('warn')}"
        } else {
            // Groovy '==' is used on purpose (instead of List.contains) so that e.g. an Integer
            // expected value matches a BigDecimal actual value of the same magnitude.
            match = 'error'
            for (expValue in param.get('expectedValues')) {
                if (expValue == actualValue) {
                    match = 'ok'
                    break
                }
            }
            expValues = param.get('expectedValues').join(' or ')
        }
        def reportString = "${param.title}: ${actualValue} (Expected: ${expValues}) ${param.description}"
        if (match == 'ok') {
            cluster_info_report.add("[OK     ] ${reportString}")
        } else if (match == 'warn') {
            cluster_warning_report.add("[WARNING] ${reportString}")
        } else {
            // The original appended a stray ')' to every error line; removed.
            cluster_error_report.add("[  ERROR] ${reportString}")
        }
    }
    common.infoMsg("CLUSTER STATUS REPORT: ${cluster_info_report.size()} expected values, ${cluster_warning_report.size()} warnings and ${cluster_error_report.size()} error found:")
    if (cluster_info_report.size() > 0) {
        common.infoMsg(cluster_info_report.join('\n'))
    }
    if (cluster_warning_report.size() > 0) {
        common.warningMsg(cluster_warning_report.join('\n'))
    }
    if (cluster_error_report.size() > 0) {
        common.errorMsg(cluster_error_report.join('\n'))
        return "ERROR"
    } else {
        return "OK"
    }
}
221
/** Returns last shutdown node of Galera cluster
@param env Salt Connection object or pepperEnv
@param nodes List of nodes to check only (defaults to []). If not provided, it will check all nodes
             listed in the galera:master:members pillar.
             Use this parameter if the cluster splits to several components and you only want to check one of them.
@return Salt target of the node with the highest 'seqno' found in /var/lib/mysql/grastate.dat,
        in the form "S@<host>"; "I@galera:master" when the members list cannot be retrieved or
        no node yields a usable seqno
*/
def getGaleraLastShutdownNode(env, nodes = []) {
    def salt = new com.mirantis.mk.Salt()
    def common = new com.mirantis.mk.Common()
    // All working variables are declared locally so that nothing leaks into the script binding.
    def members = []
    def lastNode = [ip: '', seqno: -2]
    try {
        if (nodes) {
            def nodeAddresses = salt.getIPAddressesForNodenames(env, nodes)
            for (node in nodeAddresses) {
                // Prepend a [host: ...] map to the list. The original expression
                // '[host: ...] + members' is Map + Collection in Groovy, which turned
                // 'members' into a single Map instead of a list of member maps.
                // NOTE(review): each 'node' is presumably a single-entry map name -> address - confirm.
                members.add(0, [host: "${node.get(node.keySet()[0])}"])
            }
        } else {
            members = salt.getReturnValues(salt.getPillar(env, "I@galera:master", "galera:master:members"))
        }
    } catch (Exception er) {
        common.errorMsg('Could not retrieve members list')
        return 'I@galera:master'
    }
    if (members) {
        for (member in members) {
            try {
                salt.minionsReachable(env, 'I@salt:master', "S@${member.host}")
                def out = salt.getReturnValues(salt.cmdRun(env, "S@${member.host}", 'cat /var/lib/mysql/grastate.dat | grep "seqno" | cut -d ":" -f2', true, null, false))
                def seqnoLines = out.tokenize('\n')
                def seqnoText = seqnoLines ? seqnoLines[0].trim() : ''
                // Parsed as long: toInteger() throws for values above Integer.MAX_VALUE,
                // which made the node be skipped. Anything that is not an integral
                // number is treated as -2 and therefore never selected.
                def seqno = seqnoText.isLong() ? seqnoText.toLong() : -2
                if (seqno > lastNode.get('seqno')) {
                    lastNode << [ip: "${member.host}", seqno: seqno]
                }
            } catch (Exception er) {
                common.warningMsg("Could not determine 'seqno' value for node ${member.host} ")
            }
        }
    }
    if (lastNode.get('ip') != '') {
        return "S@${lastNode.ip}"
    } else {
        return "I@galera:master"
    }
}
273
/**
 * Restores Galera database
 *
 * Stops mysql on all Galera nodes, picks the last shutdown node (see getGaleraLastShutdownNode),
 * moves its current data directory to /root/mysql/mysql.bak, re-runs the xtrabackup and galera
 * states to restore the data and finally starts mysql on the remaining nodes.
 * Failures of the preparatory steps are reported as warnings only (best effort).
 *
 * @param env Salt Connection object or pepperEnv
 * @return output of the last Salt command (service.start on the remaining slave nodes)
 */
def restoreGaleraDb(env) {
    def salt = new com.mirantis.mk.Salt()
    def common = new com.mirantis.mk.Common()
    try {
        salt.runSaltProcessStep(env, 'I@galera:slave', 'service.stop', ['mysql'])
    } catch (Exception er) {
        common.warningMsg('Mysql service already stopped')
    }
    try {
        salt.runSaltProcessStep(env, 'I@galera:master', 'service.stop', ['mysql'])
    } catch (Exception er) {
        common.warningMsg('Mysql service already stopped')
    }
    // Declared locally so that the target does not leak into the script binding.
    def lastNodeTarget = getGaleraLastShutdownNode(env)
    try {
        salt.cmdRun(env, 'I@galera:slave', "rm /var/lib/mysql/ib_logfile*")
    } catch (Exception er) {
        common.warningMsg('Files are not present')
    }
    try {
        salt.cmdRun(env, 'I@galera:slave', "rm /var/lib/mysql/grastate.dat")
    } catch (Exception er) {
        common.warningMsg('Files are not present')
    }
    try {
        // '-p' creates the missing parent /root/mysql as well; without it the command failed on
        // a fresh node, was misreported as 'Directory already exists' and the backup 'mv' below
        // then failed too.
        salt.cmdRun(env, lastNodeTarget, "mkdir -p /root/mysql/mysql.bak")
    } catch (Exception er) {
        common.warningMsg('Directory already exists')
    }
    try {
        salt.cmdRun(env, lastNodeTarget, "rm -rf /root/mysql/mysql.bak/*")
    } catch (Exception er) {
        common.warningMsg('Directory already empty')
    }
    try {
        salt.cmdRun(env, lastNodeTarget, "mv /var/lib/mysql/* /root/mysql/mysql.bak")
    } catch (Exception er) {
        common.warningMsg('Files were already moved')
    }
    try {
        salt.runSaltProcessStep(env, lastNodeTarget, 'file.remove', ["/var/lib/mysql/.galera_bootstrap"])
    } catch (Exception er) {
        common.warningMsg('File is not present')
    }
    // Replace the line containing 'gcomm' in my.cnf with an empty cluster address.
    salt.cmdRun(env, lastNodeTarget, "sed -i '/gcomm/c\\wsrep_cluster_address=\"gcomm://\"' /etc/mysql/my.cnf")
    def backup_dir = salt.getReturnValues(salt.getPillar(env, lastNodeTarget, 'xtrabackup:client:backup_dir'))
    if(backup_dir == null || backup_dir.isEmpty()) { backup_dir='/var/backups/mysql/xtrabackup' }
    salt.runSaltProcessStep(env, lastNodeTarget, 'file.remove', ["${backup_dir}/dbrestored"])
    salt.cmdRun(env, 'I@xtrabackup:client', "su root -c 'salt-call state.sls xtrabackup'")
    salt.enforceState(env, lastNodeTarget, 'galera')
    // wait until mysql service on galera master is up
    try {
        salt.commandStatus(env, lastNodeTarget, 'service mysql status', 'running')
    } catch (Exception er) {
        input message: "Database is not running please fix it first and only then click on PROCEED."
    }

    salt.runSaltProcessStep(env, "I@galera:master and not ${lastNodeTarget}", 'service.start', ['mysql'])
    salt.runSaltProcessStep(env, "I@galera:slave and not ${lastNodeTarget}", 'service.start', ['mysql'])
}