package com.mirantis.mk

/**
 *
 * Galera functions
 *
 */


/**
 * Returns parameters from mysql.status output on given target node
 *
 * @param env Salt Connection object or pepperEnv
 * @param target Targeted node
 * @param parameters Parameters to be returned (String or list of Strings). If no parameters are provided or the list is empty ('[]'), all of them are returned.
 * @param print Set to true to pretty-print the whole mysql.status output
 * @return result List of parameters with their values
 */

def getWsrepParameters(env, target, parameters=[], print=false) {
    def salt = new com.mirantis.mk.Salt()
    def common = new com.mirantis.mk.Common()
    result = []
    out = salt.runSaltProcessStep(env, "${target}", "mysql.status", [], null, false)
    outlist = out['return'][0]
    resultYaml = outlist.get(outlist.keySet()[0]).sort()
    if (print) {
        common.prettyPrint(resultYaml)
    }
    if (parameters instanceof String) {
        parameters = [parameters]
    }
    if (parameters == [] || parameters == ['']) {
        result = resultYaml
    } else {
        for (key in parameters) {
            value = resultYaml[key]
            if (value instanceof String && value.isBigDecimal()) {
                value = value.toBigDecimal()
            }
            result = ["${key}": value] + result
        }
    }
    return result
}
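
// Usage sketch (assuming 'galera' is an instance of this class, e.g. new com.mirantis.mk.Galera(),
// and 'pepperEnv' is an initialized Salt connection provided by the calling pipeline; the values
// below are illustrative only):
//   def wsrep = galera.getWsrepParameters(pepperEnv, 'I@galera:master', ['wsrep_ready', 'wsrep_cluster_size'])
//   // wsrep might look like: [wsrep_cluster_size: 3, wsrep_ready: 'ON']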

/**
 * Verifies Galera database
 *
 * This function checks for the Galera master, tests the connection and, if reachable, obtains
 * the result of the Salt mysql.status function. The result is then parsed, validated and output
 * to the user.
 *
 * @param env Salt Connection object or pepperEnv
 * @param slave Boolean value to enable slave checking (if the master is unreachable)
 * @param checkTimeSync Boolean value to enable time sync check
 * @return resultCode int value used to determine exit status in the calling function
 */
def verifyGaleraStatus(env, slave=false, checkTimeSync=false) {
    def salt = new com.mirantis.mk.Salt()
    def common = new com.mirantis.mk.Common()
    def out = ""
    def status = "unknown"
    def testNode = ""
    if (!slave) {
        try {
            galeraMaster = salt.getMinions(env, "I@galera:master")
            common.infoMsg("Current Galera master is: ${galeraMaster}")
            salt.minionsReachable(env, "I@salt:master", "I@galera:master")
            testNode = "I@galera:master"
        } catch (Exception e) {
            common.errorMsg('Galera master is not reachable.')
            return 128
        }
    } else {
        try {
            galeraSlaves = salt.getMinions(env, "I@galera:slave")
            common.infoMsg("Testing Galera slave minions: ${galeraSlaves}")
        } catch (Exception e) {
            common.errorMsg("Cannot obtain Galera slave minions list.")
            return 129
        }
        for (minion in galeraSlaves) {
            try {
                salt.minionsReachable(env, "I@salt:master", minion)
                testNode = minion
                break
            } catch (Exception e) {
                common.warningMsg("Slave '${minion}' is not reachable.")
            }
        }
    }
    if (!testNode) {
        common.errorMsg("No Galera slave was reachable.")
        return 130
    }
    if (checkTimeSync && !salt.checkClusterTimeSync(env, "I@galera:master or I@galera:slave")) {
        common.errorMsg("Time in cluster is desynchronized or it couldn't be determined. You should fix this issue manually before proceeding.")
        return 131
    }
    try {
        out = salt.runSaltProcessStep(env, "${testNode}", "mysql.status", [], null, false)
    } catch (Exception e) {
        common.errorMsg('Could not determine mysql status.')
        return 256
    }
    if (out) {
        try {
            status = validateAndPrintGaleraStatusReport(env, out, testNode)
        } catch (Exception e) {
            common.errorMsg('Could not parse the mysql status output. Check it manually.')
            return 1
        }
    } else {
        common.errorMsg("Mysql status response unrecognized or is empty. Response: ${out}")
        return 1024
    }
    if (status == "OK") {
        common.infoMsg("No errors found - MySQL status is ${status}.")
        return 0
    } else if (status == "unknown") {
        common.warningMsg('MySQL status cannot be determined.')
        return 1
    } else {
        common.errorMsg("Errors found.")
        return 2
    }
}
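
// Usage sketch (hypothetical pipeline step; 'galera' is an instance of this class and 'pepperEnv'
// is an initialized Salt connection, both provided by the calling pipeline):
//   def resultCode = galera.verifyGaleraStatus(pepperEnv, false, true)
//   if (resultCode != 0) {
//       error("Galera verification failed with code ${resultCode}")
//   }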

/**
 * Validates and prints result of verifyGaleraStatus function
 *
 * @param env Salt Connection object or pepperEnv
 * @param out Output of the mysql.status Salt function
 * @param minion Target node on which mysql.status was executed
 * @return status "OK", "ERROR" or "unknown" depending on the result of validation
 */

def validateAndPrintGaleraStatusReport(env, out, minion) {
    def salt = new com.mirantis.mk.Salt()
    def common = new com.mirantis.mk.Common()
    if (minion == "I@galera:master") {
        role = "master"
    } else {
        role = "slave"
    }
    sizeOut = salt.getReturnValues(salt.getPillar(env, minion, "galera:${role}:members"))
    expected_cluster_size = sizeOut.size()
    outlist = out['return'][0]
    resultYaml = outlist.get(outlist.keySet()[0]).sort()
    common.prettyPrint(resultYaml)
    parameters = [
        wsrep_cluster_status: [title: 'Cluster status', expectedValues: ['Primary'], description: ''],
        wsrep_cluster_size: [title: 'Current cluster size', expectedValues: [expected_cluster_size], description: ''],
        wsrep_ready: [title: 'Node status', expectedValues: ['ON', true], description: ''],
        wsrep_local_state_comment: [title: 'Node status comment', expectedValues: ['Joining', 'Waiting on SST', 'Joined', 'Synced', 'Donor'], description: ''],
        wsrep_connected: [title: 'Node connectivity', expectedValues: ['ON', true], description: ''],
        wsrep_local_recv_queue_avg: [title: 'Average size of local received queue', expectedThreshold: [warn: 0.5, error: 1.0], description: '(A value above 0 means that the node cannot apply write-sets as fast as it receives them, which can lead to replication throttling)'],
        wsrep_local_send_queue_avg: [title: 'Average size of local send queue', expectedThreshold: [warn: 0.5, error: 1.0], description: '(A value above 0 indicates replication throttling or network throughput issues, such as a bottleneck on the network link)']
    ]
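    // First pass: record the value reported by mysql.status for every monitored parameter;
    // numeric strings are converted to BigDecimal so the threshold comparison below works on numbers.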
    for (key in parameters.keySet()) {
        value = resultYaml[key]
        if (value instanceof String && value.isBigDecimal()) {
            value = value.toBigDecimal()
        }
        parameters.get(key) << [actualValue: value]
    }
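    // Second pass: the queue averages are checked against the warn/error thresholds; every other
    // parameter must match one of its expected values.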
    for (key in parameters.keySet()) {
        param = parameters.get(key)
        if (key == 'wsrep_local_recv_queue_avg' || key == 'wsrep_local_send_queue_avg') {
            if (param.get('actualValue') == null || (param.get('actualValue') > param.get('expectedThreshold').get('error'))) {
                param << [match: 'error']
            } else if (param.get('actualValue') > param.get('expectedThreshold').get('warn')) {
                param << [match: 'warn']
            } else {
                param << [match: 'ok']
            }
        } else {
            for (expValue in param.get('expectedValues')) {
                if (expValue == param.get('actualValue')) {
                    param << [match: 'ok']
                    break
                } else {
                    param << [match: 'error']
                }
            }
        }
    }
    cluster_info_report = []
    cluster_warning_report = []
    cluster_error_report = []
    for (key in parameters.keySet()) {
        param = parameters.get(key)
        if (param.containsKey('expectedThreshold')) {
            expValues = "below ${param.get('expectedThreshold').get('warn')}"
        } else {
            if (param.get('expectedValues').size() > 1) {
                expValues = param.get('expectedValues').join(' or ')
            } else {
                expValues = param.get('expectedValues')[0]
            }
        }
        reportString = "${param.title}: ${param.actualValue} (Expected: ${expValues}) ${param.description}"
        if (param.get('match').equals('ok')) {
            cluster_info_report.add("[OK     ] ${reportString}")
        } else if (param.get('match').equals('warn')) {
            cluster_warning_report.add("[WARNING] ${reportString}")
        } else {
            cluster_error_report.add("[  ERROR] ${reportString}")
        }
    }
    common.infoMsg("CLUSTER STATUS REPORT: ${cluster_info_report.size()} expected values, ${cluster_warning_report.size()} warnings and ${cluster_error_report.size()} errors found:")
    if (cluster_info_report.size() > 0) {
        common.infoMsg(cluster_info_report.join('\n'))
    }
    if (cluster_warning_report.size() > 0) {
        common.warningMsg(cluster_warning_report.join('\n'))
    }
    if (cluster_error_report.size() > 0) {
        common.errorMsg(cluster_error_report.join('\n'))
        return "ERROR"
    } else {
        return "OK"
    }
}

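/**
 * Returns the Galera node that was shut down last
 *
 * Reads the 'seqno' value from /var/lib/mysql/grastate.dat on every member listed in the
 * galera:master:members pillar and picks the node with the highest sequence number, i.e. the
 * node holding the most recent committed state. Falls back to 'I@galera:master' when the
 * members list or the seqno values cannot be obtained.
 *
 * @param env Salt Connection object or pepperEnv
 * @return Salt target of the last shutdown node (e.g. "S@<host>") or "I@galera:master"
 */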
def getGaleraLastShutdownNode(env) {
    def salt = new com.mirantis.mk.Salt()
    def common = new com.mirantis.mk.Common()
    members = ''
    lastNode = [ip: '', seqno: -2]
    try {
        members = salt.getReturnValues(salt.getPillar(env, "I@galera:master", "galera:master:members"))
    } catch (Exception er) {
        common.errorMsg('Could not retrieve members list')
        return 'I@galera:master'
    }
    if (members) {
        for (member in members) {
            try {
                salt.minionsReachable(env, 'I@salt:master', "S@${member.host}")
                out = salt.getReturnValues(salt.cmdRun(env, "S@${member.host}", 'cat /var/lib/mysql/grastate.dat | grep "seqno" | cut -d ":" -f2', true, null, false))
                seqno = out.tokenize('\n')[0].trim()
                if (seqno.isNumber()) {
                    seqno = seqno.toInteger()
                } else {
                    seqno = -2
                }
                highestSeqno = lastNode.get('seqno')
                if (seqno > highestSeqno) {
                    lastNode << [ip: "${member.host}", seqno: seqno]
                }
            } catch (Exception er) {
                common.warningMsg("Could not determine 'seqno' value for node ${member.host}")
            }
        }
    }
    if (lastNode.get('ip') != '') {
        return "S@${lastNode.ip}"
    } else {
        return "I@galera:master"
    }
}

/**
 * Restores Galera database
 * @param env Salt Connection object or pepperEnv
 * @return output of salt commands
 */
def restoreGaleraDb(env) {
    def salt = new com.mirantis.mk.Salt()
    def common = new com.mirantis.mk.Common()
    try {
        salt.runSaltProcessStep(env, 'I@galera:slave', 'service.stop', ['mysql'])
    } catch (Exception er) {
        common.warningMsg('Mysql service already stopped')
    }
    try {
        salt.runSaltProcessStep(env, 'I@galera:master', 'service.stop', ['mysql'])
    } catch (Exception er) {
        common.warningMsg('Mysql service already stopped')
    }
    lastNodeTarget = getGaleraLastShutdownNode(env)
    try {
        salt.cmdRun(env, 'I@galera:slave', "rm /var/lib/mysql/ib_logfile*")
    } catch (Exception er) {
        common.warningMsg('Files are not present')
    }
    try {
        salt.cmdRun(env, 'I@galera:slave', "rm /var/lib/mysql/grastate.dat")
    } catch (Exception er) {
        common.warningMsg('Files are not present')
    }
    try {
        salt.cmdRun(env, lastNodeTarget, "mkdir /root/mysql/mysql.bak")
    } catch (Exception er) {
        common.warningMsg('Directory already exists')
    }
    try {
        salt.cmdRun(env, lastNodeTarget, "rm -rf /root/mysql/mysql.bak/*")
    } catch (Exception er) {
        common.warningMsg('Directory already empty')
    }
    try {
        salt.cmdRun(env, lastNodeTarget, "mv /var/lib/mysql/* /root/mysql/mysql.bak")
    } catch (Exception er) {
        common.warningMsg('Files were already moved')
    }
    try {
        salt.runSaltProcessStep(env, lastNodeTarget, 'file.remove', ["/var/lib/mysql/.galera_bootstrap"])
    } catch (Exception er) {
        common.warningMsg('File is not present')
    }
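    // An empty wsrep_cluster_address ("gcomm://") makes this node bootstrap a new cluster
    // (a new primary component) instead of trying to join an existing one.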
    salt.cmdRun(env, lastNodeTarget, "sed -i '/gcomm/c\\wsrep_cluster_address=\"gcomm://\"' /etc/mysql/my.cnf")
    def backup_dir = salt.getReturnValues(salt.getPillar(env, lastNodeTarget, 'xtrabackup:client:backup_dir'))
    if (backup_dir == null || backup_dir.isEmpty()) { backup_dir = '/var/backups/mysql/xtrabackup' }
    salt.runSaltProcessStep(env, lastNodeTarget, 'file.remove', ["${backup_dir}/dbrestored"])
    salt.cmdRun(env, 'I@xtrabackup:client', "su root -c 'salt-call state.sls xtrabackup'")
    salt.runSaltProcessStep(env, lastNodeTarget, 'service.start', ['mysql'])

    // wait until the mysql service on the restored (bootstrap) node is up
    try {
        salt.commandStatus(env, lastNodeTarget, 'service mysql status', 'running')
    } catch (Exception er) {
        input message: "Database is not running. Please fix it first and only then click on PROCEED."
    }

    salt.runSaltProcessStep(env, "I@galera:master and not ${lastNodeTarget}", 'service.start', ['mysql'])
    salt.runSaltProcessStep(env, "I@galera:slave and not ${lastNodeTarget}", 'service.start', ['mysql'])
}