package com.mirantis.mk
2
/**
 *
 * Galera functions
 *
 */
8
9
/**
 * Returns parameters from mysql.status output on given target node
 *
 * @param env           Salt Connection object or pepperEnv
 * @param target        Targeted node
 * @param parameters    Parameters to be returned (String or list of Strings). If no parameters
 *                      are provided (null, '' or []), all of them are returned.
 * @param print         When true, the complete mysql.status result is pretty-printed first
 * @return result       Map of parameter names to their values. Values that are Strings holding
 *                      a decimal number are converted to BigDecimal; a requested parameter that
 *                      is absent from the status output maps to null.
 */
def getWsrepParameters(env, target, parameters=[], print=false) {
    def salt = new com.mirantis.mk.Salt()
    def common = new com.mirantis.mk.Common()

    def out = salt.runSaltProcessStep(env, "${target}", "mysql.status", [], null, false)
    def outlist = out['return'][0]
    // The status map is stored under the first key of the return entry
    def resultYaml = outlist.get(outlist.keySet()[0]).sort()
    if (print) {
        common.prettyPrint(resultYaml)
    }

    // Normalise the request to a list of names so a single code path handles both
    // the String and the list form (CharSequence also covers GString arguments).
    def requested
    if (parameters instanceof CharSequence) {
        requested = parameters.toString() ? [parameters.toString()] : []
    } else {
        requested = parameters ?: []
    }
    if (requested.isEmpty()) {
        return resultYaml
    }

    def result = [:]
    for (name in requested) {
        def paramName = name.toString()
        def value = resultYaml[paramName]
        if (value instanceof String && value.isBigDecimal()) {
            value = value.toBigDecimal()
        }
        // Index assignment keeps the real parameter name; a [key: value] literal
        // would store everything under the literal string "key".
        result[paramName] = value
    }
    return result
}
48
/**
 * Verifies Galera database
 *
 * This function checks for Galera master, tests connection and if reachable, it obtains the result
 * of Salt mysql.status function. The result is then parsed, validated and output to the user.
 *
 * @param env           Salt Connection object or pepperEnv
 * @param slave         Boolean value to enable slave checking (if master is unreachable)
 * @param checkTimeSync Boolean value to enable time sync check
 * @return resultCode   int value used to determine exit status in the calling function:
 *                      0    - status is OK
 *                      1    - status output could not be parsed, or status stayed "unknown"
 *                      2    - validation reported errors
 *                      128  - Galera master is not reachable
 *                      129  - list of Galera slave minions could not be obtained
 *                      130  - no Galera slave was reachable
 *                      131  - cluster time sync check failed
 *                      256  - mysql.status call failed
 *                      1024 - mysql.status response was empty
 */
def verifyGaleraStatus(env, slave=false, checkTimeSync=false) {
    def salt = new com.mirantis.mk.Salt()
    def common = new com.mirantis.mk.Common()
    def out = ""
    def status = "unknown"
    def testNode = ""
    if (!slave) {
        try {
            def galeraMaster = salt.getMinions(env, "I@galera:master")
            common.infoMsg("Current Galera master is: ${galeraMaster}")
            salt.minionsReachable(env, "I@salt:master", "I@galera:master")
            testNode = "I@galera:master"
        } catch (Exception e) {
            common.errorMsg('Galera master is not reachable.')
            return 128
        }
    } else {
        // Declared outside the try block so the reachability loop below can see it
        def galeraSlaves = []
        try {
            galeraSlaves = salt.getMinions(env, "I@galera:slave")
            common.infoMsg("Testing Galera slave minions: ${galeraSlaves}")
        } catch (Exception e) {
            common.errorMsg("Cannot obtain Galera slave minions list.")
            return 129
        }
        // Use the first slave that answers
        for (minion in galeraSlaves) {
            try {
                salt.minionsReachable(env, "I@salt:master", minion)
                testNode = minion
                break
            } catch (Exception e) {
                common.warningMsg("Slave '${minion}' is not reachable.")
            }
        }
    }
    if (!testNode) {
        common.errorMsg("No Galera slave was reachable.")
        return 130
    }
    if (checkTimeSync && !salt.checkClusterTimeSync(env, "I@galera:master or I@galera:slave")) {
        common.errorMsg("Time in cluster is desynchronized or it couldn't be determined. You should fix this issue manually before proceeding.")
        return 131
    }
    try {
        out = salt.runSaltProcessStep(env, "${testNode}", "mysql.status", [], null, false)
    } catch (Exception e) {
        common.errorMsg('Could not determine mysql status.')
        return 256
    }
    if (out) {
        try {
            status = validateAndPrintGaleraStatusReport(env, out, testNode)
        } catch (Exception e) {
            common.errorMsg('Could not parse the mysql status output. Check it manually.')
            return 1
        }
    } else {
        common.errorMsg("Mysql status response unrecognized or is empty. Response: ${out}")
        return 1024
    }
    if (status == "OK") {
        common.infoMsg("No errors found - MySQL status is ${status}.")
        return 0
    } else if (status == "unknown") {
        common.warningMsg('MySQL status cannot be determined')
        return 1
    } else {
        common.errorMsg("Errors found.")
        return 2
    }
}
130
/**
 * Validates and prints result of verifyGaleraStatus function
 *
 * Selected wsrep_* values of the mysql.status output are compared either with a list of
 * accepted values or with warn/error thresholds, and a report grouped into OK, WARNING
 * and ERROR lines is logged.
 *
 * @param env       Salt Connection object or pepperEnv
 * @param out       Output of the mysql.status Salt function
 * @param minion    Target the status was taken from; "I@galera:master" selects the master
 *                  pillar for the member list, any other value selects the slave pillar
 * @return status   "ERROR" if at least one check failed, otherwise "OK" (warnings alone
 *                  still yield "OK")
 */
def validateAndPrintGaleraStatusReport(env, out, minion) {
    def salt = new com.mirantis.mk.Salt()
    def common = new com.mirantis.mk.Common()
    def role = (minion == "I@galera:master") ? "master" : "slave"
    // Expected cluster size is the number of members listed in the pillar
    def sizeOut = salt.getReturnValues(salt.getPillar(env, minion, "galera:${role}:members"))
    def expected_cluster_size = sizeOut.size()
    def outlist = out['return'][0]
    def resultYaml = outlist.get(outlist.keySet()[0]).sort()
    common.prettyPrint(resultYaml)
    def parameters = [
        wsrep_cluster_status: [title: 'Cluster status', expectedValues: ['Primary'], description: ''],
        wsrep_cluster_size: [title: 'Current cluster size', expectedValues: [expected_cluster_size], description: ''],
        wsrep_ready: [title: 'Node status', expectedValues: ['ON', true], description: ''],
        wsrep_local_state_comment: [title: 'Node status comment', expectedValues: ['Joining', 'Waiting on SST', 'Joined', 'Synced', 'Donor'], description: ''],
        wsrep_connected: [title: 'Node connectivity', expectedValues: ['ON', true], description: ''],
        wsrep_local_recv_queue_avg: [title: 'Average size of local received queue', expectedThreshold: [warn: 0.5, error: 1.0], description: '(Value above 0 means that the node cannot apply write-sets as fast as it receives them, which can lead to replication throttling)'],
        wsrep_local_send_queue_avg: [title: 'Average size of local send queue', expectedThreshold: [warn: 0.5, error: 1.0], description: '(Value above 0 indicate replication throttling or network throughput issues, such as a bottleneck on the network link.)']
    ]

    // Attach the actual value and the verdict ('ok' / 'warn' / 'error') to every parameter
    for (key in parameters.keySet()) {
        def param = parameters.get(key)
        def value = resultYaml[key]
        if (value instanceof String && value.isBigDecimal()) {
            value = value.toBigDecimal()
        }
        param.actualValue = value

        if (param.containsKey('expectedThreshold')) {
            def threshold = param.get('expectedThreshold')
            if (!(value instanceof Number)) {
                // A missing or non-numeric metric cannot be judged against a threshold;
                // null would otherwise compare as "not greater" and be reported as OK.
                param.match = 'error'
            } else if (value > threshold.get('error')) {
                param.match = 'error'
            } else if (value > threshold.get('warn')) {
                param.match = 'warn'
            } else {
                param.match = 'ok'
            }
        } else {
            // Groovy '==' is used on purpose (not List.contains) so that numeric values
            // of different types, e.g. Integer 3 and BigDecimal 3, are treated as equal.
            def matched = false
            for (expValue in param.get('expectedValues')) {
                if (expValue == value) {
                    matched = true
                    break
                }
            }
            param.match = matched ? 'ok' : 'error'
        }
    }

    def cluster_info_report = []
    def cluster_warning_report = []
    def cluster_error_report = []
    for (key in parameters.keySet()) {
        def param = parameters.get(key)
        def expValues
        if (param.containsKey('expectedThreshold')) {
            expValues = "below ${param.get('expectedThreshold').get('warn')}"
        } else if (param.get('expectedValues').size() > 1) {
            expValues = param.get('expectedValues').join(' or ')
        } else {
            expValues = param.get('expectedValues')[0]
        }
        def reportString = "${param.title}: ${param.actualValue} (Expected: ${expValues}) ${param.description}"
        if (param.get('match').equals('ok')) {
            cluster_info_report.add("[OK ] ${reportString}")
        } else if (param.get('match').equals('warn')) {
            cluster_warning_report.add("[WARNING] ${reportString}")
        } else {
            cluster_error_report.add("[ ERROR] ${reportString}")
        }
    }

    common.infoMsg("CLUSTER STATUS REPORT: ${cluster_info_report.size()} expected values, ${cluster_warning_report.size()} warnings and ${cluster_error_report.size()} errors found:")
    if (cluster_info_report.size() > 0) {
        common.infoMsg(cluster_info_report.join('\n'))
    }
    if (cluster_warning_report.size() > 0) {
        common.warningMsg(cluster_warning_report.join('\n'))
    }
    if (cluster_error_report.size() > 0) {
        common.errorMsg(cluster_error_report.join('\n'))
        return "ERROR"
    } else {
        return "OK"
    }
}
224
/**
 * Finds the Galera member whose grastate.dat holds the highest 'seqno' value
 *
 * The member list is read from the galera:master:members pillar. Every reachable member is
 * asked for the seqno line of /var/lib/mysql/grastate.dat and the member with the highest
 * value is chosen.
 *
 * @param env   Salt Connection object or pepperEnv
 * @return      Salt target "S@<member host>" of the chosen member, or "I@galera:master" when
 *              the member list cannot be read or no member reports a seqno above -2
 */
def getGaleraLastShutdownNode(env) {
    def salt = new com.mirantis.mk.Salt()
    def common = new com.mirantis.mk.Common()
    def members = ''
    // -2 is the "nothing found" sentinel; a member must report a higher seqno to be chosen
    def lastNode = [ip: '', seqno: -2]
    try {
        members = salt.getReturnValues(salt.getPillar(env, "I@galera:master", "galera:master:members"))
    } catch (Exception er) {
        common.errorMsg('Could not retrieve members list')
        return 'I@galera:master'
    }
    if (members) {
        for (member in members) {
            try {
                salt.minionsReachable(env, 'I@salt:master', "S@${member.host}")
                def out = salt.getReturnValues(salt.cmdRun(env, "S@${member.host}", 'cat /var/lib/mysql/grastate.dat | grep "seqno" | cut -d ":" -f2', true, null, false))
                // Only the first output line is expected to carry the seqno value
                def lines = out.tokenize('\n')
                def seqnoText = lines ? lines[0].trim() : ''
                // Parsed as long: isNumber()/toInteger() would throw for values beyond
                // the int range or for decimal text, silently dropping the node.
                def seqno = seqnoText.isLong() ? seqnoText.toLong() : -2
                if (seqno > lastNode.get('seqno')) {
                    lastNode = [ip: "${member.host}".toString(), seqno: seqno]
                }
            } catch (Exception er) {
                common.warningMsg("Could not determine 'seqno' value for node ${member.host} ")
            }
        }
    }
    if (lastNode.get('ip') != '') {
        return "S@${lastNode.ip}"
    } else {
        return "I@galera:master"
    }
}
262
/**
 * Restores Galera database
 *
 * Steps, in order: stop mysql on slaves and master, pick the restore node via
 * getGaleraLastShutdownNode, remove InnoDB log files and grastate.dat on the slaves, move the
 * current data directory of the restore node to /root/mysql/mysql.bak, reset the cluster
 * address in /etc/mysql/my.cnf to an empty gcomm:// on the restore node, run the xtrabackup
 * state, start mysql on the restore node and finally on the remaining master/slave nodes.
 *
 * @param env   Salt Connection object or pepperEnv
 * @return      result of the last Salt call (service.start on the remaining slaves), which
 *              is what Groovy returns implicitly for the final statement
 */
def restoreGaleraDb(env) {
    def salt = new com.mirantis.mk.Salt()
    def common = new com.mirantis.mk.Common()
    try {
        salt.runSaltProcessStep(env, 'I@galera:slave', 'service.stop', ['mysql'])
    } catch (Exception er) {
        common.warningMsg('Mysql service already stopped')
    }
    try {
        salt.runSaltProcessStep(env, 'I@galera:master', 'service.stop', ['mysql'])
    } catch (Exception er) {
        common.warningMsg('Mysql service already stopped')
    }
    // Local declaration: without 'def' the variable would live in the script binding
    def lastNodeTarget = getGaleraLastShutdownNode(env)
    try {
        salt.cmdRun(env, 'I@galera:slave', "rm /var/lib/mysql/ib_logfile*")
    } catch (Exception er) {
        common.warningMsg('Files are not present')
    }
    try {
        salt.cmdRun(env, 'I@galera:slave', "rm /var/lib/mysql/grastate.dat")
    } catch (Exception er) {
        common.warningMsg('Files are not present')
    }
    try {
        // -p creates the missing /root/mysql parent and does not fail when the directory
        // already exists, so the data move below always has a destination.
        salt.cmdRun(env, lastNodeTarget, "mkdir -p /root/mysql/mysql.bak")
    } catch (Exception er) {
        common.warningMsg('Could not create backup directory /root/mysql/mysql.bak')
    }
    try {
        salt.cmdRun(env, lastNodeTarget, "rm -rf /root/mysql/mysql.bak/*")
    } catch (Exception er) {
        common.warningMsg('Directory already empty')
    }
    try {
        salt.cmdRun(env, lastNodeTarget, "mv /var/lib/mysql/* /root/mysql/mysql.bak")
    } catch (Exception er) {
        common.warningMsg('Files were already moved')
    }
    try {
        salt.runSaltProcessStep(env, lastNodeTarget, 'file.remove', ["/var/lib/mysql/.galera_bootstrap"])
    } catch (Exception er) {
        common.warningMsg('File is not present')
    }
    // Replace the line containing 'gcomm' with an empty cluster address
    salt.cmdRun(env, lastNodeTarget, "sed -i '/gcomm/c\\wsrep_cluster_address=\"gcomm://\"' /etc/mysql/my.cnf")
    def backup_dir = salt.getReturnValues(salt.getPillar(env, lastNodeTarget, 'xtrabackup:client:backup_dir'))
    if (backup_dir == null || backup_dir.isEmpty()) {
        backup_dir = '/var/backups/mysql/xtrabackup'
    }
    salt.runSaltProcessStep(env, lastNodeTarget, 'file.remove', ["${backup_dir}/dbrestored"])
    salt.cmdRun(env, 'I@xtrabackup:client', "su root -c 'salt-call state.sls xtrabackup'")
    salt.runSaltProcessStep(env, lastNodeTarget, 'service.start', ['mysql'])

    // wait until mysql service on the restore node is up
    try {
        salt.commandStatus(env, lastNodeTarget, 'service mysql status', 'running')
    } catch (Exception er) {
        input message: "Database is not running please fix it first and only then click on PROCEED."
    }

    salt.runSaltProcessStep(env, "I@galera:master and not ${lastNodeTarget}", 'service.start', ['mysql'])
    salt.runSaltProcessStep(env, "I@galera:slave and not ${lastNodeTarget}", 'service.start', ['mysql'])
}