Martin Polreich | f89f9b4 | 2019-05-07 15:37:13 +0200 | [diff] [blame] | 1 | package com.mirantis.mk |
| 2 | |
| 3 | /** |
| 4 | * |
| 5 | * Galera functions |
| 6 | * |
| 7 | */ |
| 8 | |
| 9 | |
/**
 * Returns parameters from mysql.status output on given target node
 *
 * @param env           Salt Connection object or pepperEnv
 * @param target        Targeted node
 * @param parameters    Parameters to be returned (String or list of Strings). If no parameters are provided
 *                      (null, '[]' or ['']), it returns all of them.
 * @param print         When true, the complete key-sorted mysql.status output is pretty-printed before filtering
 * @return result       Map of parameter names to their values. When specific parameters are requested,
 *                      values that are numeric strings are converted to BigDecimal.
 */
def getWsrepParameters(env, target, parameters=[], print=false) {
    def salt = new com.mirantis.mk.Salt()
    def common = new com.mirantis.mk.Common()
    // All working variables are declared with 'def' so that they stay local to this call
    // instead of leaking into the script binding shared by the other functions of this file.
    def result = [:]
    def out = salt.runSaltProcessStep(env, "${target}", "mysql.status", [], null, false)
    def outlist = out['return'][0]
    // Only the first entry of the Salt return is used
    def resultYaml = outlist.get(outlist.keySet()[0]).sort()
    if (print) {
        common.prettyPrint(resultYaml)
    }
    // Accept a single name given either as a String or as a GString
    if (parameters instanceof CharSequence) {
        parameters = [parameters.toString()]
    }
    if (!parameters || parameters == ['']) {
        result = resultYaml
    } else {
        for (String param in parameters) {
            def value = resultYaml[param]
            if (value instanceof String && value.isBigDecimal()) {
                value = value.toBigDecimal()
            }
            result[param] = value
        }
    }
    return result
}
| 45 | |
/**
 * Verifies Galera database
 *
 * The function collects the Galera minions and sorts them into reachable and unreachable ones,
 * checks disk i/o utilization on the xtrabackup nodes, optionally checks the cluster time sync and
 * finds out which reachable members report a 'wsrep_cluster_status'. The Salt mysql.status output
 * of the first (sorted) such member is then validated and printed to the user.
 *
 * @param env           Salt Connection object or pepperEnv
 * @param checkTimeSync Boolean value to enable time sync check
 * @return              Map with the lists 'clusterMembersOnPower', 'clusterMembersNotAvailable',
 *                      'clusterMembersInClusterAlive', 'clusterMembersNotAlive', optionally 'statusRaw'
 *                      (raw mysql.status output) and the 'error' code:
 *                      0 - no errors found, 1 - status cannot be determined, 2 - errors found,
 *                      128 - minions list cannot be obtained, 130 - no member reachable,
 *                      131 - time desynchronized, 140 - empty iostat response,
 *                      141 - disk i/o utilization too high, 256 - mysql status cannot be obtained
 */
def verifyGaleraStatus(env, checkTimeSync=false) {
    def salt = new com.mirantis.mk.Salt()
    def common = new com.mirantis.mk.Common()
    def report = [
        'clusterMembersOnPower': [],
        'clusterMembersNotAvailable': [],
        'clusterMembersInClusterAlive': [],
        'clusterMembersNotAlive': [],
        'error': 0
    ]

    // Split the Galera minions into reachable and unreachable ones
    try {
        def galeraMinions = salt.getMinions(env, "I@galera:master or I@galera:slave")
        for (minion in galeraMinions) {
            try {
                salt.minionsReachable(env, "I@salt:master", minion)
                report.clusterMembersOnPower << minion
            } catch (Exception unreachable) {
                common.warningMsg("Slave '${minion}' is not reachable.")
                report.clusterMembersNotAvailable << minion
            }
        }
    } catch (Exception e) {
        common.errorMsg('Cannot obtain Galera minions list.')
        common.errorMsg(e.getMessage())
        report.error = 128
        return report
    }

    if (!report.clusterMembersOnPower) {
        common.errorMsg("No Galera member was reachable.")
        report.error = 130
        return report
    }

    // Disk i/o utilization check on the backup nodes
    def backupNodes = salt.getMinions(env, "I@xtrabackup:client or I@xtrabackup:server")
    for (backupNode in backupNodes) {
        def reachable = salt.minionsReachable(env, 'I@salt:master', backupNode, null, 10, 5)
        if (reachable == null) {
            continue
        }
        def iostatRes = salt.getIostatValues(['saltId': env, 'target': backupNode, 'parameterName': "%util", 'output': true])
        if (iostatRes == [:]) {
            common.errorMsg("Recevived empty response from iostat call on ${backupNode}. Maybe 'sysstat' package is not installed?")
            report.error = 140
            return report
        }
        for (diskKey in iostatRes.keySet()) {
            // A value that is not a number or that reaches 50 is treated as too high
            if (!iostatRes[diskKey].toString().isBigDecimal() || iostatRes[diskKey].toBigDecimal() >= 50) {
                common.errorMsg("Disk ${diskKey} has to high i/o utilization. Maximum value is 50 and current value is ${iostatRes[diskKey]}.")
                report.error = 141
                return report
            }
        }
    }
    common.infoMsg("Disk i/o utilization was checked and everything seems to be in order.")

    if (checkTimeSync && !salt.checkClusterTimeSync(env, "I@galera:master or I@galera:slave")) {
        common.errorMsg("Time in cluster is desynchronized or it couldn't be detemined. You should fix this issue manually before proceeding.")
        report.error = 131
        return report
    }

    // Find out which reachable members report a cluster status
    for (member in report.clusterMembersOnPower) {
        def wsrep = getWsrepParameters(env, member, 'wsrep_cluster_status')
        if (wsrep['wsrep_cluster_status']) {
            report.clusterMembersInClusterAlive << member
        } else {
            report.clusterMembersNotAlive << member
        }
    }
    if (!report.clusterMembersInClusterAlive) {
        common.errorMsg("Could not determine mysql status, because all nodes are not connected to cluster.")
        report.error = 256
        return report
    }
    def testNode = report.clusterMembersInClusterAlive.sort().first()

    try {
        report.statusRaw = salt.runSaltProcessStep(env, testNode, "mysql.status", [], null, false)
    } catch (Exception e) {
        common.errorMsg('Could not determine mysql status.')
        common.errorMsg(e.getMessage())
        report.error = 256
        return report
    }

    def status = "unknown"
    def testNodeIsMaster = false
    if (salt.getReturnValues(salt.getPillar(env, testNode, "galera:master:enabled"))) {
        testNodeIsMaster = true
    }

    if (report.statusRaw) {
        try {
            status = validateAndPrintGaleraStatusReport(env, report.statusRaw, testNode, testNodeIsMaster)
        } catch (Exception e) {
            // status stays "unknown" when the output cannot be parsed
            common.errorMsg('Could not parse the mysql status output. Check it manually.')
            common.errorMsg(e.getMessage())
        }
    } else {
        common.errorMsg("Mysql status response unrecognized or is empty. Response: ${report.statusRaw}")
    }
    if (report.clusterMembersNotAvailable) {
        common.errorMsg("Next nodes are unavailable: ${report.clusterMembersNotAvailable.join(',')}")
    }
    if (report.clusterMembersNotAlive) {
        common.errorMsg("Next nodes are not connected to cluster: ${report.clusterMembersNotAlive.join(',')}")
    }

    if (status == "OK") {
        common.infoMsg("No errors found - MySQL status is ${status}.")
        return report
    }
    if (status == "unknown") {
        common.warningMsg('MySQL status cannot be detemined')
        report.error = 1
        return report
    }
    common.errorMsg("Errors found.")
    report.error = 2
    return report
}
| 175 | |
/**
 * Validates and prints result of verifyGaleraStatus function
 *
 * @param env             Salt Connection object or pepperEnv
 * @param out             Output of the mysql.status Salt function
 * @param minion          Node whose 'galera:<role>:members' pillar determines the expected cluster size
 * @param nodeRoleMaster  When true the 'galera:master:members' pillar is read, otherwise 'galera:slave:members'
 * @return status         "OK" when no checked parameter is in error state, "ERROR" otherwise
 */
def validateAndPrintGaleraStatusReport(env, out, minion, nodeRoleMaster=false) {
    def salt = new com.mirantis.mk.Salt()
    def common = new com.mirantis.mk.Common()
    def role = nodeRoleMaster ? 'master' : 'slave'
    // All working variables are declared with 'def' so that they stay local to this call
    // instead of leaking into the script binding shared by the other functions of this file.
    def sizeOut = salt.getReturnValues(salt.getPillar(env, minion, "galera:${role}:members"))
    def expected_cluster_size = sizeOut.size()
    def outlist = out['return'][0]
    // Only the first entry of the Salt return is used
    def resultYaml = outlist.get(outlist.keySet()[0]).sort()
    common.prettyPrint(resultYaml)
    def parameters = [
        wsrep_cluster_status: [title: 'Cluster status', expectedValues: ['Primary'], description: ''],
        wsrep_cluster_size: [title: 'Current cluster size', expectedValues: [expected_cluster_size], description: ''],
        wsrep_ready: [title: 'Node status', expectedValues: ['ON', true], description: ''],
        wsrep_local_state_comment: [title: 'Node status comment', expectedValues: ['Joining', 'Waiting on SST', 'Joined', 'Synced', 'Donor'], description: ''],
        wsrep_connected: [title: 'Node connectivity', expectedValues: ['ON', true], description: ''],
        wsrep_local_recv_queue_avg: [title: 'Average size of local reveived queue', expectedThreshold: [warn: 0.5, error: 1.0], description: '(Value above 0 means that the node cannot apply write-sets as fast as it receives them, which can lead to replication throttling)'],
        wsrep_local_send_queue_avg: [title: 'Average size of local send queue', expectedThreshold: [warn: 0.5, error: 1.0], description: '(Value above 0 indicate replication throttling or network throughput issues, such as a bottleneck on the network link.)']
    ]
    // Attach the actual value to every checked parameter (numeric strings become BigDecimal)
    for (key in parameters.keySet()) {
        def value = resultYaml[key]
        if (value instanceof String && value.isBigDecimal()) {
            value = value.toBigDecimal()
        }
        parameters.get(key) << [actualValue: value]
    }
    // Evaluate every parameter to 'ok', 'warn' or 'error'
    for (key in parameters.keySet()) {
        def param = parameters.get(key)
        def actual = param.get('actualValue')
        if (key == 'wsrep_local_recv_queue_avg' || key == 'wsrep_local_send_queue_avg') {
            def threshold = param.get('expectedThreshold')
            // A missing or non-numeric value cannot be compared with the threshold and is an error
            if (!(actual instanceof Number) || actual > threshold.get('error')) {
                param << [match: 'error']
            } else if (actual > threshold.get('warn')) {
                param << [match: 'warn']
            } else {
                param << [match: 'ok']
            }
        } else {
            // '==' is used on purpose: it compares numbers by value (e.g. Integer 3 and BigDecimal 3)
            def matched = false
            for (expValue in param.get('expectedValues')) {
                if (expValue == actual) {
                    matched = true
                    break
                }
            }
            param << [match: matched ? 'ok' : 'error']
        }
    }
    def cluster_info_report = []
    def cluster_warning_report = []
    def cluster_error_report = []
    for (key in parameters.keySet()) {
        def param = parameters.get(key)
        def expValues
        if (param.containsKey('expectedThreshold')) {
            expValues = "below ${param.get('expectedThreshold').get('warn')}"
        } else {
            if (param.get('expectedValues').size() > 1) {
                expValues = param.get('expectedValues').join(' or ')
            } else {
                expValues = param.get('expectedValues')[0]
            }
        }
        def reportString = "${param.title}: ${param.actualValue} (Expected: ${expValues}) ${param.description}"
        if (param.get('match').equals('ok')) {
            cluster_info_report.add("[OK ] ${reportString}")
        } else if (param.get('match').equals('warn')) {
            cluster_warning_report.add("[WARNING] ${reportString}")
        } else {
            // the unbalanced trailing ')' of the original message was removed
            cluster_error_report.add("[ ERROR] ${reportString}")
        }
    }
    common.infoMsg("CLUSTER STATUS REPORT: ${cluster_info_report.size()} expected values, ${cluster_warning_report.size()} warnings and ${cluster_error_report.size()} error found:")
    if (cluster_info_report.size() > 0) {
        common.infoMsg(cluster_info_report.join('\n'))
    }
    if (cluster_warning_report.size() > 0) {
        common.warningMsg(cluster_warning_report.join('\n'))
    }
    if (cluster_error_report.size() > 0) {
        common.errorMsg(cluster_error_report.join('\n'))
        return "ERROR"
    } else {
        return "OK"
    }
}
| 268 | |
/**
 * Returns last shutdown node of Galera cluster
 *
 * The node with the highest 'seqno' found in /var/lib/mysql/grastate.dat is selected. A node whose
 * seqno cannot be read (unreachable node, missing or non-numeric value) is counted with seqno 0.
 *
 * @param env       Salt Connection object or pepperEnv
 * @param nodes     List of nodes to check only (defaults to []). If not provided, it will check all nodes.
 *                  Use this parameter if the cluster splits to several components and you only want to check one of them.
 * @return          Salt target of the last shutdown node in the form "S@<ip address or hostname>",
 *                  or "I@galera:master" when the members cannot be retrieved or no node was selected
 */
def getGaleraLastShutdownNode(env, nodes = []) {
    def salt = new com.mirantis.mk.Salt()
    def common = new com.mirantis.mk.Common()
    // Working variables are declared with 'def' so that they do not leak into the script binding
    def members = []
    def lastNode = [ip: '', seqno: -2]
    try {
        if (nodes) {
            def nodeAddresses = salt.getIPAddressesForNodenames(env, nodes)
            for (node in nodeAddresses) {
                // Prepend one [host: <address>] map per node. 'Map + List' must not be used here:
                // it merges everything into a single Map instead of building a list of members.
                members.add(0, [host: "${node.get(node.keySet()[0])}"])
            }
        } else {
            members = salt.getReturnValues(salt.getPillar(env, "I@galera:master", "galera:master:members"))
        }
    } catch (Exception e) {
        common.errorMsg('Could not retrieve members list')
        common.errorMsg(e.getMessage())
        return 'I@galera:master'
    }
    if (members) {
        for (member in members) {
            def seqno = 0
            try {
                salt.minionsReachable(env, 'I@salt:master', "S@${member.host}")
                def out = salt.getReturnValues(salt.cmdRun(env, "S@${member.host}", 'cat /var/lib/mysql/grastate.dat | grep "seqno" | cut -d ":" -f2', true, null, false))
                def rawSeqno = out.tokenize('\n')[0].trim()
                if (rawSeqno.isLong()) {
                    // parsed as Long, an Integer would overflow on large sequence numbers
                    seqno = rawSeqno.toLong()
                } else {
                    // in case if /var/lib/mysql/grastate.dat has no any seqno - set it to 0
                    // thus node will be recovered if no other failed found
                    seqno = 0
                }
            } catch (Exception e) {
                common.warningMsg("Could not determine 'seqno' value for node ${member.host} ")
                common.warningMsg(e.getMessage())
                seqno = 0
            }
            def highestSeqno = lastNode.get('seqno')
            // strict comparison: the first node with the highest seqno wins
            if (seqno > highestSeqno) {
                lastNode << [ip: "${member.host}", seqno: seqno]
            }
        }
    }
    if (lastNode.get('ip') != '') {
        return "S@${lastNode.ip}"
    } else {
        return "I@galera:master"
    }
}
| 325 | |
/**
 * Wrapper around Mysql systemd service
 *
 * Runs the Salt function 'service.<action>' for the 'mysql' service and, when requested, verifies
 * the output of 'service mysql status'. If that verification throws, the pipeline is paused by an
 * input step until the user confirms.
 *
 * @param env           Salt Connection object or pepperEnv
 * @param targetNode    Node to apply changes
 * @param action        Service action, used as suffix of the Salt 'service.' function (e.g. 'start', 'stop')
 * @param checkStatus   Whether to check status of Mysql
 * @param checkState    State of service to check
 */
def manageServiceMysql(env, targetNode, action, checkStatus=true, checkState='running') {
    def salt = new com.mirantis.mk.Salt()
    def serviceFunction = "service.${action}"
    salt.runSaltProcessStep(env, targetNode, serviceFunction, ['mysql'])
    if (!checkStatus) {
        return
    }
    try {
        salt.commandStatus(env, targetNode, 'service mysql status', checkState)
    } catch (Exception er) {
        // wait for the operator instead of failing the pipeline
        input message: "Database is not running please fix it first and only then click on PROCEED."
    }
}
| 344 | |
/**
 * Restores Galera cluster
 *
 * The whole cluster is restored when the status reports no member outside of the cluster or no
 * member alive in the cluster. Otherwise only the members which are not alive are recovered.
 *
 * @param env           Salt Connection object or pepperEnv
 * @param galeraStatus  Map, Status of Galera cluster output from verifyGaleraStatus func
 *                      (keys 'clusterMembersNotAlive' and 'clusterMembersInClusterAlive' are read)
 * @param restoreDb     Run restore DB procedure
 * @return              Result of the last executed step (the 'galera_config' state run for a whole
 *                      cluster restore)
 */
def restoreGaleraCluster(env, galeraStatus, restoreDb=true) {
    def salt = new com.mirantis.mk.Salt()
    def common = new com.mirantis.mk.Common()

    // Decide what has to be recovered
    def notAliveMembers = galeraStatus['clusterMembersNotAlive']
    def nodesToRecover = galeraStatus['clusterMembersInClusterAlive']
    def wholeCluster = true
    if (notAliveMembers) {
        nodesToRecover = notAliveMembers
        wholeCluster = galeraStatus['clusterMembersInClusterAlive'].size() == 0
    }

    // Stop mysql and choose the node(s) to start from
    def lastNodeTarget = ''
    if (wholeCluster) {
        manageServiceMysql(env, 'I@galera:slave', 'stop', true, 'inactive')
        manageServiceMysql(env, 'I@galera:master', 'stop', true, 'inactive')
        lastNodeTarget = getGaleraLastShutdownNode(env) // in case if master was already down before
        for (staleFile in ['/var/lib/mysql/ib_logfile*', '/var/lib/mysql/grastate.dat']) {
            salt.cmdRun(env, "( I@galera:master or I@galera:slave ) and not ${lastNodeTarget}", "rm -f ${staleFile}")
        }
    } else {
        lastNodeTarget = nodesToRecover.join(' or ')
        manageServiceMysql(env, lastNodeTarget, 'stop', true, 'inactive')
    }

    // Move the current data directory content away before the database is restored
    if (restoreDb) {
        def timestamp = common.getDatetime()
        salt.cmdRun(env, lastNodeTarget, "mkdir -p /root/mysql")
        def mktempOut = salt.cmdRun(env, lastNodeTarget, "mktemp -d --suffix='_${timestamp}' /root/mysql/mysql.bak.XXXXXX", false)
        def bakDir = salt.getReturnValues(mktempOut)
        salt.cmdRun(env, lastNodeTarget, "mv /var/lib/mysql/* ${bakDir} || echo 'Nothing to backup from directory /var/lib/mysql/'")
    }

    if (wholeCluster) {
        // make sure that gcom parameter is empty
        salt.cmdRun(env, lastNodeTarget, "sed -i '/gcomm/c\\wsrep_cluster_address=\"gcomm://\"' /etc/mysql/my.cnf")
    } else if (!restoreDb) {
        // node rejoin
        salt.cmdRun(env, lastNodeTarget, "rm -f /var/lib/mysql/ib_logfile*")
        salt.cmdRun(env, lastNodeTarget, "rm -f /var/lib/mysql/grastate.dat")
    }

    if (restoreDb) {
        restoreGaleraDb(env, lastNodeTarget)
    }

    manageServiceMysql(env, lastNodeTarget, 'start')

    // Remaining members are started after the first node, then the config state is re-applied on it
    if (wholeCluster) {
        manageServiceMysql(env, "( I@galera:master or I@galera:slave ) and not ${lastNodeTarget}", 'start')
        salt.runSaltProcessStep(env, lastNodeTarget, 'state.sls_id', ['galera_config', 'galera'])
    }
}
Martin Polreich | e48741b | 2019-03-21 16:00:23 +0100 | [diff] [blame] | 405 | |
/**
 * Restores Galera database
 *
 * Removes the '<backup_dir>/dbrestored' file on the target and enforces the 'xtrabackup.client'
 * and 'xtrabackup.client.restore' states. The backup directory is read from the
 * 'xtrabackup:client:backup_dir' pillar and defaults to '/var/backups/mysql/xtrabackup'.
 *
 * @param env           Salt Connection object or pepperEnv
 * @param targetNode    Node to be targeted
 */
def restoreGaleraDb(env, targetNode) {
    def salt = new com.mirantis.mk.Salt()
    def backupDirPillar = salt.getPillar(env, targetNode, 'xtrabackup:client:backup_dir')
    def backup_dir = salt.getReturnValues(backupDirPillar)
    if (backup_dir == null || backup_dir.isEmpty()) {
        backup_dir = '/var/backups/mysql/xtrabackup'
    }
    def restoredFlag = "${backup_dir}/dbrestored"
    salt.runSaltProcessStep(env, targetNode, 'file.remove', [restoredFlag])
    def clientState = ['saltId': env, 'target': targetNode, 'state': 'xtrabackup.client']
    salt.enforceState(clientState)
    def restoreState = ['saltId': env, 'target': targetNode, 'state': 'xtrabackup.client.restore']
    salt.enforceState(restoreState)
}
| 419 | |
/**
 * Deprecated single-argument variant kept for old pipelines. It only warns about the rename
 * and delegates to restoreGaleraCluster.
 *
 * @param env           Salt Connection object or pepperEnv
 * @return              Result of restoreGaleraCluster
 */
def restoreGaleraDb(env) {
    def common = new com.mirantis.mk.Common()
    common.warningMsg("This method was renamed to 'restoreGaleraCluster'. Please change your pipeline to use this call instead! If you think that you really wanted to call 'restoreGaleraDb' you may be missing 'targetNode' parameter in you call.")
    // restoreGaleraCluster requires a Galera status map. Old callers have none, so a status
    // without any members is passed, which restoreGaleraCluster handles as a whole cluster restore.
    def emptyStatus = ['clusterMembersNotAlive': [], 'clusterMembersInClusterAlive': []]
    return restoreGaleraCluster(env, emptyStatus)
}
| 425 | |
/**
 * Start first node in mysql cluster. Cluster members stay removed in mysql config, additional service restart will be needed once all nodes are up.
 * https://docs.mirantis.com/mcp/q4-18/mcp-operations-guide/tshooting/
 * tshoot-mcp-openstack/tshoot-galera/restore-galera-cluster/
 * restore-galera-manually.html#restore-galera-manually
 *
 * @param env           Salt Connection object or pepperEnv
 * @param target        last stopped Galera node
 * @return              result of the retry block which waits for the OPERATIONAL WSREP state
 */
def startFirstNode(env, target) {
    def salt = new com.mirantis.mk.Salt()
    def common = new com.mirantis.mk.Common()

    // make sure that gcom parameter is empty:
    // comment out the existing wsrep_cluster_address lines and append an empty gcomm:// address
    salt.cmdRun(env, target, "sed -i '/wsrep_cluster_address/ s/^#*/#/' /etc/mysql/my.cnf")
    salt.cmdRun(env, target, "sed -i '/wsrep_cluster_address/a wsrep_cluster_address=\"gcomm://\"' /etc/mysql/my.cnf")

    // start mysql service on the last node
    salt.runSaltProcessStep(env, target, 'service.start', ['mysql'])

    // wait until mysql service on the last node is up
    // NOTE(review): presumably retry(30, 10) means 30 attempts with a 10 second delay - confirm in Common
    common.retry(30, 10) {
        // declared locally so that it does not collide with the script binding variables
        def wsrepState = getWsrepParameters(env, target, 'wsrep_evs_state')
        if (wsrepState['wsrep_evs_state'] == 'OPERATIONAL') {
            common.infoMsg('WSREP state: OPERATIONAL')
        } else {
            // the exception makes the retry block try again
            throw new Exception("Mysql service is not running please fix it.")
        }
    }
}