| Martin Polreich | 8f0f3ac | 2019-02-15 10:03:33 +0100 | [diff] [blame] | 1 | package com.mirantis.mk | 
|  | 2 |  | 
|  | 3 | /** | 
|  | 4 | * | 
|  | 5 | * Galera functions | 
|  | 6 | * | 
|  | 7 | */ | 
|  | 8 |  | 
|  | 9 |  | 
/**
 * Returns parameters from the mysql.status output of the given target node.
 *
 * @param env           Salt Connection object or pepperEnv
 * @param target        Targeted node
 * @param parameters    Parameters to be returned (String or list of Strings). If no parameters are provided
 *                      or it is set to '[]' (or an empty string), all of them are returned.
 * @param print         If true, the complete sorted mysql.status output is pretty-printed before filtering
 * @return result       Map of parameter names to their values. When specific parameters are requested,
 *                      String values that parse as numbers are converted to BigDecimal; when everything
 *                      is returned, the values are passed through unconverted.
 */
def getWsrepParameters(env, target, parameters=[], print=false) {
    def salt = new com.mirantis.mk.Salt()
    def common = new com.mirantis.mk.Common()
    // Every working variable is declared with 'def' so that nothing leaks into the
    // script binding, where it would be shared with the other functions of this file.
    def result = [:]
    def out = salt.runSaltProcessStep(env, "${target}", "mysql.status", [], null, false)
    def outlist = out['return'][0]
    // Only the answer of the first minion in the response is evaluated.
    def resultYaml = outlist.get(outlist.keySet()[0]).sort()
    if (print) {
        common.prettyPrint(resultYaml)
    }
    // Accept a single parameter name as well as a list of names.
    if (parameters instanceof String) {
        parameters = [parameters]
    }
    if (parameters == [] || parameters == ['']) {
        result = resultYaml
    } else {
        for (String param in parameters) {
            def value = resultYaml[param]
            if (value instanceof String && value.isBigDecimal()) {
                value = value.toBigDecimal()
            }
            result[param] = value
        }
    }
    return result
}
|  | 45 |  | 
/**
 * Verifies Galera database
 *
 * This function checks for the Galera master (or, when 'slave' is set, for the first reachable
 *      Galera slave), tests the connection and, if reachable, obtains the result of the Salt
 *      mysql.status function. The result is then parsed, validated and output to the user.
 *
 * @param env           Salt Connection object or pepperEnv
 * @param slave         Boolean value to enable slave checking (if master is unreachable)
 * @param checkTimeSync Boolean value to enable time sync check
 * @return resultCode   int value used to determine exit status in the calling function:
 *                        0    - status validated, no errors found
 *                        1    - status could not be parsed or is "unknown"
 *                        2    - validation reported errors
 *                        128  - Galera master is not reachable
 *                        129  - list of Galera slave minions could not be obtained
 *                        130  - no test node could be selected (no slave reachable)
 *                        131  - cluster time is desynchronized or could not be determined
 *                        256  - mysql.status call failed
 *                        1024 - mysql.status response is empty
 */
def verifyGaleraStatus(env, slave=false, checkTimeSync=false) {
    def salt = new com.mirantis.mk.Salt()
    def common = new com.mirantis.mk.Common()
    def out = ""
    def status = "unknown"
    def testNode = ""
    if (!slave) {
        try {
            // Declared locally; previously this was an implicit script-binding variable.
            def galeraMaster = salt.getMinions(env, "I@galera:master")
            common.infoMsg("Current Galera master is: ${galeraMaster}")
            salt.minionsReachable(env, "I@salt:master", "I@galera:master")
            testNode = "I@galera:master"
        } catch (Exception e) {
            common.errorMsg('Galera master is not reachable.')
            return 128
        }
    } else {
        // Declared outside the try block because the loop below still needs it.
        def galeraSlaves = []
        try {
            galeraSlaves = salt.getMinions(env, "I@galera:slave")
            common.infoMsg("Testing Galera slave minions: ${galeraSlaves}")
        } catch (Exception e) {
            common.errorMsg("Cannot obtain Galera slave minions list.")
            return 129
        }
        // Use the first slave that answers; unreachable ones only produce a warning.
        for (minion in galeraSlaves) {
            try {
                salt.minionsReachable(env, "I@salt:master", minion)
                testNode = minion
                break
            } catch (Exception e) {
                common.warningMsg("Slave '${minion}' is not reachable.")
            }
        }
    }
    if (!testNode) {
        common.errorMsg("No Galera slave was reachable.")
        return 130
    }
    if (checkTimeSync && !salt.checkClusterTimeSync(env, "I@galera:master or I@galera:slave")) {
        common.errorMsg("Time in cluster is desynchronized or it couldn't be detemined. You should fix this issue manually before proceeding.")
        return 131
    }
    try {
        out = salt.runSaltProcessStep(env, "${testNode}", "mysql.status", [], null, false)
    } catch (Exception e) {
        common.errorMsg('Could not determine mysql status.')
        return 256
    }
    if (out) {
        try {
            status = validateAndPrintGaleraStatusReport(env, out, testNode)
        } catch (Exception e) {
            common.errorMsg('Could not parse the mysql status output. Check it manually.')
            return 1
        }
    } else {
        common.errorMsg("Mysql status response unrecognized or is empty. Response: ${out}")
        return 1024
    }
    if (status == "OK") {
        common.infoMsg("No errors found - MySQL status is ${status}.")
        return 0
    } else if (status == "unknown") {
        common.warningMsg('MySQL status cannot be detemined')
        return 1
    } else {
        common.errorMsg("Errors found.")
        return 2
    }
}
|  | 127 |  | 
/** Validates and prints result of verifyGaleraStatus function
@param env      Salt Connection object or pepperEnv
@param out      Output of the mysql.status Salt function
@param minion   Salt target the status was taken from; "I@galera:master" selects the master
                pillar (galera:master:members), any other value selects the slave pillar
@return status  "OK" when no checked parameter is in error state (warnings are still "OK"),
                "ERROR" otherwise
*/

def validateAndPrintGaleraStatusReport(env, out, minion) {
    def salt = new com.mirantis.mk.Salt()
    def common = new com.mirantis.mk.Common()
    // All working variables are declared with 'def' so they stay local to this call
    // instead of leaking into the script binding shared with the other functions.
    def role
    if (minion == "I@galera:master") {
        role = "master"
    } else {
        role = "slave"
    }
    // The expected cluster size is the number of members configured in the pillar.
    def sizeOut = salt.getReturnValues(salt.getPillar(env, minion, "galera:${role}:members"))
    def expectedClusterSize = sizeOut.size()
    def outlist = out['return'][0]
    // Only the answer of the first minion in the response is evaluated.
    def resultYaml = outlist.get(outlist.keySet()[0]).sort()
    common.prettyPrint(resultYaml)
    def parameters = [
        wsrep_cluster_status: [title: 'Cluster status', expectedValues: ['Primary'], description: ''],
        wsrep_cluster_size: [title: 'Current cluster size', expectedValues: [expectedClusterSize], description: ''],
        wsrep_ready: [title: 'Node status', expectedValues: ['ON', true], description: ''],
        wsrep_local_state_comment: [title: 'Node status comment', expectedValues: ['Joining', 'Waiting on SST', 'Joined', 'Synced', 'Donor'], description: ''],
        wsrep_connected: [title: 'Node connectivity', expectedValues: ['ON', true], description: ''],
        wsrep_local_recv_queue_avg: [title: 'Average size of local reveived queue', expectedThreshold: [warn: 0.5, error: 1.0], description: '(Value above 0 means that the node cannot apply write-sets as fast as it receives them, which can lead to replication throttling)'],
        wsrep_local_send_queue_avg: [title: 'Average size of local send queue', expectedThreshold: [warn: 0.5, error: 1.0], description: '(Value above 0 indicate replication throttling or network throughput issues, such as a bottleneck on the network link.)']
    ]
    // Step 1: attach the actual value (numeric strings converted to BigDecimal) to every check.
    for (key in parameters.keySet()) {
        def value = resultYaml[key]
        if (value instanceof String && value.isBigDecimal()) {
            value = value.toBigDecimal()
        }
        parameters.get(key) << [actualValue: value]
    }
    // Step 2: classify every check as 'ok', 'warn' or 'error'.
    for (key in parameters.keySet()) {
        def param = parameters.get(key)
        def actual = param.get('actualValue')
        if (key == 'wsrep_local_recv_queue_avg' || key == 'wsrep_local_send_queue_avg') {
            // Threshold checks: a missing value counts as an error.
            if (actual == null || (actual > param.get('expectedThreshold').get('error'))) {
                param << [match: 'error']
            } else if (actual > param.get('expectedThreshold').get('warn')) {
                param << [match: 'warn']
            } else {
                param << [match: 'ok']
            }
        } else {
            // Value checks: Groovy '==' is used on purpose so that e.g. an Integer expected
            // value matches a BigDecimal actual value. 'match' is always set, even if the
            // list of expected values were empty.
            def matched = false
            for (expValue in param.get('expectedValues')) {
                if (expValue == actual) {
                    matched = true
                    break
                }
            }
            param << [match: matched ? 'ok' : 'error']
        }
    }
    // Step 3: build the report lines, grouped by severity.
    def clusterInfoReport = []
    def clusterWarningReport = []
    def clusterErrorReport = []
    for (key in parameters.keySet()) {
        def param = parameters.get(key)
        def expValues
        if (param.containsKey('expectedThreshold')) {
            expValues = "below ${param.get('expectedThreshold').get('warn')}"
        } else {
            if (param.get('expectedValues').size() > 1) {
                expValues = param.get('expectedValues').join(' or ')
            } else {
                expValues = param.get('expectedValues')[0]
            }
        }
        def reportString = "${param.title}: ${param.actualValue} (Expected: ${expValues}) ${param.description}"
        if (param.get('match').equals('ok')) {
            clusterInfoReport.add("[OK     ] ${reportString}")
        } else if (param.get('match').equals('warn')) {
            clusterWarningReport.add("[WARNING] ${reportString}")
        } else {
            // Fixed: the original appended a stray ')' to every error line.
            clusterErrorReport.add("[  ERROR] ${reportString}")
        }
    }
    common.infoMsg("CLUSTER STATUS REPORT: ${clusterInfoReport.size()} expected values, ${clusterWarningReport.size()} warnings and ${clusterErrorReport.size()} error found:")
    if (clusterInfoReport.size() > 0) {
        common.infoMsg(clusterInfoReport.join('\n'))
    }
    if (clusterWarningReport.size() > 0) {
        common.warningMsg(clusterWarningReport.join('\n'))
    }
    if (clusterErrorReport.size() > 0) {
        common.errorMsg(clusterErrorReport.join('\n'))
        return "ERROR"
    } else {
        return "OK"
    }
}
|  | 221 |  | 
/** Returns last shutdown node of Galera cluster
@param env      Salt Connection object or pepperEnv
@param nodes    List of nodes to check only (defaults to []). If not provided, it will check all nodes
                listed in the galera:master:members pillar.
                Use this parameter if the cluster splits to several components and you only want to check one of them.
@return target  Salt target ("S@<host>") of the reachable node with the highest 'seqno' in
                /var/lib/mysql/grastate.dat, or "I@galera:master" when the members list cannot be
                retrieved or no node yields a usable seqno
*/

def getGaleraLastShutdownNode(env, nodes = []) {
    def salt = new com.mirantis.mk.Salt()
    def common = new com.mirantis.mk.Common()
    // Declared locally so they do not leak into the script binding.
    def members = []
    def lastNode = [ip: '', seqno: -2]
    try {
        if (nodes) {
            nodes = salt.getIPAddressesForNodenames(env, nodes)
            for (node in nodes) {
                // Prepend one [host: ...] map per node. The map has to be wrapped in a list:
                // 'Map + Collection' would produce a Map instead of a list of member maps,
                // and the loop below would then not find 'member.host'.
                members = [[host: "${node.get(node.keySet()[0])}".toString()]] + members
            }
        } else {
            members = salt.getReturnValues(salt.getPillar(env, "I@galera:master", "galera:master:members"))
        }
    } catch (Exception er) {
        common.errorMsg('Could not retrieve members list')
        return 'I@galera:master'
    }
    if (members) {
        for (member in members) {
            try {
                salt.minionsReachable(env, 'I@salt:master', "S@${member.host}")
                def out = salt.getReturnValues(salt.cmdRun(env, "S@${member.host}", 'cat /var/lib/mysql/grastate.dat | grep "seqno" | cut -d ":" -f2', true, null, false))
                def seqnoText = out.tokenize('\n')[0].trim()
                // Parse as Long: toInteger() throws for sequence numbers beyond the 32-bit
                // range, which made such a node be skipped. Anything that is not an
                // integral number is treated as "unknown" (-2), so it never wins.
                def seqno = seqnoText.isLong() ? seqnoText.toLong() : -2
                def highestSeqno = lastNode.get('seqno')
                // Strictly greater: on equal seqno the node checked first is kept.
                if (seqno > highestSeqno) {
                    lastNode << [ip: "${member.host}", seqno: seqno]
                }
            } catch (Exception er) {
                common.warningMsg("Could not determine 'seqno' value for node ${member.host} ")
            }
        }
    }
    if (lastNode.get('ip') != '') {
        return "S@${lastNode.ip}"
    } else {
        return "I@galera:master"
    }
}
|  | 273 |  | 
/**
 * Restores Galera database
 *
 * Stops mysql on all Galera nodes, picks the last shutdown node (see getGaleraLastShutdownNode),
 * moves its current data directory content to /root/mysql/mysql.bak, resets its cluster address
 * to "gcomm://", runs the xtrabackup state and enforces the galera state on that node, and
 * finally starts mysql on the remaining master/slave nodes.
 *
 * @param env Salt Connection object or pepperEnv
 * @return result of the last Salt call (service.start of mysql on the remaining slaves)
 */
def restoreGaleraDb(env) {
    def salt = new com.mirantis.mk.Salt()
    def common = new com.mirantis.mk.Common()
    // The clean-up steps below are best effort: a failure only produces a warning.
    try {
        salt.runSaltProcessStep(env, 'I@galera:slave', 'service.stop', ['mysql'])
    } catch (Exception er) {
        common.warningMsg('Mysql service already stopped')
    }
    try {
        salt.runSaltProcessStep(env, 'I@galera:master', 'service.stop', ['mysql'])
    } catch (Exception er) {
        common.warningMsg('Mysql service already stopped')
    }
    // Declared locally; previously this was an implicit script-binding variable.
    def lastNodeTarget = getGaleraLastShutdownNode(env)
    try {
        salt.cmdRun(env, 'I@galera:slave', "rm /var/lib/mysql/ib_logfile*")
    } catch (Exception er) {
        common.warningMsg('Files are not present')
    }
    try {
        salt.cmdRun(env, 'I@galera:slave', "rm  /var/lib/mysql/grastate.dat")
    } catch (Exception er) {
        common.warningMsg('Files are not present')
    }
    try {
        salt.cmdRun(env, lastNodeTarget, "mkdir /root/mysql/mysql.bak")
    } catch (Exception er) {
        common.warningMsg('Directory already exists')
    }
    try {
        salt.cmdRun(env, lastNodeTarget, "rm -rf /root/mysql/mysql.bak/*")
    } catch (Exception er) {
        common.warningMsg('Directory already empty')
    }
    try {
        salt.cmdRun(env, lastNodeTarget, "mv /var/lib/mysql/* /root/mysql/mysql.bak")
    } catch (Exception er) {
        common.warningMsg('Files were already moved')
    }
    try {
        salt.runSaltProcessStep(env, lastNodeTarget, 'file.remove', ["/var/lib/mysql/.galera_bootstrap"])
    } catch (Exception er) {
        common.warningMsg('File is not present')
    }
    // Replace the line containing 'gcomm' in my.cnf with an empty cluster address.
    salt.cmdRun(env, lastNodeTarget, "sed -i '/gcomm/c\\wsrep_cluster_address=\"gcomm://\"' /etc/mysql/my.cnf")
    def backupDir = salt.getReturnValues(salt.getPillar(env, lastNodeTarget, 'xtrabackup:client:backup_dir'))
    // Fall back to the default location when the pillar does not define a backup directory.
    if (backupDir == null || backupDir.isEmpty()) {
        backupDir = '/var/backups/mysql/xtrabackup'
    }
    salt.runSaltProcessStep(env, lastNodeTarget, 'file.remove', ["${backupDir}/dbrestored"])
    salt.cmdRun(env, 'I@xtrabackup:client', "su root -c 'salt-call state.sls xtrabackup'")
    salt.enforceState(env, lastNodeTarget, 'galera')
    // wait until mysql service on the restored node is up
    try {
        salt.commandStatus(env, lastNodeTarget, 'service mysql status', 'running')
    } catch (Exception er) {
        input message: "Database is not running please fix it first and only then click on PROCEED."
    }

    salt.runSaltProcessStep(env, "I@galera:master and not ${lastNodeTarget}", 'service.start', ['mysql'])
    salt.runSaltProcessStep(env, "I@galera:slave and not ${lastNodeTarget}", 'service.start', ['mysql'])
}