Update Galera functions for verify/restore

* update the function for getting the cluster status so that it queries any
  available node, not only the master;
* update the function for restoring the Galera cluster (a rough usage
  sketch of the new interface follows);
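
A rough usage sketch (the pepperEnv variable and the error handling shown
here are illustrative, not part of this change): verifyGaleraStatus() now
returns a status map instead of an integer exit code, and
restoreGaleraCluster() consumes that map:

    def galera = new com.mirantis.mk.Galera()
    def galeraStatus = galera.verifyGaleraStatus(pepperEnv)
    if (galeraStatus['error'] != 0) {
        // rejoin/recover the failed members; pass true as the last
        // argument to also run the DB restore procedure
        galera.restoreGaleraCluster(pepperEnv, galeraStatus, false)
    }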

Related: PROD-32619

Change-Id: Ie70aee7ad9aa255a2cdcad1cbbc16c6953779516
(cherry picked from commit 527bf4b74356da652ef6e6a0a71e687fcf8ac688)
diff --git a/src/com/mirantis/mk/Galera.groovy b/src/com/mirantis/mk/Galera.groovy
index 3a10a1c..e5ffe12 100644
--- a/src/com/mirantis/mk/Galera.groovy
+++ b/src/com/mirantis/mk/Galera.groovy
@@ -50,50 +50,44 @@
 *      of Salt mysql.status function. The result is then parsed, validated and output to the user.
  *
  * @param env           Salt Connection object or pepperEnv
- * @param slave         Boolean value to enable slave checking (if master in unreachable)
  * @param checkTimeSync Boolean value to enable time sync check
  * @return resultCode   int values used to determine exit status in the calling function
  */
-def verifyGaleraStatus(env, slave=false, checkTimeSync=false) {
+def verifyGaleraStatus(env, checkTimeSync=false) {
     def salt = new com.mirantis.mk.Salt()
     def common = new com.mirantis.mk.Common()
-    def out = ""
-    def status = "unknown"
-    def testNode = ""
-    if (!slave) {
-        try {
-            galeraMaster = salt.getMinions(env, "I@galera:master")
-            common.infoMsg("Current Galera master is: ${galeraMaster}")
-            salt.minionsReachable(env, "I@salt:master", "I@galera:master")
-            testNode = "I@galera:master"
-        } catch (Exception e) {
-            common.errorMsg('Galera master is not reachable.')
-            common.errorMsg(e.getMessage())
-            return 128
-        }
-    } else {
-        try {
-            galeraSlaves = salt.getMinions(env, "I@galera:slave")
-            common.infoMsg("Testing Galera slave minions: ${galeraSlaves}")
-        } catch (Exception e) {
-            common.errorMsg("Cannot obtain Galera slave minions list.")
-            common.errorMsg(e.getMessage())
-            return 129
-        }
-        for (minion in galeraSlaves) {
+    def mysqlStatusReport = [
+        'clusterMembersOnPower': [],
+        'clusterMembersNotAvailable': [],
+        'clusterMembersInClusterAlive': [],
+        'clusterMembersNotAlive': [],
+        'error': 0
+    ]
+
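+    // check which Galera cluster members respond to the Salt master and note the unreachable ones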
+    try {
+        def clusterMembers = salt.getMinions(env, "I@galera:master or I@galera:slave")
+        for (minion in clusterMembers) {
             try {
                 salt.minionsReachable(env, "I@salt:master", minion)
-                testNode = minion
-                break
+                mysqlStatusReport['clusterMembersOnPower'] << minion
             } catch (Exception e) {
                 common.warningMsg("Slave '${minion}' is not reachable.")
+                mysqlStatusReport['clusterMembersNotAvailable'] << minion
             }
         }
+    } catch (Exception e) {
+        common.errorMsg('Cannot obtain Galera minions list.')
+        common.errorMsg(e.getMessage())
+        mysqlStatusReport['error'] = 128
+        return mysqlStatusReport
     }
-    if (!testNode) {
-        common.errorMsg("No Galera slave was reachable.")
-        return 130
+
+    if (!mysqlStatusReport['clusterMembersOnPower']) {
+        common.errorMsg("No Galera member was reachable.")
+        mysqlStatusReport['error'] = 130
+        return mysqlStatusReport
     }
+
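+    // check disk i/o utilization on the xtrabackup nodes before proceeding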
     def checkTargets = salt.getMinions(env, "I@xtrabackup:client or I@xtrabackup:server")
     for (checkTarget in checkTargets) {
         def nodeStatus = salt.minionsReachable(env, 'I@salt:master', checkTarget, null, 10, 5)
@@ -101,13 +95,15 @@
             def iostatRes = salt.getIostatValues(['saltId': env, 'target': checkTarget, 'parameterName': "%util", 'output': true])
             if (iostatRes == [:]) {
                 common.errorMsg("Recevived empty response from iostat call on ${checkTarget}. Maybe 'sysstat' package is not installed?")
-                return 140
+                mysqlStatusReport['error'] = 140
+                return mysqlStatusReport
             }
             for (int i = 0; i < iostatRes.size(); i++) {
                 def diskKey = iostatRes.keySet()[i]
                 if (!(iostatRes[diskKey].toString().isBigDecimal() && (iostatRes[diskKey].toBigDecimal() < 50 ))) {
                     common.errorMsg("Disk ${diskKey} has to high i/o utilization. Maximum value is 50 and current value is ${iostatRes[diskKey]}.")
-                    return 141
+                    mysqlStatusReport['error'] = 141
+                    return mysqlStatusReport
                 }
             }
         }
@@ -115,36 +111,65 @@
     common.infoMsg("Disk i/o utilization was checked and everything seems to be in order.")
     if (checkTimeSync && !salt.checkClusterTimeSync(env, "I@galera:master or I@galera:slave")) {
         common.errorMsg("Time in cluster is desynchronized or it couldn't be detemined. You should fix this issue manually before proceeding.")
-        return 131
+        mysqlStatusReport['error'] = 131
+        return mysqlStatusReport
     }
+
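+    // of the reachable members, find those that report a wsrep cluster status, i.e. are actually joined to the cluster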
+    for(member in mysqlStatusReport['clusterMembersOnPower']) {
+        def clusterStatus = getWsrepParameters(env, member, 'wsrep_cluster_status')
+        if (clusterStatus['wsrep_cluster_status']) {
+            mysqlStatusReport['clusterMembersInClusterAlive'] << member
+        } else {
+            mysqlStatusReport['clusterMembersNotAlive'] << member
+        }
+    }
+    if (!mysqlStatusReport['clusterMembersInClusterAlive']) {
+        common.errorMsg("Could not determine mysql status, because all nodes are not connected to cluster.")
+        mysqlStatusReport['error'] = 256
+        return mysqlStatusReport
+    }
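+    // pick one alive member (the first after sorting, for determinism) to query the mysql status from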
+    def testNode = mysqlStatusReport['clusterMembersInClusterAlive'].sort().first()
+
     try {
-        out = salt.runSaltProcessStep(env, "${testNode}", "mysql.status", [], null, false)
+        mysqlStatusReport['statusRaw'] = salt.runSaltProcessStep(env, testNode, "mysql.status", [], null, false)
     } catch (Exception e) {
         common.errorMsg('Could not determine mysql status.')
         common.errorMsg(e.getMessage())
-        return 256
+        mysqlStatusReport['error'] = 256
+        return mysqlStatusReport
     }
-    if (out) {
+
+    def status = "unknown"
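+    // check whether the selected node is configured as galera master; used when validating the status report below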
+    def galeraMasterNode = salt.getReturnValues(salt.getPillar(env, testNode, "galera:master:enabled")) ? true : false
+
+    if (mysqlStatusReport['statusRaw']) {
         try {
-            status = validateAndPrintGaleraStatusReport(env, out, testNode)
+            status = validateAndPrintGaleraStatusReport(env, mysqlStatusReport['statusRaw'], testNode, galeraMasterNode)
         } catch (Exception e) {
             common.errorMsg('Could not parse the mysql status output. Check it manually.')
             common.errorMsg(e.getMessage())
-            return 1
         }
     } else {
-        common.errorMsg("Mysql status response unrecognized or is empty. Response: ${out}")
-        return 1024
+        common.errorMsg("Mysql status response unrecognized or is empty. Response: ${mysqlStatusReport['statusRaw']}")
     }
+    if (mysqlStatusReport['clusterMembersNotAvailable']) {
+        common.errorMsg("Next nodes are unavailable: ${mysqlStatusReport['clusterMembersNotAvailable'].join(',')}")
+    }
+    if (mysqlStatusReport['clusterMembersNotAlive']) {
+        common.errorMsg("Next nodes are not connected to cluster: ${mysqlStatusReport['clusterMembersNotAlive'].join(',')}")
+    }
+
     if (status == "OK") {
         common.infoMsg("No errors found - MySQL status is ${status}.")
-        return 0
+        return mysqlStatusReport
     } else if (status == "unknown") {
         common.warningMsg('MySQL status cannot be determined')
-        return 1
+        mysqlStatusReport['error'] = 1
+        return mysqlStatusReport
     } else {
         common.errorMsg("Errors found.")
-        return 2
+        mysqlStatusReport['error'] = 2
+        return mysqlStatusReport
     }
 }
 
@@ -154,13 +179,12 @@
 @return status  "OK", "ERROR" or "unknown" depending on the result of the validation
 */
 
-def validateAndPrintGaleraStatusReport(env, out, minion) {
+def validateAndPrintGaleraStatusReport(env, out, minion, nodeRoleMaster=false) {
     def salt = new com.mirantis.mk.Salt()
     def common = new com.mirantis.mk.Common()
-    if (minion == "I@galera:master") {
-        role = "master"
-    } else {
-        role = "slave"
+    def role = 'slave'
+    if (nodeRoleMaster) {
+        role = 'master'
     }
     sizeOut = salt.getReturnValues(salt.getPillar(env, minion, "galera:${role}:members"))
     expected_cluster_size = sizeOut.size()
@@ -308,10 +332,10 @@
 */
 def manageServiceMysql(env, targetNode, action, checkStatus=true, checkState='running') {
     def salt = new com.mirantis.mk.Salt()
-    salt.runSaltProcessStep(env, lastNodeTarget, "service.${action}", ['mysql'])
+    salt.runSaltProcessStep(env, targetNode, "service.${action}", ['mysql'])
     if (checkStatus) {
         try {
-            salt.commandStatus(env, lastNodeTarget, 'service mysql status', checkState)
+            salt.commandStatus(env, targetNode, 'service mysql status', checkState)
         } catch (Exception er) {
             input message: "Database is not running please fix it first and only then click on PROCEED."
         }
@@ -321,34 +345,62 @@
 /**
  * Restores Galera cluster
  * @param env           Salt Connection object or pepperEnv
- * @param runRestoreDb  Boolean to determine if the restoration of DB should be run as well
+ * @param galeraStatus  Map with the Galera cluster status as returned by the verifyGaleraStatus function
+ * @param restoreDb     Boolean to determine whether the DB restore procedure should be run as well
  * @return output of salt commands
  */
-def restoreGaleraCluster(env, runRestoreDb=true) {
+def restoreGaleraCluster(env, galeraStatus, restoreDb=true) {
     def salt = new com.mirantis.mk.Salt()
     def common = new com.mirantis.mk.Common()
-    lastNodeTarget = getGaleraLastShutdownNode(env)
-    manageServiceMysql(env, lastNodeTarget, 'stop', false)
-    if (runRestoreDb) {
-        salt.cmdRun(env, lastNodeTarget, "mkdir -p /root/mysql/mysql.bak")
-        salt.cmdRun(env, lastNodeTarget, "rm -rf /root/mysql/mysql.bak/*")
-        salt.cmdRun(env, lastNodeTarget, "mv /var/lib/mysql/* /root/mysql/mysql.bak")
+    def nodesToRecover = []
+    def total = false // whether the whole cluster has to be recovered
+    if (galeraStatus['clusterMembersNotAlive']) {
+        nodesToRecover = galeraStatus['clusterMembersNotAlive']
+        if (galeraStatus['clusterMembersInClusterAlive'].size() == 0) {
+            total = true
+        }
+    } else {
+        nodesToRecover = galeraStatus['clusterMembersInClusterAlive']
+        total = true
     }
-    salt.cmdRun(env, lastNodeTarget, "rm -f /var/lib/mysql/.galera_bootstrap")
 
-    // make sure that gcom parameter is empty
-    salt.cmdRun(env, lastNodeTarget, "sed -i '/gcomm/c\\wsrep_cluster_address=\"gcomm://\"' /etc/mysql/my.cnf")
+    def lastNodeTarget = ''
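+    // full recovery: stop every member and wipe galera state on all nodes except the one shut down last;
+    // partial recovery: stop only the members that are being recovered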
+    if (total) {
+        manageServiceMysql(env, 'I@galera:slave', 'stop', true, 'inactive')
+        manageServiceMysql(env, 'I@galera:master', 'stop', true, 'inactive')
+        lastNodeTarget = getGaleraLastShutdownNode(env) // in case the master was already down before
+        salt.cmdRun(env, "( I@galera:master or I@galera:slave ) and not ${lastNodeTarget}", "rm -f /var/lib/mysql/ib_logfile*")
+        salt.cmdRun(env, "( I@galera:master or I@galera:slave ) and not ${lastNodeTarget}", "rm -f /var/lib/mysql/grastate.dat")
+    } else {
+        lastNodeTarget = nodesToRecover.join(' or ')
+        manageServiceMysql(env, lastNodeTarget, 'stop', true, 'inactive')
+    }
 
-    // run restore of DB
-    if (runRestoreDb) {
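+    // move the current contents of /var/lib/mysql into a timestamped backup directory before restoring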
+    if (restoreDb) {
+        def timestamp = common.getDatetime()
+        salt.cmdRun(env, lastNodeTarget, "mkdir -p /root/mysql")
+        def bakDir = salt.getReturnValues(salt.cmdRun(env, lastNodeTarget, "mktemp -d --suffix='_${timestamp}' /root/mysql/mysql.bak.XXXXXX", false))
+        salt.cmdRun(env, lastNodeTarget, "mv /var/lib/mysql/* ${bakDir} || echo 'Nothing to backup from directory /var/lib/mysql/'")
+    }
+    if (total) {
+        // make sure that the gcomm parameter is empty
+        salt.cmdRun(env, lastNodeTarget, "sed -i '/gcomm/c\\wsrep_cluster_address=\"gcomm://\"' /etc/mysql/my.cnf")
+    } else if (!restoreDb) {
+        // node rejoin
+        salt.cmdRun(env, lastNodeTarget, "rm -f /var/lib/mysql/ib_logfile*")
+        salt.cmdRun(env, lastNodeTarget, "rm -f /var/lib/mysql/grastate.dat")
+    }
+
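+    // run the actual database restore on the recovery target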
+    if (restoreDb) {
         restoreGaleraDb(env, lastNodeTarget)
     }
 
     manageServiceMysql(env, lastNodeTarget, 'start')
 
-    // apply any changes in configuration and return value to gcom parameter and then restart mysql to catch
-    salt.enforceState(['saltId': env, 'target': lastNodeTarget, 'state': 'galera'])
-    manageServiceMysql(env, lastNodeTarget, 'restart')
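+    // for a full recovery, start mysql on the remaining members and re-apply the galera configuration on the bootstrapped node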
+    if (total) {
+        manageServiceMysql(env, "( I@galera:master or I@galera:slave ) and not ${lastNodeTarget}", 'start')
+        salt.runSaltProcessStep(env, lastNodeTarget, 'state.sls_id', ['galera_config', 'galera'])
+    }
 }
 
 /**