diff --git a/src/com/mirantis/mk/Common.groovy b/src/com/mirantis/mk/Common.groovy
index 1d4a29d..1cb3d6e 100644
--- a/src/com/mirantis/mk/Common.groovy
+++ b/src/com/mirantis/mk/Common.groovy
@@ -525,6 +525,9 @@
* Retry commands passed to body
+ * Don't use the common.retry method to retry the salt.enforceState method. Use the retries parameter
+ * built into the salt.enforceState method instead to ensure correct functionality.
+ *
* @param times Number of retries
* @param delay Delay between retries (in seconds)
* @param body Commands to be in retry block
diff --git a/src/com/mirantis/mk/Galera.groovy b/src/com/mirantis/mk/Galera.groovy
new file mode 100644
index 0000000..96777a7
--- /dev/null
+++ b/src/com/mirantis/mk/Galera.groovy
@@ -0,0 +1,328 @@
+ *
+ * Galera functions
+ *
+ */
+ * Returns parameters from mysql.status output on given target node
+ *
+ * @param env Salt Connection object or pepperEnv
+ * @param target Targeted node
+ * @param parameters Parameters to be returned (String or list of Strings). If no parameters are provided or it is set to '[]', all of them are returned.
+ * @param print Boolean value; when true, the whole mysql.status result is pretty-printed
+ * @return result List of parameters with its values
+ */
+def getWsrepParameters(env, target, parameters=[], print=false) {
+    // Instantiate the library helpers locally; the previous body used `salt` and
+    // `common` without ever defining them in this function.
+    def salt = new com.mirantis.mk.Salt()
+    def common = new com.mirantis.mk.Common()
+    def out = salt.runSaltProcessStep(env, "${target}", "mysql.status", [], null, false)
+    // Only the first entry of the first 'return' element is evaluated.
+    def outlist = out['return'][0]
+    def resultYaml = outlist.get(outlist.keySet()[0]).sort()
+    if (print) {
+        common.prettyPrint(resultYaml)
+    }
+    if (parameters instanceof CharSequence) {
+        // Single parameter name (String or GString): return a one-entry map keyed
+        // by that name. The key is parenthesised so its value is used as the key,
+        // not the literal text 'key'.
+        def name = parameters.toString()
+        def value = resultYaml[name]
+        if (value instanceof String && value.isBigDecimal()) {
+            // Numeric strings are converted so callers can compare them as numbers.
+            value = value.toBigDecimal()
+        }
+        return [(name): value]
+    }
+    if (parameters == null || parameters == []) {
+        // Nothing requested explicitly: hand back the whole (sorted) status.
+        return resultYaml
+    }
+    // List of names: one single-entry map per requested parameter, in request order.
+    def result = []
+    for (key in parameters) {
+        // toString() keeps GString names usable as lookup keys.
+        def name = key.toString()
+        def value = resultYaml[name]
+        if (value instanceof String && value.isBigDecimal()) {
+            value = value.toBigDecimal()
+        }
+        result << [(name): value]
+    }
+    return result
+ * Verifies Galera database
+ *
+ * This function checks for Galera master, tests connection and if reachable, it obtains the result
+ * of the Salt mysql.status function. The result is then parsed, validated and printed for the user.
+ *
+ * @param env Salt Connection object or pepperEnv
+ * @param slave Boolean value to enable slave checking (if master is unreachable)
+ * @param checkTimeSync Boolean value to enable time sync check
+ * @return resultCode int values used to determine exit status in the calling function
+ */
+def verifyGaleraStatus(env, slave=false, checkTimeSync=false) { // Picks a reachable Galera node, reads mysql.status from it and maps the outcome to an int code
+ def salt = new // NOTE(review): the class name after 'new' is missing in this view - presumably com.mirantis.mk.Salt(); confirm
+ def common = new // NOTE(review): presumably com.mirantis.mk.Common(); confirm
+ def out = ""
+ def status = "unknown" // stays "unknown" unless the validation below overwrites it
+ def testNode = "" // target the status is read from; empty means no usable node was found
+ if (!slave) {
+ try {
+ galeraMaster = salt.getMinions(env, "I@galera:master")
+ common.infoMsg("Current Galera master is: ${galeraMaster}")
+ salt.minionsReachable(env, "I@salt:master", "I@galera:master")
+ testNode = "I@galera:master"
+ } catch (Exception e) {
+ common.errorMsg('Galera master is not reachable.')
+ return 128 // any exception while looking up or reaching the master
+ }
+ } else {
+ try {
+ galeraSlaves = salt.getMinions(env, "I@galera:slave")
+ common.infoMsg("Testing Galera slave minions: ${galeraSlaves}")
+ } catch (Exception e) {
+ common.errorMsg("Cannot obtain Galera slave minions list.")
+ return 129 // the slave list could not be obtained
+ }
+ for (minion in galeraSlaves) { // the first slave that passes the reachability check is used
+ try {
+ salt.minionsReachable(env, "I@salt:master", minion)
+ testNode = minion
+ break
+ } catch (Exception e) {
+ common.warningMsg("Slave '${minion}' is not reachable.")
+ }
+ }
+ }
+ if (!testNode) { // in master mode testNode is always set at this point, so this only triggers in slave mode
+ common.errorMsg("No Galera slave was reachable.")
+ return 130
+ }
+ if (checkTimeSync && !salt.checkClusterTimeSync(env, "I@galera:master or I@galera:slave")) { // optional check across master and slaves
+ common.errorMsg("Time in cluster is desynchronized or it couldn't be detemined. You should fix this issue manually before proceeding.")
+ return 131
+ }
+ try {
+ out = salt.runSaltProcessStep(env, "${testNode}", "mysql.status", [], null, false)
+ } catch (Exception e) {
+ common.errorMsg('Could not determine mysql status.')
+ return 256 // the mysql.status call itself failed
+ }
+ if (out) {
+ try {
+ status = validateAndPrintGaleraStatusReport(env, out, testNode)
+ } catch (Exception e) {
+ common.errorMsg('Could not parse the mysql status output. Check it manually.')
+ return 1 // same code as the "unknown" outcome below
+ }
+ } else {
+ common.errorMsg("Mysql status response unrecognized or is empty. Response: ${out}")
+ return 1024 // empty (falsy) response
+ }
+ if (status == "OK") {
+ common.infoMsg("No errors found - MySQL status is ${status}.")
+ return 0
+ } else if (status == "unknown") { // NOTE(review): the helper visible in this file returns only "OK" or "ERROR", so this branch looks unreachable - confirm
+ common.warningMsg('MySQL status cannot be detemined')
+ return 1
+ } else {
+ common.errorMsg("Errors found.")
+ return 2 // any other status, i.e. "ERROR"
+ }
+/** Validates and prints result of verifyGaleraStatus function
+@param env Salt Connection object or pepperEnv
+@param out Output of the mysql.status Salt function
+@return status "OK", "ERROR" or "unknown" depending on result of validation
+def validateAndPrintGaleraStatusReport(env, out, minion) { // Compares selected wsrep_* values from mysql.status with expectations, prints a report and returns "OK" or "ERROR"
+ def salt = new // NOTE(review): the class name after 'new' is missing in this view - presumably com.mirantis.mk.Salt(); confirm
+ def common = new // NOTE(review): presumably com.mirantis.mk.Common(); confirm
+ if (minion == "I@galera:master") { // the role selects which pillar key holds the member list
+ role = "master"
+ } else {
+ role = "slave"
+ }
+ sizeOut = salt.getReturnValues(salt.getPillar(env, minion, "galera:${role}:members"))
+ expected_cluster_size = sizeOut.size() // expected cluster size = number of members listed in the pillar
+ outlist = out['return'][0]
+ resultYaml = outlist.get(outlist.keySet()[0]).sort() // only the first entry of the response is evaluated
+ common.prettyPrint(resultYaml)
+ parameters = [ // one spec per checked status variable: either a list of accepted values or warn/error thresholds
+ wsrep_cluster_status: [title: 'Cluster status', expectedValues: ['Primary'], description: ''],
+ wsrep_cluster_size: [title: 'Current cluster size', expectedValues: [expected_cluster_size], description: ''],
+ wsrep_ready: [title: 'Node status', expectedValues: ['ON', true], description: ''],
+ wsrep_local_state_comment: [title: 'Node status comment', expectedValues: ['Joining', 'Waiting on SST', 'Joined', 'Synced', 'Donor'], description: ''],
+ wsrep_connected: [title: 'Node connectivity', expectedValues: ['ON', true], description: ''],
+ wsrep_local_recv_queue_avg: [title: 'Average size of local reveived queue', expectedThreshold: [warn: 0.5, error: 1.0], description: '(Value above 0 means that the node cannot apply write-sets as fast as it receives them, which can lead to replication throttling)'],
+ wsrep_local_send_queue_avg: [title: 'Average size of local send queue', expectedThreshold: [warn: 0.5, error: 1.0], description: '(Value above 0 indicate replication throttling or network throughput issues, such as a bottleneck on the network link.)']
+ ]
+ for (key in parameters.keySet()) { // pass 1: attach the value actually reported to each spec
+ value = resultYaml[key]
+ if (value instanceof String && value.isBigDecimal()) { // numeric strings become BigDecimal so they can be compared with the thresholds
+ value = value.toBigDecimal()
+ }
+ parameters.get(key) << [actualValue: value]
+ }
+ for (key in parameters.keySet()) { // pass 2: classify each spec as 'ok', 'warn' or 'error'
+ param = parameters.get(key)
+ if (key == 'wsrep_local_recv_queue_avg' || key == 'wsrep_local_send_queue_avg') {
+ if (param.get('actualValue') > param.get('expectedThreshold').get('error')) { // NOTE(review): a missing (null) actualValue presumably compares as not-greater and ends up 'ok' - confirm
+ param << [match: 'error']
+ } else if (param.get('actualValue') > param.get('expectedThreshold').get('warn')) {
+ param << [match: 'warn']
+ } else {
+ param << [match: 'ok']
+ }
+ } else {
+ for (expValue in param.get('expectedValues')) { // 'ok' on the first equal expected value, otherwise the result stays 'error'
+ if (expValue == param.get('actualValue')) {
+ param << [match: 'ok']
+ break
+ } else {
+ param << [match: 'error']
+ }
+ }
+ }
+ }
+ cluster_info_report = []
+ cluster_warning_report = []
+ cluster_error_report = []
+ for (key in parameters.keySet()) { // pass 3: build one report line per spec and sort it into the matching bucket
+ param = parameters.get(key)
+ if (param.containsKey('expectedThreshold')) {
+ expValues = "below ${param.get('expectedThreshold').get('warn')}"
+ } else {
+ if (param.get('expectedValues').size() > 1) {
+ expValues = param.get('expectedValues').join(' or ')
+ } else {
+ expValues = param.get('expectedValues')[0]
+ }
+ }
+ reportString = "${param.title}: ${param.actualValue} (Expected: ${expValues}) ${param.description}"
+ if (param.get('match').equals('ok')) {
+ cluster_info_report.add("[OK ] ${reportString}")
+ } else if (param.get('match').equals('warn')) {
+ cluster_warning_report.add("[WARNING] ${reportString}")
+ } else {
+ cluster_error_report.add("[ ERROR] ${reportString})") // NOTE(review): the ')' after ${reportString} looks like a stray character in the message - confirm
+ }
+ }
+ common.infoMsg("CLUSTER STATUS REPORT: ${cluster_info_report.size()} expected values, ${cluster_warning_report.size()} warnings and ${cluster_error_report.size()} error found:")
+ if (cluster_info_report.size() > 0) {
+ common.infoMsg(cluster_info_report.join('\n'))
+ }
+ if (cluster_warning_report.size() > 0) {
+ common.warningMsg(cluster_warning_report.join('\n'))
+ }
+ if (cluster_error_report.size() > 0) { // warnings alone do not change the returned status
+ common.errorMsg(cluster_error_report.join('\n'))
+ return "ERROR"
+ } else {
+ return "OK"
+ }
+def getGaleraLastShutdownNode(env) { // Returns a Salt target for the member with the highest 'seqno' in grastate.dat, or "I@galera:master" as a fallback
+ def salt = new // NOTE(review): the class name after 'new' is missing in this view - presumably com.mirantis.mk.Salt(); confirm
+ def common = new // NOTE(review): presumably com.mirantis.mk.Common(); confirm
+ members = ''
+ lastNode = [ip: '', seqno: -2] // best candidate so far; -2 is the initial value that any greater seqno replaces
+ try {
+ members = salt.getReturnValues(salt.getPillar(env, "I@galera:master", "galera:master:members"))
+ } catch (Exception er) {
+ common.errorMsg('Could not retrieve members list')
+ return 'I@galera:master' // fallback when the member list cannot be read
+ }
+ if (members) {
+ for (member in members) {
+ try {
+ salt.minionsReachable(env, 'I@salt:master', "S@${}") // NOTE(review): the interpolated expression is empty in this view - presumably the member's address; confirm
+ out = salt.getReturnValues(salt.cmdRun(env, "S@${}", 'cat /var/lib/mysql/grastate.dat | grep "seqno" | cut -d ":" -f2', true, null, false)) // reads the value after "seqno:" from grastate.dat
+ seqno = out.tokenize('\n')[0].trim() // only the first output line is used
+ if (seqno.isNumber()) {
+ seqno = seqno.toInteger() // NOTE(review): presumably throws for values outside the int range, which would skip the node via the catch below - confirm
+ } else {
+ seqno = -2 // non-numeric output never wins the comparison below
+ }
+ highestSeqno = lastNode.get('seqno')
+ if (seqno > highestSeqno) { // strictly greater: on a tie the earlier member is kept
+ lastNode << [ip: "${}", seqno: seqno]
+ }
+ } catch (Exception er) {
+ common.warningMsg("Could not determine 'seqno' value for node ${} ") // best effort: the node is skipped and the loop continues
+ }
+ }
+ }
+ if (lastNode.get('ip') != '') {
+ return "S@${lastNode.ip}"
+ } else {
+ return "I@galera:master" // no member produced a usable seqno
+ }
+ * Restores Galera database
+ * @param env Salt Connection object or pepperEnv
+ * @return output of salt commands
+ */
+def restoreGaleraDb(env) { // Stops mysql everywhere, moves the old data aside on one node, runs the xtrabackup state and starts mysql again
+ def salt = new // NOTE(review): the class name after 'new' is missing in this view - presumably com.mirantis.mk.Salt(); confirm
+ def common = new // NOTE(review): presumably com.mirantis.mk.Common(); confirm
+ try {
+ salt.runSaltProcessStep(env, 'I@galera:slave', 'service.stop', ['mysql']) // slaves are stopped first, then the master
+ } catch (Exception er) {
+ common.warningMsg('Mysql service already stopped') // any failure here is only reported as a warning
+ }
+ try {
+ salt.runSaltProcessStep(env, 'I@galera:master', 'service.stop', ['mysql'])
+ } catch (Exception er) {
+ common.warningMsg('Mysql service already stopped')
+ }
+ lastNodeTarget = getGaleraLastShutdownNode(env) // node the remaining steps operate on
+ try {
+ salt.cmdRun(env, 'I@galera:slave', "rm /var/lib/mysql/ib_logfile*")
+ } catch (Exception er) {
+ common.warningMsg('Files are not present')
+ }
+ try {
+ salt.cmdRun(env, 'I@galera:slave', "rm /var/lib/mysql/grastate.dat")
+ } catch (Exception er) {
+ common.warningMsg('Files are not present')
+ }
+ try {
+ salt.cmdRun(env, lastNodeTarget, "mkdir /root/mysql/mysql.bak") // backup location for the current data directory
+ } catch (Exception er) {
+ common.warningMsg('Directory already exists')
+ }
+ try {
+ salt.cmdRun(env, lastNodeTarget, "rm -rf /root/mysql/mysql.bak/*") // any previous backup content is discarded
+ } catch (Exception er) {
+ common.warningMsg('Directory already empty')
+ }
+ try {
+ salt.cmdRun(env, lastNodeTarget, "mv /var/lib/mysql/* /root/mysql/mysql.bak")
+ } catch (Exception er) {
+ common.warningMsg('Files were already moved')
+ }
+ try {
+ salt.runSaltProcessStep(env, lastNodeTarget, 'file.remove', ["/var/lib/mysql/.galera_bootstrap"])
+ } catch (Exception er) {
+ common.warningMsg('File is not present')
+ }
+ salt.cmdRun(env, lastNodeTarget, "sed -i '/gcomm/c\\wsrep_cluster_address=\"gcomm://\"' /etc/mysql/my.cnf") // replaces the line matching 'gcomm' with an empty gcomm:// cluster address
+ def backup_dir = salt.getReturnValues(salt.getPillar(env, lastNodeTarget, 'xtrabackup:client:backup_dir'))
+ if(backup_dir == null || backup_dir.isEmpty()) { backup_dir='/var/backups/mysql/xtrabackup' } // default used when the pillar value is missing or empty
+ salt.runSaltProcessStep(env, lastNodeTarget, 'file.remove', ["${backup_dir}/dbrestored"]) // NOTE(review): presumably a marker file that would otherwise prevent the restore - confirm
+ salt.cmdRun(env, 'I@xtrabackup:client', "su root -c 'salt-call state.sls xtrabackup'")
+ salt.runSaltProcessStep(env, lastNodeTarget, 'service.start', ['mysql'])
+ // wait until mysql service on galera master is up
+ try {
+ salt.commandStatus(env, lastNodeTarget, 'service mysql status', 'running')
+ } catch (Exception er) {
+ input message: "Database is not running please fix it first and only then click on PROCEED." // NOTE(review): presumably the Jenkins 'input' step, pausing until a user confirms - confirm
+ }
+ salt.runSaltProcessStep(env, "I@galera:master and not ${lastNodeTarget}", 'service.start', ['mysql']) // start the remaining nodes, excluding the one already started
+ salt.runSaltProcessStep(env, "I@galera:slave and not ${lastNodeTarget}", 'service.start', ['mysql']) // as the last statement, its result is the function's return value
diff --git a/src/com/mirantis/mk/Openstack.groovy b/src/com/mirantis/mk/Openstack.groovy
index 82fd895..da2495d 100644
--- a/src/com/mirantis/mk/Openstack.groovy
+++ b/src/com/mirantis/mk/Openstack.groovy
@@ -444,12 +444,17 @@
def global_apps = salt.getConfig(env, 'I@salt:master:enabled:true', 'orchestration.upgrade.applications')
def node_apps = salt.getPillar(env, target, '__reclass__:applications')['return'][0].values()[0]
+ def node_pillar = salt.getPillar(env, target)
def node_sorted_apps = []
if ( !global_apps['return'][0].values()[0].isEmpty() ) {
Map<String,Integer> _sorted_apps = [:]
for (k in global_apps['return'][0].values()[0].keySet()) {
if (k in node_apps) {
- _sorted_apps[k] = global_apps['return'][0].values()[0][k].values()[0].toInteger()
+ if (node_pillar['return'][0].values()[k]['upgrade']['enabled'][0] != null) {
+ if (node_pillar['return'][0].values()[k]['upgrade']['enabled'][0].toBoolean()) {
+ _sorted_apps[k] = global_apps['return'][0].values()[0][k].values()[0].toInteger()
+ }
+ }
node_sorted_apps = common.SortMapByValueAsc(_sorted_apps).keySet()
@@ -504,280 +509,30 @@
\ No newline at end of file
diff --git a/src/com/mirantis/mk/Orchestrate.groovy b/src/com/mirantis/mk/Orchestrate.groovy
index c6d262a..0e2c239 100644
--- a/src/com/mirantis/mk/Orchestrate.groovy
+++ b/src/com/mirantis/mk/Orchestrate.groovy
@@ -44,15 +44,11 @@
} catch (Throwable e) {
common.warningMsg('Salt state salt.minion.base is not present in the Salt-formula yet.')
- common.retry(2,5){
- salt.enforceState([saltId: master, target: "* ${extra_tgt}", state: ['linux.system']])
- }
+ salt.enforceState([saltId: master, target: "* ${extra_tgt}", state: ['linux.system'], retries: 2])
if (staticMgmtNet) {
salt.runSaltProcessStep(master, "* ${extra_tgt}", '', ["salt-call state.sls; salt-call service.restart salt-minion"], null, true, 60)
- common.retry(2,5){
- salt.enforceState([saltId: master, target: "I@linux:network:interface ${extra_tgt}", state: ['']])
- }
+ salt.enforceState([saltId: master, target: "I@linux:network:interface ${extra_tgt}", state: [''], retries: 2])
salt.enforceState([saltId: master, target: "I@linux:system ${extra_tgt}", state: ['linux', 'openssh', 'ntp', 'rsyslog']])
@@ -91,9 +87,7 @@
} catch (Throwable e) {
common.warningMsg('Salt state salt.minion.base is not present in the Salt-formula yet.')
- common.retry(2,5){
- salt.enforceState([saltId: master, target: target, state: ['linux.system']])
- }
+ salt.enforceState([saltId: master, target: target, state: ['linux.system'], retries: 2])
if (staticMgmtNet) {
salt.runSaltProcessStep(master, target, '', ["salt-call state.sls; salt-call service.restart salt-minion"], null, true, 60)
@@ -787,8 +781,9 @@
// Jenkins
- def jenkins_master_url_pillar = salt.getPillar(master, jenkins_compound, '_param:jenkins_master_url')
- jenkins_master_url = salt.getReturnValues(jenkins_master_url_pillar)
+ def jenkins_master_host_pillar = salt.getPillar(master, jenkins_compound, '_param:jenkins_master_host')
+ def jenkins_master_port_pillar = salt.getPillar(master, jenkins_compound, '_param:jenkins_master_port')
+ jenkins_master_url = "http://${salt.getReturnValues(jenkins_master_host_pillar)}:${salt.getReturnValues(jenkins_master_port_pillar)}"
timeout(wait_timeout) {
common.infoMsg('Waiting for Jenkins to come up..')
@@ -824,8 +819,8 @@
def installStacklight(master, extra_tgt = '') {
def common = new
def salt = new
- def retries_wait = 20
- def retries = 15
+ def step_retries_wait = 20
+ def step_retries = 15
def first_target
// Install core services for K8S environments:
@@ -834,9 +829,7 @@
// In case of OpenStack, those are already installed
if (common.checkContains('STACK_INSTALL', 'k8s')) {
salt.enforceStateWithTest([saltId: master, target: "I@glusterfs:client ${extra_tgt}", state: 'glusterfs.client', retries: 2])
- common.retry(3, 5){
- salt.enforceState([saltId: master, target: "I@nginx:server ${extra_tgt}", state: 'salt.minion.cert'])
- }
+ salt.enforceState([saltId: master, target: "I@nginx:server ${extra_tgt}", state: 'salt.minion.cert', retries: 3])
salt.enforceState([saltId: master, target: "I@haproxy:proxy ${extra_tgt}", state: 'haproxy'])
salt.runSaltProcessStep(master, "I@haproxy:proxy ${extra_tgt}", 'service.status', ['haproxy'])
@@ -849,9 +842,7 @@
salt.enforceState([saltId: master, target: "I@mongodb:server ${extra_tgt}", state: 'mongodb.server'])
// Initialize mongodb replica set
- common.retry(5,20){
- salt.enforceState([saltId: master, target: "I@mongodb:server ${extra_tgt}", state: 'mongodb.cluster'])
- }
+ salt.enforceState([saltId: master, target: "I@mongodb:server ${extra_tgt}", state: 'mongodb.cluster', retries: 5, retries_wait: 20])
//Install Telegraf
@@ -887,18 +878,14 @@
} else {
common.errorMsg('[ERROR] Elasticsearch VIP port could not be retrieved')
- common.retry(retries,retries_wait) {
+ common.retry(step_retries,step_retries_wait) {
common.infoMsg('Waiting for Elasticsearch to become green..')
salt.cmdRun(master, "I@elasticsearch:client ${extra_tgt}", "curl -sf ${elasticsearch_vip}:${elasticsearch_port}/_cat/health | awk '{print \$4}' | grep green")
- common.retry(retries,retries_wait) {
- salt.enforceState([saltId: master, target: "I@elasticsearch:client ${extra_tgt}", state: 'elasticsearch.client'])
- }
+ salt.enforceState([saltId: master, target: "I@elasticsearch:client ${extra_tgt}", state: 'elasticsearch.client', retries: step_retries, retries_wait: step_retries_wait])
- common.retry(retries,retries_wait) {
- salt.enforceState([saltId: master, target: "I@kibana:client ${extra_tgt}", state: 'kibana.client'])
- }
+ salt.enforceState([saltId: master, target: "I@kibana:client ${extra_tgt}", state: 'kibana.client', retries: step_retries, retries_wait: step_retries_wait])
//Install InfluxDB
if (salt.testTarget(master, "I@influxdb:server ${extra_tgt}")) {
diff --git a/src/com/mirantis/mk/Salt.groovy b/src/com/mirantis/mk/Salt.groovy
index 982a9c4..006df14 100644
--- a/src/com/mirantis/mk/Salt.groovy
+++ b/src/com/mirantis/mk/Salt.groovy
@@ -277,7 +277,7 @@
out = runSaltCommand(params.saltId, 'local', ['expression':, 'type': 'compound'], 'state.sls', params.batch, params.saltArgs.reverse(), kwargs, -1, params.read_timeout)
// failOnError should be passed as true because we need to throw exception for retry block handler
checkResult(out, true, params.output, true, retriesCounter < params.retries) //disable ask on error for every interation except last one
- sleep(retries_wait)
+ sleep(params['retries_wait'])
} else {
// we have to reverse order in saltArgs because salt state have to be first
diff --git a/src/com/mirantis/mk/SaltModelTesting.groovy b/src/com/mirantis/mk/SaltModelTesting.groovy
index 2dd9b38..c4bd4fa 100644
--- a/src/com/mirantis/mk/SaltModelTesting.groovy
+++ b/src/com/mirantis/mk/SaltModelTesting.groovy
@@ -268,6 +268,7 @@
def testNode(LinkedHashMap config) {
def common = new
def dockerHostname = config.get('dockerHostname')
+ def domain = config.get('domain')
def reclassEnv = config.get('reclassEnv')
def clusterName = config.get('clusterName', "")
def formulasSource = config.get('formulasSource', 'pkg')
@@ -278,8 +279,8 @@
def testContext = config.get('testContext', 'test')
config['envOpts'] = [
- "MASTER_HOSTNAME=${dockerHostname}", "CLUSTER_NAME=${clusterName}",
- "MINION_ID=${dockerHostname}", "FORMULAS_SOURCE=${formulasSource}",
+ "HOSTNAME=${dockerHostname}", "CLUSTER_NAME=${clusterName}",
+ "DOMAIN=${domain}", "FORMULAS_SOURCE=${formulasSource}",
"RECLASS_IGNORE_CLASS_NOTFOUND=${ignoreClassNotfound}", "DEBUG=1",
@@ -293,6 +294,7 @@
'002_Prepare_something' : {
sh('''#!/bin/bash -x
rsync -ah ${RECLASS_ENV}/* /srv/salt/reclass && echo ' salt' >> /etc/hosts
+ echo " ${HOSTNAME}.${DOMAIN}" >> /etc/hosts
if [ -f '/srv/salt/reclass/salt_master_pillar.asc' ] ; then
mkdir -p /etc/salt/gpgkeys
chmod 700 /etc/salt/gpgkeys