Merge "Make minion restart timeout configurable"
diff --git a/src/com/mirantis/mk/Openstack.groovy b/src/com/mirantis/mk/Openstack.groovy
index 04ce85e..eedfbd8 100644
--- a/src/com/mirantis/mk/Openstack.groovy
+++ b/src/com/mirantis/mk/Openstack.groovy
@@ -478,31 +478,3 @@
salt.runSaltProcessStep(env, 'I@galera:slave', 'service.start', ['mysql'])
}
-
-/**
- * Recovers
- * @param master Salt master
- * @param recoverHost Hostname of the node to be recovered
- * @param healthyHost Hostname of healthy node from the same cluster
- * @return output of salt commands
- */
-def recoverGluster(master, recoverHost, healthyHost) {
- def salt = new com.mirantis.mk.Salt()
-
- // Recover glusterfs
- if (salt.testTarget(master, 'I@glusterfs:server')) {
- salt.enforceState(master, 'I@glusterfs:server', 'glusterfs.server.service')
- if (healthyHost != 'none' && recoverHost != 'none') {
- salt.runSaltCommand(master, 'local', ['expression': "E@${healthyHost}", 'type': 'compound'], "cp.push /var/lib/glusterd/vols/ upload_path='/tmp/'")
- salt.runSaltCommand(master, 'local', ['expression': "E@${recoverHost}", 'type': 'compound'], "get_dir salt://tmp/vols/ /var/lib/glusterd/")
- }
- salt.enforceState(master, 'I@glusterfs:server and *01*', 'glusterfs.server.setup', true, true, null, false, -1, 5)
- sleep(10)
- salt.cmdRun(master, 'I@glusterfs:server', "gluster peer status; gluster volume status")
- }
-
- // Ensure glusterfs clusters is ready
- if (salt.testTarget(master, 'I@glusterfs:client')) {
- salt.enforceState(master, 'I@glusterfs:client', 'glusterfs.client')
- }
-}
\ No newline at end of file
diff --git a/src/com/mirantis/mk/Orchestrate.groovy b/src/com/mirantis/mk/Orchestrate.groovy
index b0ecd96..b9ad8ad 100644
--- a/src/com/mirantis/mk/Orchestrate.groovy
+++ b/src/com/mirantis/mk/Orchestrate.groovy
@@ -37,7 +37,9 @@
} catch (Throwable e) {
common.warningMsg('Salt state salt.minion.base is not present in the Salt-formula yet.')
}
- salt.enforceState(master, '*', ['linux.system'])
+ common.retry(2,5){
+ salt.enforceState(master, '*', ['linux.system'])
+ }
if (staticMgmtNet) {
salt.runSaltProcessStep(master, '*', 'cmd.shell', ["salt-call state.sls linux.network; salt-call service.restart salt-minion"], null, true, 60)
}
@@ -75,7 +77,9 @@
} catch (Throwable e) {
common.warningMsg('Salt state salt.minion.base is not present in the Salt-formula yet.')
}
- salt.enforceState(master, target, ['linux.system'])
+ common.retry(2,5){
+ salt.enforceState(master, target, ['linux.system'])
+ }
if (staticMgmtNet) {
salt.runSaltProcessStep(master, target, 'cmd.shell', ["salt-call state.sls linux.network; salt-call service.restart salt-minion"], null, true, 60)
}
@@ -94,37 +98,35 @@
def installInfraKvm(master) {
def common = new com.mirantis.mk.Common()
def salt = new com.mirantis.mk.Salt()
- salt.fullRefresh(master, 'I@linux:system')
- def infra_conpund = 'I@salt:control'
+ def infra_compound = 'I@salt:control'
def minions = []
def wait_timeout = 10
def retries = wait_timeout * 30
+ salt.fullRefresh(master, 'I@linux:system')
salt.enforceState(master, 'I@salt:control', ['salt.minion'], true, false, null, false, 60, 2)
salt.enforceState(master, 'I@salt:control', ['linux.system', 'linux.network', 'ntp', 'rsyslog'])
salt.enforceState(master, 'I@salt:control', 'libvirt')
salt.enforceState(master, 'I@salt:control', 'salt.control')
- timeout(wait_timeout) {
- common.infoMsg("Waiting for minions to come up...")
- if (salt.testTarget(master, infra_conpund)) {
- // Gathering minions
- for ( infra_node in salt.getMinionsSorted(master, infra_conpund) ) {
- def pillar = salt.getPillar(master, infra_node, 'salt:control:cluster')
- if ( !pillar['return'].isEmpty() ) {
- for ( cluster in pillar['return'][0].values() ) {
- def engine = cluster.values()[0]['engine']
- def domain = cluster.values()[0]['domain']
- def node = cluster.values()[0]['node']
- if ( engine == "virt" ) {
- def nodes = node.values()
- if ( !nodes.isEmpty() ) {
- for ( vm in nodes ) {
- if ( vm['name'] != null ) {
- def vm_fqdn = vm['name'] + '.' + domain
- if ( !minions.contains(vm_fqdn) ) {
- minions.add(vm_fqdn)
- }
+ common.infoMsg("Building minions list...")
+ if (salt.testTarget(master, infra_compound)) {
+ // Gathering minions
+ for ( infra_node in salt.getMinionsSorted(master, infra_compound) ) {
+ def pillar = salt.getPillar(master, infra_node, 'salt:control:cluster')
+ if ( !pillar['return'].isEmpty() ) {
+ for ( cluster in pillar['return'][0].values() ) {
+ def engine = cluster.values()[0]['engine']
+ def domain = cluster.values()[0]['domain']
+ def node = cluster.values()[0]['node']
+ if ( engine == "virt" ) {
+ def nodes = node.values()
+ if ( !nodes.isEmpty() ) {
+ for ( vm in nodes ) {
+ if ( vm['name'] != null ) {
+ def vm_fqdn = vm['name'] + '.' + domain
+ if ( !minions.contains(vm_fqdn) ) {
+ minions.add(vm_fqdn)
}
}
}
@@ -133,13 +135,18 @@
}
}
}
+ }
- def minions_compound = minions.join(' or ')
- common.infoMsg('Waiting for next minions to register: ' + minions_compound,)
+ def minions_compound = minions.join(' or ')
+
+ common.infoMsg("Waiting for next minions to register within ${wait_timeout} minutes: " + minions_compound)
+ timeout(time: wait_timeout, unit: 'MINUTES') {
salt.minionsPresentFromList(master, 'I@salt:master', minions, true, null, true, retries, 1)
- common.infoMsg('Waiting for minions to respond')
- salt.minionsReachable(master, 'I@salt:master', minions_compound )
+ }
+ common.infoMsg('Waiting for minions to respond')
+ timeout(time: wait_timeout, unit: 'MINUTES') {
+ salt.minionsReachable(master, 'I@salt:master', minions_compound)
}
common.infoMsg("All minions are up.")
@@ -771,6 +778,8 @@
def installStacklight(master) {
def common = new com.mirantis.mk.Common()
def salt = new com.mirantis.mk.Salt()
+ def retries_wait = 20
+ def retries = 15
// Install core services for K8S environments:
// HAProxy, Nginx and lusterFS clients
@@ -790,18 +799,14 @@
// Install MongoDB for Alerta
if (salt.testTarget(master, 'I@mongodb:server')) {
- salt.enforceState(master, 'I@mongodb:server', 'mongodb')
- }
+ salt.enforceState(master, 'I@mongodb:server', 'mongodb.server')
- // Configure Alerta
- if (salt.testTarget(master, 'I@prometheus:alerta')) {
- salt.enforceState(master, 'I@docker:swarm and I@prometheus:alerta', 'prometheus.alerta')
+ // Initialize mongodb replica set
+ common.retry(5,20){
+ salt.enforceState(master, 'I@mongodb:server', 'mongodb.cluster')
+ }
}
- // Launch containers
- salt.enforceState(master, 'I@docker:swarm:role:master and I@prometheus:server', 'docker.client')
- salt.runSaltProcessStep(master, 'I@docker:swarm and I@prometheus:server', 'dockerng.ps')
-
//Install Telegraf
salt.enforceState(master, 'I@telegraf:agent or I@telegraf:remote_agent', 'telegraf')
@@ -815,8 +820,34 @@
salt.enforceState(master, 'I@elasticsearch:server', 'elasticsearch.server')
salt.enforceState(master, '*01* and I@kibana:server', 'kibana.server')
salt.enforceState(master, 'I@kibana:server', 'kibana.server')
- salt.enforceState(master, 'I@elasticsearch:client', 'elasticsearch.client')
- salt.enforceState(master, 'I@kibana:client', 'kibana.client')
+
+ // Check Elasticsearch cluster health status
+ def pillar = salt.getPillar(master, 'I@elasticsearch:client', 'elasticsearch:client:server:host')
+ def elasticsearch_vip
+ if(!pillar['return'].isEmpty()) {
+ elasticsearch_vip = pillar['return'][0].values()[0]
+ } else {
+ common.errorMsg('[ERROR] Elasticsearch VIP address could not be retrieved')
+ }
+ pillar = salt.getPillar(master, 'I@elasticsearch:client', 'elasticsearch:client:server:port')
+ def elasticsearch_port
+ if(!pillar['return'].isEmpty()) {
+ elasticsearch_port = pillar['return'][0].values()[0]
+ } else {
+ common.errorMsg('[ERROR] Elasticsearch VIP port could not be retrieved')
+ }
+ common.retry(retries,retries_wait) {
+ common.infoMsg('Waiting for Elasticsearch to become green..')
+ salt.cmdRun(master, 'I@elasticsearch:client', "curl -sf ${elasticsearch_vip}:${elasticsearch_port}/_cat/health | awk '{print \$4}' | grep green")
+ }
+
+ common.retry(retries,retries_wait) {
+ salt.enforceState(master, 'I@elasticsearch:client', 'elasticsearch.client')
+ }
+
+ common.retry(retries,retries_wait) {
+ salt.enforceState(master, 'I@kibana:client', 'kibana.client')
+ }
//Install InfluxDB
if (salt.testTarget(master, 'I@influxdb:server')) {
@@ -824,11 +855,6 @@
salt.enforceState(master, 'I@influxdb:server', 'influxdb')
}
- //Install Prometheus LTS
- if (salt.testTarget(master, 'I@prometheus:relay')) {
- salt.enforceState(master, 'I@prometheus:relay', 'prometheus')
- }
-
// Install service for the log collection
if (salt.testTarget(master, 'I@fluentd:agent')) {
salt.enforceState(master, 'I@fluentd:agent', 'fluentd')
@@ -866,13 +892,22 @@
salt.enforceState(master, 'I@docker:swarm and I@prometheus:server', 'heka.remote_collector', true, false)
}
+ // Launch containers
+ salt.enforceState(master, 'I@docker:swarm:role:master and I@prometheus:server', 'docker.client')
+ salt.runSaltProcessStep(master, 'I@docker:swarm and I@prometheus:server', 'dockerng.ps')
+
+ //Install Prometheus LTS
+ if (salt.testTarget(master, 'I@prometheus:relay')) {
+ salt.enforceState(master, 'I@prometheus:relay', 'prometheus')
+ }
+
// Install sphinx server
if (salt.testTarget(master, 'I@sphinx:server')) {
salt.enforceState(master, 'I@sphinx:server', 'sphinx')
}
//Configure Grafana
- def pillar = salt.getPillar(master, 'ctl01*', '_param:stacklight_monitor_address')
+ pillar = salt.getPillar(master, 'ctl01*', '_param:stacklight_monitor_address')
common.prettyPrint(pillar)
def stacklight_vip
diff --git a/src/com/mirantis/mk/Salt.groovy b/src/com/mirantis/mk/Salt.groovy
index 46f27a6..188af61 100644
--- a/src/com/mirantis/mk/Salt.groovy
+++ b/src/com/mirantis/mk/Salt.groovy
@@ -270,27 +270,35 @@
if (waitUntilPresent){
def count = 0
while(count < maxRetries) {
+ try {
+ def out = runSaltCommand(saltId, 'local', ['expression': target, 'type': 'compound'], 'cmd.shell', batch, [cmd], null, 5)
+ if (output) {
+ printSaltCommandResult(out)
+ }
+ def valueMap = out["return"][0]
+ def result = valueMap.get(valueMap.keySet()[0])
+ def resultsArray = result.tokenize("\n")
+ def size = resultsArray.size()
+ if (size >= answers) {
+ return out
+ }
+ count++
+ sleep(time: 1000, unit: 'MILLISECONDS')
+ common.infoMsg("Waiting for ${cmd} on ${target} to be in correct state")
+ } catch (Exception er) {
+ common.infoMsg('[WARNING]: runSaltCommand command read timeout within 5 seconds. You have very slow or broken environment')
+ }
+ }
+ } else {
+ try {
def out = runSaltCommand(saltId, 'local', ['expression': target, 'type': 'compound'], 'cmd.shell', batch, [cmd], null, 5)
if (output) {
printSaltCommandResult(out)
}
- def valueMap = out["return"][0]
- def result = valueMap.get(valueMap.keySet()[0])
- def resultsArray = result.tokenize("\n")
- def size = resultsArray.size()
- if (size >= answers) {
- return out
- }
- count++
- sleep(time: 1000, unit: 'MILLISECONDS')
- common.infoMsg("Waiting for ${cmd} on ${target} to be in correct state")
+ return out
+ } catch (Exception er) {
+ common.infoMsg('[WARNING]: runSaltCommand command read timeout within 5 seconds. You have very slow or broken environment')
}
- } else {
- def out = runSaltCommand(saltId, 'local', ['expression': target, 'type': 'compound'], 'cmd.shell', batch, [cmd], null, 5)
- if (output) {
- printSaltCommandResult(out)
- }
- return out
}
// otherwise throw exception
common.errorMsg("Status of command ${cmd} on ${target} failed, please check it.")