# blob: de9ab3ba7fbb01b0f7456c3c7494ea5dccc93519 [file] [log] [blame]
import time
from tcp_tests import logger
from tcp_tests.helpers.log_helpers import pretty_repr
LOG = logger.logger
class ExecuteCommandsMixin(object):
    """Mixin that runs sequences of deployment steps with retries.

    Two kinds of steps are supported (see ``execute_commands``):

    - shell steps (``cmd``), executed on a node through
      ``underlay.remote(node_name=...)``;
    - salt API steps (``do``), dispatched through ``self._salt``.

    ``self._salt`` (and its ``_map`` dict) is NOT defined by this mixin;
    it has to be provided by the class the mixin is combined with.
    """

    __config = None
    __underlay = None

    def __init__(self, config, underlay):
        """Store the collaborators used by the step runners.

        :param config: object exposing ``salt.salt_master_host``
        :param underlay: object exposing the ``remote(...)`` context manager
        """
        self.__config = config
        self.__underlay = underlay
        super(ExecuteCommandsMixin, self).__init__()

    def ensure_running_service(self, service_name, host, check_cmd,
                               state_running='start/running'):
        """Check if the service_name is running or try to restart it

        :param service_name: name of the service that will be checked
        :param host: host on which the service will be checked
        :param check_cmd: shell command to ensure that the service is running
        :param state_running: string looked for (with ``grep -q``) in the
                              output of ``service <service_name> status``
        """
        cmd = "service {0} status | grep -q '{1}'".format(
            service_name, state_running)
        with self.__underlay.remote(host=host) as remote:
            result = remote.execute(cmd)
            if result.exit_code != 0:
                LOG.info("{0} is not in running state on the node {1},"
                         " trying to start".format(service_name, host))
                cmd = ("service {0} stop;"
                       " sleep 3; killall -9 {0};"
                       "service {0} start; sleep 5;"
                       .format(service_name))
                remote.execute(cmd)

                # The check command is issued twice and its results are
                # not inspected.
                # NOTE(review): presumably the first call right after a
                # restart is allowed to fail - confirm.
                remote.execute(check_cmd)
                remote.execute(check_cmd)

    def execute_commands(self, commands, label="Command"):
        """Execute a sequence of commands

        Main purpose is to implement workarounds for salt formulas like:
        - exit_code == 0 when there are actual failures
        - salt_master and/or salt_minion stop working after executing a
          formula
        - a formula fails at first run, but completes at next runs

        :param label: label of the current sequence of the commands, for log
        :param commands: list of dicts with the following data:
        commands = [
            ...
            {
                # Required for a shell step:
                'cmd': 'shell command(s) to run',
                'node_name': 'name of the node to run the command(s)',
                # Required for a salt API step (used when 'cmd' is empty):
                'do': 'key of self._salt._map naming the method to call',
                'target': 'passed to the method as tgt',
                'state': ...,   # exactly one of 'state' / 'states'
                'states': ...,  # must be set
                # Optional:
                'args': ...,    # passed through to the salt API method
                'kwargs': ...,  # passed through to the salt API method
                'description': 'string with a readable command description',
                'retry': {
                    'count': int,  # How many times should be run the command
                                   # until success
                    'delay': int,  # Delay between tries in seconds
                },
                'skip_fail': bool  # If True - continue with the next step
                                   # without failure even if count number
                                   # is reached.
                                   # If False - raise an exception (default)
            },
            ...
        ]
        """
        for number, step in enumerate(commands, 1):
            cmd = step.get('cmd')
            do = step.get('do')
            # Fall back to 'do' so that salt API steps without an explicit
            # description do not get a "None" header in the log.
            description = step.get('description', cmd or do)

            msg = "[ {0} #{1} ] {2}".format(label, number, description)
            LOG.info("\n\n{0}\n{1}".format(msg, '=' * len(msg)))

            if cmd:
                self.execute_command(step)
            elif do:
                self.command2(step)
            else:
                # Nothing to run: report it instead of skipping silently.
                LOG.error("Step has neither 'cmd' nor 'do', skipping: "
                          "{0}".format(description))

    @staticmethod
    def _retry_params(step):
        """Return ``(count, delay)`` from step['retry'], defaulting to 1, 1.

        A missing, empty or ``None`` 'retry' value yields the defaults.
        """
        retry = step.get('retry') or {}
        return retry.get('count', 1), retry.get('delay', 1)

    @staticmethod
    def _count_salt_failures(stdout_lines):
        """Count failure markers in the output lines of a salt command.

        Workaround for exit code 0 returned by salt in case of failures.
        """
        failed = 0
        for line in stdout_lines:
            if line.startswith("Failed:"):
                try:
                    failed += int(line.split("Failed:")[1])
                except ValueError:
                    # The line reports a failure, but without a parsable
                    # counter: count it once instead of crashing.
                    failed += 1
            if 'Minion did not return. [No response]' in line:
                failed += 1
            if line.startswith("[CRITICAL]"):
                failed += 1
        return failed

    def _ensure_salt_services(self):
        """Restart salt-master/salt-minion on the master host if needed.

        Does nothing when the configured salt master host is '0.0.0.0'.
        """
        master_host = self.__config.salt.salt_master_host
        if master_host == '0.0.0.0':
            return
        # Workarounds for crashed services
        self.ensure_running_service(
            "salt-master",
            master_host,
            "salt-call pillar.items",
            'active (running)')  # Hardcoded for now
        self.ensure_running_service(
            "salt-minion",
            master_host,
            "salt 'cfg01*' pillar.items",
            "active (running)")  # Hardcoded for now

    def execute_command(self, step):
        """Run the shell command of a step on its node, with retries.

        An attempt succeeds when the exit code is 0 and no failure marker
        is found in stdout; the salt services are then checked and the
        retry loop stops. Otherwise the attempt is repeated after
        ``retry['delay']`` seconds, up to ``retry['count']`` attempts.

        :param step: dict, see ``execute_commands`` for the format
        :raises Exception: if the last attempt failed and 'skip_fail'
                           is False
        """
        # Required fields
        cmd = step.get('cmd')
        node_name = step.get('node_name')
        # Optional fields
        description = step.get('description', cmd)
        retry_count, retry_delay = self._retry_params(step)
        skip_fail = step.get('skip_fail', False)

        with self.__underlay.remote(node_name=node_name) as remote:
            for attempts_left in range(retry_count - 1, -1, -1):
                # Fixed pause before every attempt (kept from the original).
                time.sleep(3)
                result = remote.execute(cmd, verbose=True)
                failed = self._count_salt_failures(result['stdout'])

                if result.exit_code != 0:
                    time.sleep(retry_delay)
                    LOG.info(
                        " === RETRY ({0}/{1}) ========================="
                        .format(attempts_left, retry_count))
                elif failed != 0:
                    LOG.error(
                        " === SALT returned exit code = 0 while "
                        "there are failed modules! ===")
                    # Honour the configured delay here as well, like the
                    # non-zero exit code branch does.
                    time.sleep(retry_delay)
                    LOG.info(
                        " === RETRY ({0}/{1}) ======================="
                        .format(attempts_left, retry_count))
                else:
                    self._ensure_salt_services()
                    break

                if attempts_left == 0 and skip_fail is False:
                    # In the last retry iteration, raise an exception
                    raise Exception("Step '{0}' failed"
                                    .format(description))

    def command2(self, step):
        """Run a salt API step through ``self._salt``, with retries.

        The method named by ``self._salt._map[step['do']]`` is called with
        ``tgt``, ``state``, ``args`` and ``kwargs``. An attempt succeeds
        when none of the returned entries carries failures.

        :param step: dict, see ``execute_commands`` for the format
        :raises KeyError: if 'do' or 'target' is missing from the step, or
                          'do' is not a key of ``self._salt._map``
        :raises ValueError: unless exactly one of 'state' / 'states' is set
        :raises Exception: if the last attempt failed and 'skip_fail'
                           is False
        """
        # Required fields
        do = step['do']
        target = step['target']
        state = step.get('state')
        states = step.get('states')
        # Optional fields
        args = step.get('args')
        kwargs = step.get('kwargs')
        description = step.get('description', do)
        retry_count, retry_delay = self._retry_params(step)
        skip_fail = step.get('skip_fail', False)

        # Exactly one of 'state' and 'states' must be given.
        if bool(state) == bool(states):
            raise ValueError("You should use state or states in step")

        # Loop invariant: resolve the method once, so that an unknown 'do'
        # fails before the first sleep.
        method = getattr(self._salt, self._salt._map[do])

        for attempts_left in range(retry_count - 1, -1, -1):
            time.sleep(3)
            command_ret = method(tgt=target, state=state or states,
                                 args=args, kwargs=kwargs)
            if not isinstance(command_ret, list):
                command_ret = [command_ret]
            # NOTE(review): each entry is unpacked as a (response, failures)
            # pair with a non-empty response['return'] list - confirm
            # against the self._salt implementation.
            results = [(r['return'][0], f) for r, f in command_ret]

            # FIXME: Change to debug level
            LOG.info(" === States output =======================\n"
                     "{}\n"
                     " =========================================".format(
                         pretty_repr([r for r, f in results])))

            all_fails = [f for r, f in results if f]
            if not all_fails:
                break

            LOG.error("States finished with failures.\n{}".format(
                all_fails))
            time.sleep(retry_delay)
            LOG.info(" === RETRY ({0}/{1}) ========================="
                     .format(attempts_left, retry_count))

            if attempts_left == 0 and skip_fail is False:
                # In the last retry iteration, raise an exception
                raise Exception("Step '{0}' failed"
                                .format(description))