Blame - tcp_tests/managers/execute_commands.py - mcp/tcp-qa

blob: 76f4bc9c1094d12c00a0cc03ce83473f95a13b89 [file] [log] [blame]

Dmitry Tyzhnenko	2b730a0	2017-04-07 19:31:32 +0300	[diff] [blame^]	1
				2	import time
				3
				4	from tcp_tests import logger
				5	from tcp_tests.helpers.log_helpers import pretty_repr
				6
				7	LOG = logger.logger
				8
				9
				10	class ExecuteCommandsMixin(object):
				11	"""docstring for ExecuteCommands"""
				12
				13	def ensure_running_service(self, service_name, host, check_cmd,
				14	state_running='start/running'):
				15	"""Check if the service_name running or try to restart it
				16
				17	:param service_name: name of the service that will be checked
				18	:param node_name: node on which the service will be checked
				19	:param check_cmd: shell command to ensure that the service is running
				20	:param state_running: string for check the service state
				21	"""
				22	cmd = "service {0} status \| grep -q '{1}'".format(
				23	service_name, state_running)
				24	with self._underlay.remote(host=host) as remote:
				25	result = remote.execute(cmd)
				26	if result.exit_code != 0:
				27	LOG.info("{0} is not in running state on the node {1},"
				28	" trying to start".format(service_name, host))
				29	cmd = ("service {0} stop;"
				30	" sleep 3; killall -9 {0};"
				31	"service {0} start; sleep 5;"
				32	.format(service_name))
				33	remote.execute(cmd)
				34
				35	remote.execute(check_cmd)
				36	remote.execute(check_cmd)
				37
				38	def execute_commands(self, commands, label="Command"):
				39	"""Execute a sequence of commands
				40
				41	Main propose is to implement workarounds for salt formulas like:
				42	- exit_code == 0 when there are actual failures
				43	- salt_master and/or salt_minion stop working after executing a formula
				44	- a formula fails at first run, but completes at next runs
				45
				46	:param label: label of the current sequence of the commands, for log
				47	:param commands: list of dicts with the following data:
				48	commands = [
				49	...
				50	{
				51	# Required:
				52	'cmd': 'shell command(s) to run',
				53	'node_name': 'name of the node to run the command(s)',
				54	# Optional:
				55	'description': 'string with a readable command description',
				56	'retry': {
				57	'count': int, # How many times should be run the command
				58	# until success
				59	'delay': int, # Delay between tries in seconds
				60	},
				61	'skip_fail': bool # If True - continue with the next step
				62	# without failure even if count number
				63	# is reached.
				64	# If False - rise an exception (default)
				65	},
				66	...
				67	]
				68	"""
				69	for n, step in enumerate(commands):
				70	# Required fields
				71	cmd = step.get('cmd')
				72	do = step.get('do')
				73	# node_name = step.get('node_name')
				74	# Optional fields
				75	description = step.get('description', cmd)
				76	# retry = step.get('retry', {'count': 1, 'delay': 1})
				77	# retry_count = retry.get('count', 1)
				78	# retry_delay = retry.get('delay', 1)
				79	# skip_fail = step.get('skip_fail', False)
				80
				81	msg = "[ {0} #{1} ] {2}".format(label, n + 1, description)
				82	LOG.info("\n\n{0}\n{1}".format(msg, '=' * len(msg)))
				83
				84	if cmd:
				85	self.execute_command(step)
				86	elif do:
				87	self.command2(step)
				88
				89	def execute_command(self, step):
				90	# Required fields
				91	cmd = step.get('cmd')
				92	node_name = step.get('node_name')
				93	# Optional fields
				94	description = step.get('description', cmd)
				95	retry = step.get('retry', {'count': 1, 'delay': 1})
				96	retry_count = retry.get('count', 1)
				97	retry_delay = retry.get('delay', 1)
				98	skip_fail = step.get('skip_fail', False)
				99
				100	with self._underlay.remote(node_name=node_name) as remote:
				101
				102	for x in range(retry_count, 0, -1):
				103	time.sleep(3)
				104	result = remote.execute(cmd, verbose=True)
				105
				106	# Workaround of exit code 0 from salt in case of failures
				107	failed = 0
				108	for s in result['stdout']:
				109	if s.startswith("Failed:"):
				110	failed += int(s.split("Failed:")[1])
				111
				112	if result.exit_code != 0:
				113	time.sleep(retry_delay)
				114	LOG.info(
				115	" === RETRY ({0}/{1}) ========================="
				116	.format(x - 1, retry_count))
				117	elif failed != 0:
				118	LOG.error(
				119	" === SALT returned exit code = 0 while "
				120	"there are failed modules! ===")
				121	LOG.info(
				122	" === RETRY ({0}/{1}) ======================="
				123	.format(x - 1, retry_count))
				124	else:
				125	if self._config.salt.salt_master_host != '0.0.0.0':
				126	# Workarounds for crashed services
				127	self.ensure_running_service(
				128	"salt-master",
				129	self._config.salt.salt_master_host,
				130	"salt-call pillar.items",
				131	'active (running)') # Hardcoded for now
				132	self.ensure_running_service(
				133	"salt-minion",
				134	self._config.salt.salt_master_host,
				135	"salt 'cfg01*' pillar.items",
				136	"active (running)") # Hardcoded for now
				137	break
				138
				139	if x == 1 and skip_fail is False:
				140	# In the last retry iteration, raise an exception
				141	raise Exception("Step '{0}' failed"
				142	.format(description))
				143
				144	def command2(self, step):
				145	# Required fields
				146	do = step['do']
				147	target = step['target']
				148	state = step.get('state')
				149	states = step.get('states')
				150	# Optional fields
				151	args = step.get('args')
				152	kwargs = step.get('kwargs')
				153	description = step.get('description', do)
				154	retry = step.get('retry', {'count': 1, 'delay': 1})
				155	retry_count = retry.get('count', 1)
				156	retry_delay = retry.get('delay', 1)
				157	skip_fail = step.get('skip_fail', False)
				158
				159	if not bool(state) ^ bool(states):
				160	raise ValueError("You should use state or states in step")
				161
				162	for x in range(retry_count, 0, -1):
				163	time.sleep(3)
				164
				165	method = getattr(self._salt, self._salt._map[do])
				166	command_ret = method(tgt=target, state=state or states,
				167	args=args, kwargs=kwargs)
				168	command_ret = command_ret if \
				169	isinstance(command_ret, list) else [command_ret]
				170	results = [(r['return'][0], f) for r, f in command_ret]
				171
				172	# FIMME: Change to debug level
				173	LOG.info(" === States output =======================\n"
				174	"{}\n"
				175	" =========================================".format(
				176	pretty_repr([r for r, f in results])))
				177
				178	all_fails = [f for r, f in results if f]
				179	if all_fails:
				180	LOG.error("States finished with failures.\n{}".format(
				181	all_fails))
				182	time.sleep(retry_delay)
				183	LOG.info(" === RETRY ({0}/{1}) ========================="
				184	.format(x - 1, retry_count))
				185	else:
				186	break
				187
				188	if x == 1 and skip_fail is False:
				189	# In the last retry iteration, raise an exception
				190	raise Exception("Step '{0}' failed"
				191	.format(description))