Blame - tcp_tests/managers/execute_commands.py - mcp/tcp-qa

blob: 56e2722246bcf8016cc7070b42e714bfcec4c083 [file] [log] [blame]

Dmitry Tyzhnenko	2b730a0	2017-04-07 19:31:32 +0300	[diff] [blame]	1
				2	import time
				3
				4	from tcp_tests import logger
				5	from tcp_tests.helpers.log_helpers import pretty_repr
				6
				7	LOG = logger.logger
				8
				9
				10	class ExecuteCommandsMixin(object):
				11	"""docstring for ExecuteCommands"""
				12
Dmitry Tyzhnenko	bc0f826	2017-04-28 15:39:26 +0300	[diff] [blame]	13	__config = None
				14	__underlay = None
				15
				16	def __init__(self, config, underlay):
				17	self.__config = config
				18	self.__underlay = underlay
				19	super(ExecuteCommandsMixin, self).__init__()
				20
Dmitry Tyzhnenko	2b730a0	2017-04-07 19:31:32 +0300	[diff] [blame]	21	def ensure_running_service(self, service_name, host, check_cmd,
				22	state_running='start/running'):
				23	"""Check if the service_name running or try to restart it
				24
Dmitry Tyzhnenko	2b730a0	2017-04-07 19:31:32 +0300	[diff] [blame]	25	:param node_name: node on which the service will be checked
				26	:param check_cmd: shell command to ensure that the service is running
				27	:param state_running: string for check the service state
				28	"""
				29	cmd = "service {0} status \| grep -q '{1}'".format(
				30	service_name, state_running)
Dmitry Tyzhnenko	bc0f826	2017-04-28 15:39:26 +0300	[diff] [blame]	31	with self.__underlay.remote(host=host) as remote:
Dmitry Tyzhnenko	2b730a0	2017-04-07 19:31:32 +0300	[diff] [blame]	32	result = remote.execute(cmd)
				33	if result.exit_code != 0:
				34	LOG.info("{0} is not in running state on the node {1},"
				35	" trying to start".format(service_name, host))
				36	cmd = ("service {0} stop;"
				37	" sleep 3; killall -9 {0};"
				38	"service {0} start; sleep 5;"
				39	.format(service_name))
				40	remote.execute(cmd)
				41
				42	remote.execute(check_cmd)
				43	remote.execute(check_cmd)
				44
				45	def execute_commands(self, commands, label="Command"):
				46	"""Execute a sequence of commands
				47
				48	Main propose is to implement workarounds for salt formulas like:
				49	- exit_code == 0 when there are actual failures
				50	- salt_master and/or salt_minion stop working after executing a formula
				51	- a formula fails at first run, but completes at next runs
				52
				53	:param label: label of the current sequence of the commands, for log
				54	:param commands: list of dicts with the following data:
				55	commands = [
				56	...
				57	{
				58	# Required:
				59	'cmd': 'shell command(s) to run',
				60	'node_name': 'name of the node to run the command(s)',
				61	# Optional:
				62	'description': 'string with a readable command description',
				63	'retry': {
				64	'count': int, # How many times should be run the command
				65	# until success
				66	'delay': int, # Delay between tries in seconds
				67	},
				68	'skip_fail': bool # If True - continue with the next step
				69	# without failure even if count number
				70	# is reached.
				71	# If False - rise an exception (default)
				72	},
				73	...
				74	]
				75	"""
				76	for n, step in enumerate(commands):
				77	# Required fields
				78	cmd = step.get('cmd')
				79	do = step.get('do')
				80	# node_name = step.get('node_name')
				81	# Optional fields
				82	description = step.get('description', cmd)
				83	# retry = step.get('retry', {'count': 1, 'delay': 1})
				84	# retry_count = retry.get('count', 1)
				85	# retry_delay = retry.get('delay', 1)
				86	# skip_fail = step.get('skip_fail', False)
				87
				88	msg = "[ {0} #{1} ] {2}".format(label, n + 1, description)
				89	LOG.info("\n\n{0}\n{1}".format(msg, '=' * len(msg)))
				90
				91	if cmd:
				92	self.execute_command(step)
				93	elif do:
				94	self.command2(step)
				95
				96	def execute_command(self, step):
				97	# Required fields
				98	cmd = step.get('cmd')
				99	node_name = step.get('node_name')
				100	# Optional fields
				101	description = step.get('description', cmd)
				102	retry = step.get('retry', {'count': 1, 'delay': 1})
				103	retry_count = retry.get('count', 1)
				104	retry_delay = retry.get('delay', 1)
				105	skip_fail = step.get('skip_fail', False)
				106
Dmitry Tyzhnenko	bc0f826	2017-04-28 15:39:26 +0300	[diff] [blame]	107	with self.__underlay.remote(node_name=node_name) as remote:
Dmitry Tyzhnenko	2b730a0	2017-04-07 19:31:32 +0300	[diff] [blame]	108
				109	for x in range(retry_count, 0, -1):
				110	time.sleep(3)
				111	result = remote.execute(cmd, verbose=True)
				112
				113	# Workaround of exit code 0 from salt in case of failures
				114	failed = 0
Dennis Dmitriev	4db5bf2	2017-05-13 19:31:17 +0300	[diff] [blame]	115	for s in result['stdout'] + result['stderr']:
Dmitry Tyzhnenko	2b730a0	2017-04-07 19:31:32 +0300	[diff] [blame]	116	if s.startswith("Failed:"):
				117	failed += int(s.split("Failed:")[1])
Dennis Dmitriev	68671a6	2017-05-13 16:40:32 +0300	[diff] [blame]	118	if 'Minion did not return. [No response]' in s:
				119	failed += 1
Dennis Dmitriev	f854617	2017-07-20 21:57:05 +0300	[diff] [blame]	120	if 'Minion did not return. [Not connected]' in s:
				121	failed += 1
Dennis Dmitriev	68671a6	2017-05-13 16:40:32 +0300	[diff] [blame]	122	if s.startswith("[CRITICAL]"):
				123	failed += 1
Dmitry Tyzhnenko	2b730a0	2017-04-07 19:31:32 +0300	[diff] [blame]	124
				125	if result.exit_code != 0:
				126	time.sleep(retry_delay)
				127	LOG.info(
				128	" === RETRY ({0}/{1}) ========================="
				129	.format(x - 1, retry_count))
				130	elif failed != 0:
				131	LOG.error(
				132	" === SALT returned exit code = 0 while "
				133	"there are failed modules! ===")
				134	LOG.info(
				135	" === RETRY ({0}/{1}) ======================="
				136	.format(x - 1, retry_count))
				137	else:
Dmitry Tyzhnenko	bc0f826	2017-04-28 15:39:26 +0300	[diff] [blame]	138	if self.__config.salt.salt_master_host != '0.0.0.0':
Dmitry Tyzhnenko	2b730a0	2017-04-07 19:31:32 +0300	[diff] [blame]	139	# Workarounds for crashed services
				140	self.ensure_running_service(
				141	"salt-master",
Dmitry Tyzhnenko	bc0f826	2017-04-28 15:39:26 +0300	[diff] [blame]	142	self.__config.salt.salt_master_host,
Dmitry Tyzhnenko	2b730a0	2017-04-07 19:31:32 +0300	[diff] [blame]	143	"salt-call pillar.items",
				144	'active (running)') # Hardcoded for now
				145	self.ensure_running_service(
				146	"salt-minion",
Dmitry Tyzhnenko	bc0f826	2017-04-28 15:39:26 +0300	[diff] [blame]	147	self.__config.salt.salt_master_host,
Dmitry Tyzhnenko	2b730a0	2017-04-07 19:31:32 +0300	[diff] [blame]	148	"salt 'cfg01*' pillar.items",
				149	"active (running)") # Hardcoded for now
				150	break
				151
				152	if x == 1 and skip_fail is False:
				153	# In the last retry iteration, raise an exception
				154	raise Exception("Step '{0}' failed"
				155	.format(description))
				156
				157	def command2(self, step):
				158	# Required fields
				159	do = step['do']
				160	target = step['target']
				161	state = step.get('state')
				162	states = step.get('states')
				163	# Optional fields
				164	args = step.get('args')
				165	kwargs = step.get('kwargs')
				166	description = step.get('description', do)
				167	retry = step.get('retry', {'count': 1, 'delay': 1})
				168	retry_count = retry.get('count', 1)
				169	retry_delay = retry.get('delay', 1)
				170	skip_fail = step.get('skip_fail', False)
				171
				172	if not bool(state) ^ bool(states):
				173	raise ValueError("You should use state or states in step")
				174
				175	for x in range(retry_count, 0, -1):
				176	time.sleep(3)
				177
				178	method = getattr(self._salt, self._salt._map[do])
				179	command_ret = method(tgt=target, state=state or states,
				180	args=args, kwargs=kwargs)
				181	command_ret = command_ret if \
				182	isinstance(command_ret, list) else [command_ret]
				183	results = [(r['return'][0], f) for r, f in command_ret]
				184
				185	# FIMME: Change to debug level
				186	LOG.info(" === States output =======================\n"
				187	"{}\n"
				188	" =========================================".format(
				189	pretty_repr([r for r, f in results])))
				190
				191	all_fails = [f for r, f in results if f]
				192	if all_fails:
				193	LOG.error("States finished with failures.\n{}".format(
				194	all_fails))
				195	time.sleep(retry_delay)
				196	LOG.info(" === RETRY ({0}/{1}) ========================="
				197	.format(x - 1, retry_count))
				198	else:
				199	break
				200
				201	if x == 1 and skip_fail is False:
				202	# In the last retry iteration, raise an exception
				203	raise Exception("Step '{0}' failed"
				204	.format(description))