blob: 56e2722246bcf8016cc7070b42e714bfcec4c083 [file] [log] [blame]
Dmitry Tyzhnenko2b730a02017-04-07 19:31:32 +03001
2import time
3
4from tcp_tests import logger
5from tcp_tests.helpers.log_helpers import pretty_repr
6
7LOG = logger.logger
8
9
class ExecuteCommandsMixin(object):
    """Mixin that runs sequences of shell and salt steps with retries.

    The mixin keeps its own references to the config and the underlay in
    name-mangled attributes, so they do not clash with attributes of the
    class it is mixed into.

    Steps that use the 'do' key (see ``command2``) additionally require the
    host class to provide ``self._salt`` with a ``_map`` dictionary; that
    attribute is not created here.
    """

    __config = None
    __underlay = None

    def __init__(self, config, underlay):
        """Store the config and the underlay used to reach remote nodes.

        :param config: object read as ``config.salt.salt_master_host``
        :param underlay: object providing ``remote(...)`` context managers
        """
        self.__config = config
        self.__underlay = underlay
        super(ExecuteCommandsMixin, self).__init__()

    @staticmethod
    def _count_salt_failures(lines):
        """Count failure markers in the output lines of a command.

        Salt may exit with code 0 although some modules failed, so the
        output itself is inspected. Each of the following adds to the total:

        - a line starting with "Failed:" adds the number that follows it
          (or 1 when the remainder is not a number, instead of crashing
          with ValueError);
        - a line containing 'Minion did not return. [No response]' adds 1;
        - a line containing 'Minion did not return. [Not connected]' adds 1;
        - a line starting with "[CRITICAL]" adds 1.

        :param lines: iterable of output lines (stdout and stderr)
        :return: int, total number of detected failures
        """
        failed = 0
        for line in lines:
            if line.startswith("Failed:"):
                try:
                    failed += int(line.split("Failed:")[1])
                except ValueError:
                    # Unexpected text after the marker: still a failure.
                    failed += 1
            if 'Minion did not return. [No response]' in line:
                failed += 1
            if 'Minion did not return. [Not connected]' in line:
                failed += 1
            if line.startswith("[CRITICAL]"):
                failed += 1
        return failed

    def ensure_running_service(self, service_name, host, check_cmd,
                               state_running='start/running'):
        """Check if the service_name is running or try to restart it

        :param service_name: name of the system service to check
        :param host: host on which the service will be checked
        :param check_cmd: shell command executed after the restart
        :param state_running: string expected in the 'service ... status'
                              output when the service is running
        """
        cmd = "service {0} status | grep -q '{1}'".format(
            service_name, state_running)
        with self.__underlay.remote(host=host) as remote:
            result = remote.execute(cmd)
            if result.exit_code != 0:
                LOG.info("{0} is not in running state on the node {1},"
                         " trying to start".format(service_name, host))
                cmd = ("service {0} stop;"
                       " sleep 3; killall -9 {0};"
                       "service {0} start; sleep 5;"
                       .format(service_name))
                remote.execute(cmd)

                # NOTE(review): check_cmd is issued twice and its result is
                # not inspected -- presumably a warm-up after the restart;
                # confirm the intent and the placement inside this branch.
                remote.execute(check_cmd)
                remote.execute(check_cmd)

    def execute_commands(self, commands, label="Command"):
        """Execute a sequence of commands

        Main purpose is to implement workarounds for salt formulas like:
        - exit_code == 0 when there are actual failures
        - salt_master and/or salt_minion stop working after executing a formula
        - a formula fails at first run, but completes at next runs

        A step with the 'cmd' key is passed to ``execute_command``; otherwise
        a step with the 'do' key is passed to ``command2``. A step with
        neither key is skipped with a warning.

        :param label: label of the current sequence of the commands, for log
        :param commands: list of dicts with the following data:
        commands = [
            ...
            {
                # Shell step, required:
                'cmd': 'shell command(s) to run',
                'node_name': 'name of the node to run the command(s)',
                # Salt step (instead of 'cmd'), required:
                'do': 'key of self._salt._map selecting the method',
                'target': 'passed to the method as tgt',
                'state': ...,   # exactly one of 'state' / 'states'
                'states': ...,
                # Salt step, optional:
                'args': ...,
                'kwargs': ...,
                # Optional for both kinds:
                'description': 'string with a readable command description',
                'retry': {
                    'count': int,  # How many times should be run the command
                                   # until success
                    'delay': int,  # Delay between tries in seconds
                },
                'skip_fail': bool  # If True - continue with the next step
                                   # without failure even if count number
                                   # is reached.
                                   # If False - raise an exception (default)
            },
            ...
        ]
        """
        for n, step in enumerate(commands):
            cmd = step.get('cmd')
            do = step.get('do')
            # Fall back to whichever action key is present so that the
            # header of a 'do' step does not read "None".
            description = step.get('description', cmd or do)

            msg = "[ {0} #{1} ] {2}".format(label, n + 1, description)
            LOG.info("\n\n{0}\n{1}".format(msg, '=' * len(msg)))

            if cmd:
                self.execute_command(step)
            elif do:
                self.command2(step)
            else:
                LOG.warning("Step #{0} has neither 'cmd' nor 'do', skipped"
                            .format(n + 1))

    def execute_command(self, step):
        """Run the shell command of a single step on a node, with retries.

        An attempt fails when the exit code is non-zero or when the output
        contains salt failure markers (see ``_count_salt_failures``). After
        a successful attempt the salt-master and salt-minion services are
        checked on the salt master host, unless that host is '0.0.0.0'.

        :param step: dict, see ``execute_commands`` for the keys
        :raises Exception: when the last attempt failed and 'skip_fail'
                           is not set to a true value
        """
        # Required fields
        cmd = step.get('cmd')
        node_name = step.get('node_name')
        # Optional fields
        description = step.get('description', cmd)
        # 'retry' may be present but empty (None) in the step definition
        retry = step.get('retry') or {}
        retry_count = retry.get('count', 1)
        retry_delay = retry.get('delay', 1)
        skip_fail = step.get('skip_fail', False)

        with self.__underlay.remote(node_name=node_name) as remote:

            # x counts the remaining attempts down to 1
            for x in range(retry_count, 0, -1):
                # Fixed pause before every attempt, including the first one
                time.sleep(3)
                result = remote.execute(cmd, verbose=True)

                # Workaround of exit code 0 from salt in case of failures
                failed = self._count_salt_failures(
                    result['stdout'] + result['stderr'])

                if result.exit_code == 0 and failed == 0:
                    salt_master_host = self.__config.salt.salt_master_host
                    if salt_master_host != '0.0.0.0':
                        # Workarounds for crashed services
                        self.ensure_running_service(
                            "salt-master",
                            salt_master_host,
                            "salt-call pillar.items",
                            'active (running)')  # Hardcoded for now
                        self.ensure_running_service(
                            "salt-minion",
                            salt_master_host,
                            "salt 'cfg01*' pillar.items",
                            "active (running)")  # Hardcoded for now
                    break

                if result.exit_code != 0:
                    LOG.info(
                        " === RETRY ({0}/{1}) ========================="
                        .format(x - 1, retry_count))
                else:
                    LOG.error(
                        " === SALT returned exit code = 0 while "
                        "there are failed modules! ===")
                    LOG.info(
                        " === RETRY ({0}/{1}) ======================="
                        .format(x - 1, retry_count))

                if x > 1:
                    # Same delay for both kinds of failure, and only when
                    # another attempt actually follows.
                    time.sleep(retry_delay)
                elif not skip_fail:
                    # In the last retry iteration, raise an exception
                    raise Exception("Step '{0}' failed"
                                    .format(description))

    def command2(self, step):
        """Run a salt step through ``self._salt``, with retries.

        The method is looked up as ``getattr(self._salt,
        self._salt._map[do])`` and called with ``tgt``, ``state``, ``args``
        and ``kwargs``. Its result is expected to be a ``(ret, fails)`` pair
        or a list of such pairs, where ``ret['return'][0]`` is the output and
        a true ``fails`` marks the attempt as failed.

        :param step: dict, see ``execute_commands`` for the keys
        :raises ValueError: when both or none of 'state'/'states' are given
        :raises Exception: when the last attempt failed and 'skip_fail'
                           is not set to a true value
        """
        # Required fields
        do = step['do']
        target = step['target']
        state = step.get('state')
        states = step.get('states')
        # Optional fields
        args = step.get('args')
        kwargs = step.get('kwargs')
        description = step.get('description', do)
        # 'retry' may be present but empty (None) in the step definition
        retry = step.get('retry') or {}
        retry_count = retry.get('count', 1)
        retry_delay = retry.get('delay', 1)
        skip_fail = step.get('skip_fail', False)

        # Exactly one of 'state' / 'states' must be provided
        if bool(state) == bool(states):
            raise ValueError("You should use state or states in step")

        method = getattr(self._salt, self._salt._map[do])

        # x counts the remaining attempts down to 1
        for x in range(retry_count, 0, -1):
            # Fixed pause before every attempt, including the first one
            time.sleep(3)

            command_ret = method(tgt=target, state=state or states,
                                 args=args, kwargs=kwargs)
            if not isinstance(command_ret, list):
                command_ret = [command_ret]
            results = [(r['return'][0], f) for r, f in command_ret]

            # FIXME: Change to debug level
            LOG.info(" === States output =======================\n"
                     "{}\n"
                     " =========================================".format(
                         pretty_repr([r for r, f in results])))

            all_fails = [f for r, f in results if f]
            if not all_fails:
                break

            LOG.error("States finished with failures.\n{}".format(
                all_fails))
            LOG.info(" === RETRY ({0}/{1}) ========================="
                     .format(x - 1, retry_count))

            if x > 1:
                # Wait only when another attempt actually follows
                time.sleep(retry_delay)
            elif not skip_fail:
                # In the last retry iteration, raise an exception
                raise Exception("Step '{0}' failed"
                                .format(description))