# blob: 76f4bc9c1094d12c00a0cc03ce83473f95a13b89 [file] [log] [blame]
# Dmitry Tyzhnenko 2b730a0 2017-04-07 19:31:32 +0300
import time

from tcp_tests import logger
from tcp_tests.helpers.log_helpers import pretty_repr

# Logger used by the helpers in this module.
LOG = logger.logger
8
9
class ExecuteCommandsMixin(object):
    """Mixin that runs sequences of shell and salt steps with retries

    The host class is expected to provide ``self._underlay`` (used to open
    remote shells), ``self._config`` (``salt.salt_master_host`` is read)
    and ``self._salt`` (used by 'do' steps).
    """

    @staticmethod
    def _retry_options(step):
        """Return the ``(count, delay, skip_fail)`` settings of a step

        Missing values default to one attempt, one second of delay and
        ``skip_fail=False``.
        """
        retry = step.get('retry', {'count': 1, 'delay': 1})
        return (retry.get('count', 1),
                retry.get('delay', 1),
                step.get('skip_fail', False))

    @staticmethod
    def _count_failed_states(stdout_lines):
        """Sum the numbers reported on lines that start with 'Failed:'

        Lines that start with 'Failed:' but do not carry an integer are
        ignored instead of aborting the step with a ValueError.

        :param stdout_lines: iterable of output lines of the command
        :return: int, total amount of reported failures
        """
        failed = 0
        for line in stdout_lines:
            if not line.startswith("Failed:"):
                continue
            try:
                failed += int(line.split("Failed:")[1])
            except ValueError:
                # Not a numeric summary line, nothing to count
                continue
        return failed

    def ensure_running_service(self, service_name, host, check_cmd,
                               state_running='start/running'):
        """Check if the service_name is running or try to restart it

        :param service_name: name of the service that will be checked
        :param host: host on which the service will be checked
        :param check_cmd: shell command executed after the service restart
        :param state_running: string that is searched (grep) in the output
                              of 'service <service_name> status'
        """
        status_cmd = "service {0} status | grep -q '{1}'".format(
            service_name, state_running)
        with self._underlay.remote(host=host) as remote:
            if remote.execute(status_cmd).exit_code == 0:
                # grep found the expected state, nothing to do
                return

            LOG.info("{0} is not in running state on the node {1},"
                     " trying to start".format(service_name, host))
            restart_cmd = ("service {0} stop;"
                           " sleep 3; killall -9 {0};"
                           "service {0} start; sleep 5;"
                           .format(service_name))
            remote.execute(restart_cmd)

            # The check command is issued twice and its result is not
            # inspected.
            # NOTE(review): presumably the first call may fail right after
            # the restart - confirm.
            remote.execute(check_cmd)
            remote.execute(check_cmd)

    def execute_commands(self, commands, label="Command"):
        """Execute a sequence of commands

        Main purpose is to implement workarounds for salt formulas like:
        - exit_code == 0 when there are actual failures
        - salt_master and/or salt_minion stop working after executing
          a formula
        - a formula fails at first run, but completes at next runs

        Steps with a 'cmd' key are passed to execute_command(), steps with
        a 'do' key (and no 'cmd') are passed to command2().

        :param label: label of the current sequence of the commands, for log
        :param commands: list of dicts with the following data:
        commands = [
            ...
            {
                # Required for a shell step:
                'cmd': 'shell command(s) to run',
                'node_name': 'name of the node to run the command(s)',
                # Required for a salt step (used instead of 'cmd'):
                'do': 'key of the salt method to call',
                'target': 'salt target',
                'state': ...,   # exactly one of 'state' / 'states'
                'states': ...,
                # Optional for a salt step:
                'args': ...,
                'kwargs': ...,
                # Optional:
                'description': 'string with a readable command description',
                'retry': {
                    'count': int,  # How many times should be run the command
                                   # until success
                    'delay': int,  # Delay between tries in seconds
                },
                'skip_fail': bool  # If True - continue with the next step
                                   # without failure even if count number
                                   # is reached.
                                   # If False - raise an exception (default)
            },
            ...
        ]
        """
        for number, step in enumerate(commands, 1):
            cmd = step.get('cmd')
            do = step.get('do')
            # 'do' steps have no 'cmd', fall back to the 'do' value so the
            # header of the step does not show 'None'
            description = step.get('description', cmd or do)

            msg = "[ {0} #{1} ] {2}".format(label, number, description)
            LOG.info("\n\n{0}\n{1}".format(msg, '=' * len(msg)))

            if cmd:
                self.execute_command(step)
            elif do:
                self.command2(step)
            else:
                # Keep going as before, but do not hide the broken step
                LOG.error("Step has neither 'cmd' nor 'do' key, "
                          "nothing to execute")

    def execute_command(self, step):
        """Run the shell command of a step on a node, with retries

        An attempt is successful when the exit code is 0 and the output
        does not report failures on 'Failed:' lines. After a successful
        attempt the salt-master and salt-minion services are checked on
        the salt master host, unless that host is '0.0.0.0'.

        :param step: dict, see execute_commands() for the description
        :raises Exception: if the last attempt failed and 'skip_fail'
                           is False
        """
        # Required fields
        cmd = step.get('cmd')
        node_name = step.get('node_name')
        # Optional fields
        description = step.get('description', cmd)
        retry_count, retry_delay, skip_fail = self._retry_options(step)

        with self._underlay.remote(node_name=node_name) as remote:

            # x counts down from retry_count to 1; x == 1 is the last try
            for x in range(retry_count, 0, -1):
                # Fixed pause before every attempt
                time.sleep(3)
                result = remote.execute(cmd, verbose=True)

                # Workaround of exit code 0 from salt in case of failures
                failed = self._count_failed_states(result['stdout'])

                if result.exit_code != 0:
                    time.sleep(retry_delay)
                    LOG.info(
                        " === RETRY ({0}/{1}) ========================="
                        .format(x - 1, retry_count))
                elif failed != 0:
                    LOG.error(
                        " === SALT returned exit code = 0 while "
                        "there are failed modules! ===")
                    # Wait before the next try, same as for a non-zero
                    # exit code
                    time.sleep(retry_delay)
                    LOG.info(
                        " === RETRY ({0}/{1}) ======================="
                        .format(x - 1, retry_count))
                else:
                    if self._config.salt.salt_master_host != '0.0.0.0':
                        # Workarounds for crashed services
                        self.ensure_running_service(
                            "salt-master",
                            self._config.salt.salt_master_host,
                            "salt-call pillar.items",
                            'active (running)')  # Hardcoded for now
                        self.ensure_running_service(
                            "salt-minion",
                            self._config.salt.salt_master_host,
                            "salt 'cfg01*' pillar.items",
                            "active (running)")  # Hardcoded for now
                    break

                if x == 1 and skip_fail is False:
                    # In the last retry iteration, raise an exception
                    raise Exception("Step '{0}' failed"
                                    .format(description))

    def command2(self, step):
        """Run the salt method selected by the 'do' key, with retries

        The method name is looked up in ``self._salt._map`` by the 'do'
        value and is called with the target, the state(s), 'args' and
        'kwargs' of the step. An attempt is successful when none of the
        returned results carries a failure.

        :param step: dict, see execute_commands() for the description
        :raises KeyError: if 'do' or 'target' is missing in the step
        :raises ValueError: if none or both of 'state' and 'states' are set
        :raises Exception: if the last attempt failed and 'skip_fail'
                           is False
        """
        # Required fields
        do = step['do']
        target = step['target']
        state = step.get('state')
        states = step.get('states')
        # Optional fields
        args = step.get('args')
        kwargs = step.get('kwargs')
        description = step.get('description', do)
        retry_count, retry_delay, skip_fail = self._retry_options(step)

        # Exactly one of 'state' and 'states' has to be set
        if bool(state) == bool(states):
            raise ValueError("You should use state or states in step")

        # x counts down from retry_count to 1; x == 1 is the last try
        for x in range(retry_count, 0, -1):
            # Fixed pause before every attempt
            time.sleep(3)

            method = getattr(self._salt, self._salt._map[do])
            command_ret = method(tgt=target, state=state or states,
                                 args=args, kwargs=kwargs)
            if not isinstance(command_ret, list):
                command_ret = [command_ret]
            # Each item is unpacked as a (result, failures) pair
            results = [(r['return'][0], f) for r, f in command_ret]

            # FIXME: Change to debug level
            LOG.info(" === States output =======================\n"
                     "{}\n"
                     " =========================================".format(
                         pretty_repr([r for r, f in results])))

            all_fails = [f for r, f in results if f]
            if not all_fails:
                break

            LOG.error("States finished with failures.\n{}".format(
                all_fails))
            time.sleep(retry_delay)
            LOG.info(" === RETRY ({0}/{1}) ========================="
                     .format(x - 1, retry_count))

            if x == 1 and skip_fail is False:
                # In the last retry iteration, raise an exception
                raise Exception("Step '{0}' failed"
                                .format(description))