Dmitry Tyzhnenko | 2b730a0 | 2017-04-07 19:31:32 +0300 | [diff] [blame] | 1 | |
| 2 | import time |
| 3 | |
| 4 | from tcp_tests import logger |
| 5 | from tcp_tests.helpers.log_helpers import pretty_repr |
| 6 | |
| 7 | LOG = logger.logger |
| 8 | |
| 9 | |
class ExecuteCommandsMixin(object):
    """Mixin that runs sequences of deployment steps with retries.

    A step is either a shell command executed on a node through the
    underlay (key ``cmd``) or a call dispatched through ``self._salt``
    (key ``do``).  ``self._salt`` is not set by this mixin; the host class
    has to provide it before ``do`` steps are used.
    """

    __config = None
    __underlay = None

    def __init__(self, config, underlay):
        """Store the objects used by the execution helpers.

        :param config: configuration object; ``config.salt.salt_master_host``
                       is read after every successful shell step
        :param underlay: object whose ``remote(...)`` context manager yields
                         a remote exposing ``execute(cmd)``
        """
        self.__config = config
        self.__underlay = underlay
        super(ExecuteCommandsMixin, self).__init__()

    @staticmethod
    def _retry_settings(step):
        """Return ``(retry_count, retry_delay, skip_fail)`` for a step.

        A missing, empty or ``None`` ``retry`` entry means one attempt
        with a delay of one second.
        """
        retry = step.get('retry') or {}
        return (retry.get('count', 1),
                retry.get('delay', 1),
                step.get('skip_fail', False))

    @staticmethod
    def _count_salt_failures(lines):
        """Count failure markers in the output lines of a command.

        Workaround for exit code 0 being returned while the output still
        reports failures.  Counted markers:

        - a line starting with ``Failed:`` adds the number that follows it
          (or 1 when that text is not a number, instead of crashing);
        - a line containing ``Minion did not return. [No response]`` adds 1;
        - a line starting with ``[CRITICAL]`` adds 1.

        :param lines: iterable of output lines (strings)
        :return: total number of detected failures
        """
        failed = 0
        for line in lines:
            if line.startswith("Failed:"):
                try:
                    failed += int(line.split("Failed:")[1])
                except ValueError:
                    # Unparsable counter: still treat the line as a failure
                    failed += 1
            if 'Minion did not return. [No response]' in line:
                failed += 1
            if line.startswith("[CRITICAL]"):
                failed += 1
        return failed

    def ensure_running_service(self, service_name, host, check_cmd,
                               state_running='start/running'):
        """Check if the service_name is running or try to restart it

        :param service_name: name of the service that will be checked
        :param host: host on which the service will be checked; passed as
                     ``host=`` to the underlay ``remote()``
        :param check_cmd: shell command executed after a restart attempt
        :param state_running: string grepped in the ``service ... status``
                              output to decide that the service is running
        """
        cmd = "service {0} status | grep -q '{1}'".format(
            service_name, state_running)
        with self.__underlay.remote(host=host) as remote:
            result = remote.execute(cmd)
            if result.exit_code != 0:
                LOG.info("{0} is not in running state on the node {1},"
                         " trying to start".format(service_name, host))
                cmd = ("service {0} stop;"
                       " sleep 3; killall -9 {0};"
                       "service {0} start; sleep 5;"
                       .format(service_name))
                remote.execute(cmd)

                # NOTE(review): the check command is issued twice and its
                # result is ignored; presumably the first call after a
                # restart only warms the service up - confirm.
                remote.execute(check_cmd)
                remote.execute(check_cmd)

    def execute_commands(self, commands, label="Command"):
        """Execute a sequence of commands

        Main purpose is to implement workarounds for salt formulas like:
        - exit_code == 0 when there are actual failures
        - salt_master and/or salt_minion stop working after executing a
          formula
        - a formula fails at first run, but completes at next runs

        :param label: label of the current sequence of the commands, for log
        :param commands: list of dicts; each dict is either a shell step
                         (has 'cmd') or a salt step (has 'do'):
            commands = [
                ...
                {
                    # Shell step, required:
                    'cmd': 'shell command(s) to run',
                    'node_name': 'name of the node to run the command(s)',
                    # Salt step, required instead of the two keys above:
                    'do': 'key of self._salt._map naming the method to call',
                    'target': 'value passed to that method as tgt',
                    'state': ...,   # exactly one of 'state' / 'states'
                    # Optional:
                    'description': 'string with a readable description',
                    'retry': {
                        'count': int,  # How many times should be run the
                                       # command until success
                        'delay': int,  # Delay between tries in seconds
                    },
                    'skip_fail': bool  # If True - continue with the next
                                       # step without failure even if count
                                       # number is reached.
                                       # If False - raise an exception
                                       # (default)
                },
                ...
            ]
        """
        for n, step in enumerate(commands):
            cmd = step.get('cmd')
            do = step.get('do')
            # Fall back to whichever action the step defines, so that
            # 'do' steps are not logged as "None"
            description = step.get('description', cmd or do)

            msg = "[ {0} #{1} ] {2}".format(label, n + 1, description)
            LOG.info("\n\n{0}\n{1}".format(msg, '=' * len(msg)))

            if cmd:
                self.execute_command(step)
            elif do:
                self.command2(step)
            else:
                # Nothing to run: keep going, but do not hide the problem
                LOG.error("Step #{0} has neither 'cmd' nor 'do', skipping"
                          .format(n + 1))

    def execute_command(self, step):
        """Run the shell command of a step on a node, with retries.

        The command is considered successful when its exit code is 0 and
        no failure markers are found in its stdout/stderr (see
        ``_count_salt_failures``).  After a success, unless
        ``salt_master_host`` is '0.0.0.0', the salt-master and salt-minion
        services on the salt master host are checked and restarted if
        they are not running.

        :param step: dict with 'cmd' and 'node_name', and optional
                     'description', 'retry' and 'skip_fail'
        :raises Exception: when the last attempt fails and 'skip_fail'
                           is False
        """
        # Required fields
        cmd = step.get('cmd')
        node_name = step.get('node_name')
        # Optional fields
        description = step.get('description', cmd)
        retry_count, retry_delay, skip_fail = self._retry_settings(step)

        with self.__underlay.remote(node_name=node_name) as remote:

            # x counts down the remaining attempts, the last one being 1
            for x in range(retry_count, 0, -1):
                time.sleep(3)
                result = remote.execute(cmd, verbose=True)

                # Workaround of exit code 0 from salt in case of failures
                failed = self._count_salt_failures(
                    result['stdout'] + result['stderr'])

                if result.exit_code != 0:
                    time.sleep(retry_delay)
                    LOG.info(
                        " === RETRY ({0}/{1}) ========================="
                        .format(x - 1, retry_count))
                elif failed != 0:
                    LOG.error(
                        " === SALT returned exit code = 0 while "
                        "there are failed modules! ===")
                    # Same delay between tries as for a non-zero exit code
                    time.sleep(retry_delay)
                    LOG.info(
                        " === RETRY ({0}/{1}) ======================="
                        .format(x - 1, retry_count))
                else:
                    if self.__config.salt.salt_master_host != '0.0.0.0':
                        # Workarounds for crashed services
                        self.ensure_running_service(
                            "salt-master",
                            self.__config.salt.salt_master_host,
                            "salt-call pillar.items",
                            'active (running)')  # Hardcoded for now
                        self.ensure_running_service(
                            "salt-minion",
                            self.__config.salt.salt_master_host,
                            "salt 'cfg01*' pillar.items",
                            "active (running)")  # Hardcoded for now
                    break

                if x == 1 and skip_fail is False:
                    # In the last retry iteration, raise an exception
                    raise Exception("Step '{0}' failed"
                                    .format(description))

    def command2(self, step):
        """Run the salt call of a step through ``self._salt``, with retries.

        The method to call is looked up as
        ``getattr(self._salt, self._salt._map[step['do']])`` and invoked
        with ``tgt``, ``state``, ``args`` and ``kwargs``.  It is expected
        to return a ``(result, failures)`` pair or a list of such pairs;
        the attempt succeeds when every ``failures`` item is empty.

        :param step: dict with 'do', 'target' and exactly one of 'state' /
                     'states', and optional 'args', 'kwargs', 'description',
                     'retry' and 'skip_fail'
        :raises ValueError: when both or neither of 'state' / 'states'
                            are given
        :raises Exception: when the last attempt fails and 'skip_fail'
                           is False
        """
        # Required fields
        do = step['do']
        target = step['target']
        state = step.get('state')
        states = step.get('states')
        # Optional fields
        args = step.get('args')
        kwargs = step.get('kwargs')
        description = step.get('description', do)
        retry_count, retry_delay, skip_fail = self._retry_settings(step)

        # Exactly one of state / states has to be set
        if not bool(state) ^ bool(states):
            raise ValueError("You should use state or states in step")

        # x counts down the remaining attempts, the last one being 1
        for x in range(retry_count, 0, -1):
            time.sleep(3)

            method = getattr(self._salt, self._salt._map[do])
            command_ret = method(tgt=target, state=state or states,
                                 args=args, kwargs=kwargs)
            command_ret = command_ret if \
                isinstance(command_ret, list) else [command_ret]
            results = [(r['return'][0], f) for r, f in command_ret]

            # FIXME: Change to debug level
            LOG.info(" === States output =======================\n"
                     "{}\n"
                     " =========================================".format(
                         pretty_repr([r for r, f in results])))

            all_fails = [f for r, f in results if f]
            if all_fails:
                LOG.error("States finished with failures.\n{}".format(
                    all_fails))
                time.sleep(retry_delay)
                LOG.info(" === RETRY ({0}/{1}) ========================="
                         .format(x - 1, retry_count))
            else:
                break

            if x == 1 and skip_fail is False:
                # In the last retry iteration, raise an exception
                raise Exception("Step '{0}' failed"
                                .format(description))