add rally to the run
diff --git a/tcp_tests/managers/underlay_ssh_manager.py b/tcp_tests/managers/underlay_ssh_manager.py
index 90f3924..09521cd 100644
--- a/tcp_tests/managers/underlay_ssh_manager.py
+++ b/tcp_tests/managers/underlay_ssh_manager.py
@@ -367,13 +367,20 @@
def ensure_running_service(self, service_name, node_name, check_cmd,
state_running='start/running'):
+ """Check if the service_name is running, or try to restart it
+
+ :param service_name: name of the service that will be checked
+ :param node_name: node on which the service will be checked
+ :param check_cmd: shell command to ensure that the service is running
+ :param state_running: string to check the service state against
+ """
cmd = "service {0} status | grep -q '{1}'".format(
service_name, state_running)
with self.remote(node_name=node_name) as remote:
result = remote.execute(cmd)
if result.exit_code != 0:
LOG.info("{0} is not in running state on the node {1},"
- " restarting".format(service_name, node_name))
+ " trying to start".format(service_name, node_name))
cmd = ("service {0} stop;"
" sleep 3; killall -9 {0};"
"service {0} start; sleep 5;"
@@ -383,16 +390,55 @@
remote.execute(check_cmd)
remote.execute(check_cmd)
- def execute_commands(self, commands):
- for n, step in enumerate(commands):
- LOG.info(" ####################################################")
- LOG.info(" *** [ Command #{0} ] {1} ***"
- .format(n+1, step['description']))
+ def execute_commands(self, commands, label="Command"):
+ """Execute a sequence of commands
- with self.remote(node_name=step['node_name']) as remote:
- for x in range(step['retry']['count'], 0, -1):
+ The main purpose is to implement workarounds for salt formulas like:
+ - exit_code == 0 when there are actual failures
+ - salt_master and/or salt_minion stop working after executing a formula
+ - a formula fails at first run, but completes at next runs
+
+ :param label: label of the current sequence of the commands, for log
+ :param commands: list of dicts with the following data:
+ commands = [
+ ...
+ {
+ # Required:
+ 'cmd': 'shell command(s) to run',
+ 'node_name': 'name of the node to run the command(s)',
+ # Optional:
+ 'description': 'string with a readable command description',
+ 'retry': {
+ 'count': int, # How many times to run the command
+ # until success
+ 'delay': int, # Delay between tries in seconds
+ },
+ 'skip_fail': bool # If True - continue with the next step
+ # without failure even if count number
+ # is reached.
+ # If False - raise an exception (default)
+ },
+ ...
+ ]
+ """
+ for n, step in enumerate(commands):
+ # Required fields
+ cmd = step.get('cmd')
+ node_name = step.get('node_name')
+ # Optional fields
+ description = step.get('description', cmd)
+ retry = step.get('retry', {'count': 1, 'delay': 1})
+ retry_count = retry.get('count', 1)
+ retry_delay = retry.get('delay', 1)
+ skip_fail = step.get('skip_fail', False)
+
+ LOG.info(" >>> [ {0} #{1} ] {2}".format(label, n+1, description))
+
+ with self.remote(node_name=node_name) as remote:
+
+ for x in range(retry_count, 0, -1):
time.sleep(3)
- result = remote.execute(step['cmd'], verbose=True)
+ result = remote.execute(cmd, verbose=True)
# Workaround of exit code 0 from salt in case of failures
failed = 0
@@ -401,14 +447,14 @@
failed += int(s.split("Failed:")[1])
if result.exit_code != 0:
- time.sleep(step['retry']['delay'])
+ time.sleep(retry_delay)
LOG.info(" === RETRY ({0}/{1}) ========================="
- .format(x-1, step['retry']['count']))
+ .format(x-1, retry_count))
elif failed != 0:
LOG.error(" === SALT returned exit code = 0 while "
"there are failed modules! ===")
LOG.info(" === RETRY ({0}/{1}) ======================="
- .format(x-1, step['retry']['count']))
+ .format(x-1, retry_count))
else:
# Workarounds for crashed services
self.ensure_running_service(
@@ -423,7 +469,7 @@
"active (running)") # Hardcoded for now
break
- if x == 1 and step['skip_fail'] == False:
+ if x == 1 and not skip_fail:
# In the last retry iteration, raise an exception
raise Exception("Step '{0}' failed"
- .format(step['description']))
+ .format(description))