Merge "Add argument to stop stress test on first error"
diff --git a/tempest/stress/driver.py b/tempest/stress/driver.py
index c4c2041..d9b95e0 100644
--- a/tempest/stress/driver.py
+++ b/tempest/stress/driver.py
@@ -14,6 +14,7 @@
 
 import logging
 import multiprocessing
+import signal
 import time
 
 from tempest import clients
@@ -45,6 +46,7 @@
 # add the handler to the root logger
 logger = logging.getLogger('tempest.stress')
 logger.addHandler(_console)
+processes = []
 
 
 def do_ssh(command, host):
@@ -93,10 +95,29 @@
     return None
 
 
-def stress_openstack(tests, duration, max_runs=None):
+def sigchld_handler(signal, frame):
+    """
+    SIGCHLD handler (registered only if stop_on_error): stops all children.
+    """
+    terminate_all_processes()
+
+
+def terminate_all_processes():
+    """Terminates (if still alive) and joins every process in the list."""
+    for entry in processes:
+        child = entry['process']
+        if child.is_alive():
+            try:
+                child.terminate()
+            except Exception:
+                # best effort: the child may already be gone; keep going
+                logger.debug("Could not terminate child", exc_info=True)
+        child.join()
+
+
+def stress_openstack(tests, duration, max_runs=None, stop_on_error=False):
     """
     Workload driver. Executes an action function against a nova-cluster.
-
     """
     logfiles = admin_manager.config.stress.target_logfiles
     log_check_interval = int(admin_manager.config.stress.log_check_interval)
@@ -105,7 +126,6 @@
         computes = _get_compute_nodes(controller)
         for node in computes:
             do_ssh("rm -f %s" % logfiles, node)
-    processes = []
     for test in tests:
         if test.get('use_admin', False):
             manager = admin_manager
@@ -127,7 +147,7 @@
                                           tenant_name=tenant_name)
 
             test_obj = importutils.import_class(test['action'])
-            test_run = test_obj(manager, logger, max_runs)
+            test_run = test_obj(manager, logger, max_runs, stop_on_error)
 
             kwargs = test.get('kwargs', {})
             test_run.setUp(**dict(kwargs.iteritems()))
@@ -150,6 +170,9 @@
 
             processes.append(process)
             p.start()
+    if stop_on_error:
+        # NOTE(mkoderer): only the parent should register the handler
+        signal.signal(signal.SIGCHLD, sigchld_handler)
     end_time = time.time() + duration
     had_errors = False
     while True:
@@ -168,6 +191,11 @@
                 break
 
         time.sleep(min(remaining, log_check_interval))
+        if stop_on_error:
+            # stop monitoring as soon as any worker has recorded a failure
+            if any(p['statistic']['fails'] > 0 for p in processes):
+                break
+
         if not logfiles:
             continue
         errors = _error_in_logs(logfiles, computes)
@@ -175,10 +203,7 @@
             had_errors = True
             break
 
-    for process in processes:
-        if process['process'].is_alive():
-            process['process'].terminate()
-        process['process'].join()
+    terminate_all_processes()
 
     sum_fails = 0
     sum_runs = 0
diff --git a/tempest/stress/run_stress.py b/tempest/stress/run_stress.py
index 106049d..32e3ae0 100755
--- a/tempest/stress/run_stress.py
+++ b/tempest/stress/run_stress.py
@@ -22,7 +22,7 @@
 
 
 def main(ns):
-    #NOTE(kodererm): moved import to make "-h" possible without OpenStack
+    # NOTE(mkoderer): moved import to make "-h" possible without OpenStack
     from tempest.stress import driver
     result = 0
     tests = json.load(open(ns.tests, 'r'))
@@ -30,12 +30,13 @@
         for test in tests:
             step_result = driver.stress_openstack([test],
                                                   ns.duration,
-                                                  ns.number)
-            #NOTE(kodererm): we just save the last result code
+                                                  ns.number,
+                                                  ns.stop)
+            # NOTE(mkoderer): we just save the last result code
             if (step_result != 0):
                 result = step_result
     else:
-        driver.stress_openstack(tests, ns.duration, ns.number)
+        driver.stress_openstack(tests, ns.duration, ns.number, ns.stop)
     return result
 
 
@@ -44,6 +45,8 @@
                     help="Duration of test in secs.")
 parser.add_argument('-s', '--serial', action='store_true',
                     help="Trigger running tests serially.")
+parser.add_argument('-S', '--stop', action='store_true',
+                    default=False, help="Stop on first error.")
 parser.add_argument('-n', '--number', type=int,
                     help="How often an action is executed for each process.")
 parser.add_argument('tests', help="Name of the file with test description.")
diff --git a/tempest/stress/stressaction.py b/tempest/stress/stressaction.py
index 77ddd1c..ab09adc 100644
--- a/tempest/stress/stressaction.py
+++ b/tempest/stress/stressaction.py
@@ -20,10 +20,11 @@
 
 class StressAction(object):
 
-    def __init__(self, manager, logger, max_runs=None):
+    def __init__(self, manager, logger, max_runs=None, stop_on_error=False):
         self.manager = manager
         self.logger = logger
         self.max_runs = max_runs
+        self.stop_on_error = stop_on_error  # if set, failures end the worker
 
     def _shutdown_handler(self, signal, frame):
         self.tearDown()
@@ -63,6 +64,11 @@
                 self.logger.exception("Failure in run")
             finally:
                 shared_statistic['runs'] += 1
+                if self.stop_on_error and (shared_statistic['fails'] > 0):
+                    self.logger.warn("Stop process due to "
+                                     "\"stop-on-error\" argument")
+                    self.tearDown()
+                    sys.exit(1)  # leave this worker with a failure status
 
     def run(self):
         """This method is where the stress test code runs."""