Add argument to stop stress test on first error
If a single stress test action fails, it is often useful to stop
processing completely, since all other actions would fail too.
Change-Id: Iaab9b508cb243a69d70d3101c01ae53c01612d2c
diff --git a/tempest/stress/driver.py b/tempest/stress/driver.py
index c4c2041..d9b95e0 100644
--- a/tempest/stress/driver.py
+++ b/tempest/stress/driver.py
@@ -14,6 +14,7 @@
import logging
import multiprocessing
+import signal
import time
from tempest import clients
@@ -45,6 +46,7 @@
# add the handler to the root logger
logger = logging.getLogger('tempest.stress')
logger.addHandler(_console)
+processes = []
def do_ssh(command, host):
@@ -93,10 +95,29 @@
return None
-def stress_openstack(tests, duration, max_runs=None):
+def sigchld_handler(signal, frame):
+ """
+ Signal handler (only active if stop_on_error is True).
+ """
+ terminate_all_processes()
+
+
+def terminate_all_processes():
+ """
+ Goes through the process list and terminates all child processes.
+ """
+ for process in processes:
+ if process['process'].is_alive():
+ try:
+ process['process'].terminate()
+ except Exception:
+ pass
+ process['process'].join()
+
+
+def stress_openstack(tests, duration, max_runs=None, stop_on_error=False):
"""
Workload driver. Executes an action function against a nova-cluster.
-
"""
logfiles = admin_manager.config.stress.target_logfiles
log_check_interval = int(admin_manager.config.stress.log_check_interval)
@@ -105,7 +126,6 @@
computes = _get_compute_nodes(controller)
for node in computes:
do_ssh("rm -f %s" % logfiles, node)
- processes = []
for test in tests:
if test.get('use_admin', False):
manager = admin_manager
@@ -127,7 +147,7 @@
tenant_name=tenant_name)
test_obj = importutils.import_class(test['action'])
- test_run = test_obj(manager, logger, max_runs)
+ test_run = test_obj(manager, logger, max_runs, stop_on_error)
kwargs = test.get('kwargs', {})
test_run.setUp(**dict(kwargs.iteritems()))
@@ -150,6 +170,9 @@
processes.append(process)
p.start()
+ if stop_on_error:
+ # NOTE(mkoderer): only the parent should register the handler
+ signal.signal(signal.SIGCHLD, sigchld_handler)
end_time = time.time() + duration
had_errors = False
while True:
@@ -168,6 +191,11 @@
break
time.sleep(min(remaining, log_check_interval))
+ if stop_on_error:
+ for process in processes:
+ if process['statistic']['fails'] > 0:
+ break
+
if not logfiles:
continue
errors = _error_in_logs(logfiles, computes)
@@ -175,10 +203,7 @@
had_errors = True
break
- for process in processes:
- if process['process'].is_alive():
- process['process'].terminate()
- process['process'].join()
+ terminate_all_processes()
sum_fails = 0
sum_runs = 0
diff --git a/tempest/stress/run_stress.py b/tempest/stress/run_stress.py
index 106049d..32e3ae0 100755
--- a/tempest/stress/run_stress.py
+++ b/tempest/stress/run_stress.py
@@ -22,7 +22,7 @@
def main(ns):
- #NOTE(kodererm): moved import to make "-h" possible without OpenStack
+ # NOTE(mkoderer): moved import to make "-h" possible without OpenStack
from tempest.stress import driver
result = 0
tests = json.load(open(ns.tests, 'r'))
@@ -30,12 +30,13 @@
for test in tests:
step_result = driver.stress_openstack([test],
ns.duration,
- ns.number)
- #NOTE(kodererm): we just save the last result code
+ ns.number,
+ ns.stop)
+ # NOTE(mkoderer): we just save the last result code
if (step_result != 0):
result = step_result
else:
- driver.stress_openstack(tests, ns.duration, ns.number)
+ driver.stress_openstack(tests, ns.duration, ns.number, ns.stop)
return result
@@ -44,6 +45,8 @@
help="Duration of test in secs.")
parser.add_argument('-s', '--serial', action='store_true',
help="Trigger running tests serially.")
+parser.add_argument('-S', '--stop', action='store_true',
+ default=False, help="Stop on first error.")
parser.add_argument('-n', '--number', type=int,
help="How often an action is executed for each process.")
parser.add_argument('tests', help="Name of the file with test description.")
diff --git a/tempest/stress/stressaction.py b/tempest/stress/stressaction.py
index 77ddd1c..ab09adc 100644
--- a/tempest/stress/stressaction.py
+++ b/tempest/stress/stressaction.py
@@ -20,10 +20,11 @@
class StressAction(object):
- def __init__(self, manager, logger, max_runs=None):
+ def __init__(self, manager, logger, max_runs=None, stop_on_error=False):
self.manager = manager
self.logger = logger
self.max_runs = max_runs
+ self.stop_on_error = stop_on_error
def _shutdown_handler(self, signal, frame):
self.tearDown()
@@ -63,6 +64,11 @@
self.logger.exception("Failure in run")
finally:
shared_statistic['runs'] += 1
+ if self.stop_on_error and (shared_statistic['fails'] > 1):
+ self.logger.warn("Stop process due to"
+ "\"stop-on-error\" argument")
+ self.tearDown()
+ sys.exit(1)
def run(self):
"""This method is where the stress test code runs."""