Fix problem with never-stopping stress tests
This change fixes the issue of stress tests never stopping in
the nightly build jobs. It addresses three problems related to this:
- An exception in tearDown() will no longer cause an endless loop
- Hanging processes will be killed with SIGKILL (-9) after log_check_interval seconds
- The tox job will stop on the first error
Fixes-bug: 1230357
Change-Id: I3a0160295d98fdc7f8ffce64cc4a08c5c8ae654a
diff --git a/tempest/stress/driver.py b/tempest/stress/driver.py
index e518d28..d959543 100644
--- a/tempest/stress/driver.py
+++ b/tempest/stress/driver.py
@@ -13,6 +13,7 @@
# limitations under the License.
import multiprocessing
+import os
import signal
import time
@@ -87,12 +88,22 @@
"""
Goes through the process list and terminates all child processes.
"""
+ log_check_interval = int(admin_manager.config.stress.log_check_interval)
for process in processes:
if process['process'].is_alive():
try:
process['process'].terminate()
except Exception:
pass
+ time.sleep(log_check_interval)
+ for process in processes:
+ if process['process'].is_alive():
+ try:
+ pid = process['process'].pid
+ LOG.warn("Process %d hangs. Send SIGKILL." % pid)
+ os.kill(pid, signal.SIGKILL)
+ except Exception:
+ pass
process['process'].join()
diff --git a/tempest/stress/stressaction.py b/tempest/stress/stressaction.py
index 45a628d..61e46fa 100644
--- a/tempest/stress/stressaction.py
+++ b/tempest/stress/stressaction.py
@@ -30,7 +30,10 @@
self.stop_on_error = stop_on_error
def _shutdown_handler(self, signal, frame):
- self.tearDown()
+ try:
+ self.tearDown()
+ except Exception:
+ self.logger.exception("Error while tearDown")
sys.exit(0)
@property
diff --git a/tox.ini b/tox.ini
index 1b8a0fd..abc9e42 100644
--- a/tox.ini
+++ b/tox.ini
@@ -86,7 +86,7 @@
sitepackages = True
setenv = VIRTUAL_ENV={envdir}
commands =
- python -m tempest/stress/run_stress -a -d 3600
+ python -m tempest/stress/run_stress -a -d 3600 -S
[testenv:venv]
commands = {posargs}