Roger Meier | 41ad434 | 2015-03-24 22:30:40 +0100 | [diff] [blame] | 1 | # |
| 2 | # Licensed to the Apache Software Foundation (ASF) under one |
| 3 | # or more contributor license agreements. See the NOTICE file |
| 4 | # distributed with this work for additional information |
| 5 | # regarding copyright ownership. The ASF licenses this file |
| 6 | # to you under the Apache License, Version 2.0 (the |
| 7 | # "License"); you may not use this file except in compliance |
| 8 | # with the License. You may obtain a copy of the License at |
| 9 | # |
| 10 | # http://www.apache.org/licenses/LICENSE-2.0 |
| 11 | # |
| 12 | # Unless required by applicable law or agreed to in writing, |
| 13 | # software distributed under the License is distributed on an |
| 14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| 15 | # KIND, either express or implied. See the License for the |
| 16 | # specific language governing permissions and limitations |
| 17 | # under the License. |
| 18 | # |
| 19 | |
| 20 | import contextlib |
| 21 | import multiprocessing |
| 22 | import multiprocessing.managers |
| 23 | import os |
| 24 | import platform |
| 25 | import random |
Roger Meier | 41ad434 | 2015-03-24 22:30:40 +0100 | [diff] [blame] | 26 | import signal |
Nobuaki Sukegawa | a6ab1f5 | 2015-11-28 15:04:39 +0900 | [diff] [blame] | 27 | import socket |
Roger Meier | 41ad434 | 2015-03-24 22:30:40 +0100 | [diff] [blame] | 28 | import subprocess |
Nobuaki Sukegawa | a6ab1f5 | 2015-11-28 15:04:39 +0900 | [diff] [blame] | 29 | import sys |
Roger Meier | 41ad434 | 2015-03-24 22:30:40 +0100 | [diff] [blame] | 30 | import threading |
| 31 | import time |
Roger Meier | 41ad434 | 2015-03-24 22:30:40 +0100 | [diff] [blame] | 32 | |
Nobuaki Sukegawa | 2de2700 | 2015-11-22 01:13:48 +0900 | [diff] [blame] | 33 | from .compat import str_join |
| 34 | from .test import TestEntry, domain_socket_path |
| 35 | from .report import ExecReporter, SummaryReporter |
Roger Meier | 41ad434 | 2015-03-24 22:30:40 +0100 | [diff] [blame] | 36 | |
| 37 | RESULT_TIMEOUT = 128 |
| 38 | RESULT_ERROR = 64 |
| 39 | |
| 40 | |
| 41 | class ExecutionContext(object): |
| 42 | def __init__(self, cmd, cwd, env, report): |
| 43 | self._log = multiprocessing.get_logger() |
| 44 | self.report = report |
| 45 | self.cmd = cmd |
| 46 | self.cwd = cwd |
| 47 | self.env = env |
| 48 | self.timer = None |
| 49 | self.expired = False |
Nobuaki Sukegawa | 2ba7944 | 2016-01-12 19:37:55 +0900 | [diff] [blame] | 50 | self.killed = False |
Roger Meier | 41ad434 | 2015-03-24 22:30:40 +0100 | [diff] [blame] | 51 | |
| 52 | def _expire(self): |
| 53 | self._log.info('Timeout') |
| 54 | self.expired = True |
| 55 | self.kill() |
| 56 | |
| 57 | def kill(self): |
| 58 | self._log.debug('Killing process : %d' % self.proc.pid) |
Nobuaki Sukegawa | 2ba7944 | 2016-01-12 19:37:55 +0900 | [diff] [blame] | 59 | self.killed = True |
Roger Meier | 41ad434 | 2015-03-24 22:30:40 +0100 | [diff] [blame] | 60 | if platform.system() != 'Windows': |
| 61 | try: |
| 62 | os.killpg(self.proc.pid, signal.SIGKILL) |
Nobuaki Sukegawa | 5e432ec | 2016-01-24 01:34:29 +0900 | [diff] [blame] | 63 | except Exception: |
| 64 | self._log.info('Failed to kill process group', exc_info=sys.exc_info()) |
Roger Meier | 41ad434 | 2015-03-24 22:30:40 +0100 | [diff] [blame] | 65 | try: |
| 66 | self.proc.kill() |
Nobuaki Sukegawa | 5e432ec | 2016-01-24 01:34:29 +0900 | [diff] [blame] | 67 | except Exception: |
| 68 | self._log.info('Failed to kill process', exc_info=sys.exc_info()) |
Roger Meier | 41ad434 | 2015-03-24 22:30:40 +0100 | [diff] [blame] | 69 | |
| 70 | def _popen_args(self): |
| 71 | args = { |
| 72 | 'cwd': self.cwd, |
| 73 | 'env': self.env, |
| 74 | 'stdout': self.report.out, |
| 75 | 'stderr': subprocess.STDOUT, |
| 76 | } |
| 77 | # make sure child processes doesn't remain after killing |
| 78 | if platform.system() == 'Windows': |
| 79 | DETACHED_PROCESS = 0x00000008 |
| 80 | args.update(creationflags=DETACHED_PROCESS | subprocess.CREATE_NEW_PROCESS_GROUP) |
| 81 | else: |
| 82 | args.update(preexec_fn=os.setsid) |
| 83 | return args |
| 84 | |
| 85 | def start(self, timeout=0): |
Nobuaki Sukegawa | 2de2700 | 2015-11-22 01:13:48 +0900 | [diff] [blame] | 86 | joined = str_join(' ', self.cmd) |
Jens Geyer | aad06de | 2015-11-21 14:43:56 +0100 | [diff] [blame] | 87 | self._log.debug('COMMAND: %s', joined) |
Roger Meier | 41ad434 | 2015-03-24 22:30:40 +0100 | [diff] [blame] | 88 | self._log.debug('WORKDIR: %s', self.cwd) |
| 89 | self._log.debug('LOGFILE: %s', self.report.logpath) |
| 90 | self.report.begin() |
Nobuaki Sukegawa | 2de2700 | 2015-11-22 01:13:48 +0900 | [diff] [blame] | 91 | self.proc = subprocess.Popen(self.cmd, **self._popen_args()) |
Roger Meier | 41ad434 | 2015-03-24 22:30:40 +0100 | [diff] [blame] | 92 | if timeout > 0: |
| 93 | self.timer = threading.Timer(timeout, self._expire) |
| 94 | self.timer.start() |
| 95 | return self._scoped() |
| 96 | |
| 97 | @contextlib.contextmanager |
| 98 | def _scoped(self): |
| 99 | yield self |
| 100 | self._log.debug('Killing scoped process') |
Nobuaki Sukegawa | 2ba7944 | 2016-01-12 19:37:55 +0900 | [diff] [blame] | 101 | if self.proc.poll() is None: |
| 102 | self.kill() |
| 103 | self.report.killed() |
| 104 | else: |
| 105 | self._log.debug('Process died unexpectedly') |
| 106 | self.report.died() |
Roger Meier | 41ad434 | 2015-03-24 22:30:40 +0100 | [diff] [blame] | 107 | |
| 108 | def wait(self): |
| 109 | self.proc.communicate() |
| 110 | if self.timer: |
| 111 | self.timer.cancel() |
| 112 | self.report.end(self.returncode) |
| 113 | |
| 114 | @property |
| 115 | def returncode(self): |
| 116 | return self.proc.returncode if self.proc else None |
| 117 | |
| 118 | |
Nobuaki Sukegawa | 378b727 | 2016-01-03 17:04:50 +0900 | [diff] [blame] | 119 | def exec_context(port, logdir, test, prog): |
| 120 | report = ExecReporter(logdir, test, prog) |
Roger Meier | 41ad434 | 2015-03-24 22:30:40 +0100 | [diff] [blame] | 121 | prog.build_command(port) |
| 122 | return ExecutionContext(prog.command, prog.workdir, prog.env, report) |
| 123 | |
| 124 | |
Nobuaki Sukegawa | 5e432ec | 2016-01-24 01:34:29 +0900 | [diff] [blame] | 125 | def run_test(testdir, logdir, test_dict, max_retry, async=True): |
Roger Meier | 41ad434 | 2015-03-24 22:30:40 +0100 | [diff] [blame] | 126 | try: |
| 127 | logger = multiprocessing.get_logger() |
Nobuaki Sukegawa | 5e432ec | 2016-01-24 01:34:29 +0900 | [diff] [blame] | 128 | max_bind_retry = 3 |
Roger Meier | 41ad434 | 2015-03-24 22:30:40 +0100 | [diff] [blame] | 129 | retry_count = 0 |
Nobuaki Sukegawa | 5e432ec | 2016-01-24 01:34:29 +0900 | [diff] [blame] | 130 | bind_retry_count = 0 |
Roger Meier | 41ad434 | 2015-03-24 22:30:40 +0100 | [diff] [blame] | 131 | test = TestEntry(testdir, **test_dict) |
| 132 | while True: |
| 133 | if stop.is_set(): |
| 134 | logger.debug('Skipping because shutting down') |
Nobuaki Sukegawa | 5e432ec | 2016-01-24 01:34:29 +0900 | [diff] [blame] | 135 | return (retry_count, None) |
Roger Meier | 41ad434 | 2015-03-24 22:30:40 +0100 | [diff] [blame] | 136 | logger.debug('Start') |
| 137 | with PortAllocator.alloc_port_scoped(ports, test.socket) as port: |
| 138 | logger.debug('Start with port %d' % port) |
Nobuaki Sukegawa | 378b727 | 2016-01-03 17:04:50 +0900 | [diff] [blame] | 139 | sv = exec_context(port, logdir, test, test.server) |
| 140 | cl = exec_context(port, logdir, test, test.client) |
Roger Meier | 41ad434 | 2015-03-24 22:30:40 +0100 | [diff] [blame] | 141 | |
| 142 | logger.debug('Starting server') |
| 143 | with sv.start(): |
| 144 | if test.delay > 0: |
| 145 | logger.debug('Delaying client for %.2f seconds' % test.delay) |
| 146 | time.sleep(test.delay) |
Nobuaki Sukegawa | 5e432ec | 2016-01-24 01:34:29 +0900 | [diff] [blame] | 147 | connect_retry_count = 0 |
| 148 | max_connect_retry = 10 |
| 149 | connect_retry_wait = 0.5 |
Roger Meier | 41ad434 | 2015-03-24 22:30:40 +0100 | [diff] [blame] | 150 | while True: |
| 151 | logger.debug('Starting client') |
| 152 | cl.start(test.timeout) |
| 153 | logger.debug('Waiting client') |
| 154 | cl.wait() |
Nobuaki Sukegawa | 5e432ec | 2016-01-24 01:34:29 +0900 | [diff] [blame] | 155 | if not cl.report.maybe_false_positive() or connect_retry_count >= max_connect_retry: |
| 156 | if connect_retry_count > 0 and connect_retry_count < max_connect_retry: |
| 157 | logger.warn('[%s]: Connected after %d retry (%.2f sec each)' % (test.server.name, connect_retry_count, connect_retry_wait)) |
| 158 | # Wait for 50ms to see if server does not die at the end. |
Nobuaki Sukegawa | 2ba7944 | 2016-01-12 19:37:55 +0900 | [diff] [blame] | 159 | time.sleep(0.05) |
Roger Meier | 41ad434 | 2015-03-24 22:30:40 +0100 | [diff] [blame] | 160 | break |
Nobuaki Sukegawa | 5e432ec | 2016-01-24 01:34:29 +0900 | [diff] [blame] | 161 | logger.debug('Server may not be ready, waiting %.2f second...' % connect_retry_wait) |
| 162 | time.sleep(connect_retry_wait) |
| 163 | connect_retry_count += 1 |
Roger Meier | 41ad434 | 2015-03-24 22:30:40 +0100 | [diff] [blame] | 164 | |
Nobuaki Sukegawa | 5e432ec | 2016-01-24 01:34:29 +0900 | [diff] [blame] | 165 | if sv.report.maybe_false_positive() and bind_retry_count < max_bind_retry: |
| 166 | logger.warn('[%s]: Detected socket bind failure, retrying...', test.server.name) |
| 167 | bind_retry_count += 1 |
| 168 | else: |
Nobuaki Sukegawa | 2ba7944 | 2016-01-12 19:37:55 +0900 | [diff] [blame] | 169 | if cl.expired: |
Nobuaki Sukegawa | 5e432ec | 2016-01-24 01:34:29 +0900 | [diff] [blame] | 170 | result = RESULT_TIMEOUT |
Nobuaki Sukegawa | 2ba7944 | 2016-01-12 19:37:55 +0900 | [diff] [blame] | 171 | elif not sv.killed and cl.proc.returncode == 0: |
| 172 | # Server should be alive at the end. |
Nobuaki Sukegawa | 5e432ec | 2016-01-24 01:34:29 +0900 | [diff] [blame] | 173 | result = RESULT_ERROR |
Nobuaki Sukegawa | 2ba7944 | 2016-01-12 19:37:55 +0900 | [diff] [blame] | 174 | else: |
Nobuaki Sukegawa | 5e432ec | 2016-01-24 01:34:29 +0900 | [diff] [blame] | 175 | result = cl.proc.returncode |
| 176 | |
| 177 | if result == 0 or retry_count >= max_retry: |
| 178 | return (retry_count, result) |
| 179 | else: |
| 180 | logger.info('[%s-%s]: test failed, retrying...', test.server.name, test.client.name) |
| 181 | retry_count += 1 |
Roger Meier | 41ad434 | 2015-03-24 22:30:40 +0100 | [diff] [blame] | 182 | except (KeyboardInterrupt, SystemExit): |
| 183 | logger.info('Interrupted execution') |
| 184 | if not async: |
| 185 | raise |
| 186 | stop.set() |
| 187 | return None |
Nobuaki Sukegawa | 2ba7944 | 2016-01-12 19:37:55 +0900 | [diff] [blame] | 188 | except: |
Roger Meier | 41ad434 | 2015-03-24 22:30:40 +0100 | [diff] [blame] | 189 | if not async: |
| 190 | raise |
Nobuaki Sukegawa | 2ba7944 | 2016-01-12 19:37:55 +0900 | [diff] [blame] | 191 | logger.warn('Error executing [%s]', test.name, exc_info=sys.exc_info()) |
Nobuaki Sukegawa | 5e432ec | 2016-01-24 01:34:29 +0900 | [diff] [blame] | 192 | return (retry_count, RESULT_ERROR) |
Roger Meier | 41ad434 | 2015-03-24 22:30:40 +0100 | [diff] [blame] | 193 | |
| 194 | |
| 195 | class PortAllocator(object): |
| 196 | def __init__(self): |
| 197 | self._log = multiprocessing.get_logger() |
| 198 | self._lock = multiprocessing.Lock() |
| 199 | self._ports = set() |
| 200 | self._dom_ports = set() |
| 201 | self._last_alloc = 0 |
| 202 | |
| 203 | def _get_tcp_port(self): |
| 204 | sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) |
| 205 | sock.bind(('127.0.0.1', 0)) |
| 206 | port = sock.getsockname()[1] |
| 207 | self._lock.acquire() |
| 208 | try: |
| 209 | ok = port not in self._ports |
| 210 | if ok: |
| 211 | self._ports.add(port) |
| 212 | self._last_alloc = time.time() |
| 213 | finally: |
| 214 | self._lock.release() |
| 215 | sock.close() |
| 216 | return port if ok else self._get_tcp_port() |
| 217 | |
| 218 | def _get_domain_port(self): |
| 219 | port = random.randint(1024, 65536) |
| 220 | self._lock.acquire() |
| 221 | try: |
| 222 | ok = port not in self._dom_ports |
| 223 | if ok: |
| 224 | self._dom_ports.add(port) |
| 225 | finally: |
| 226 | self._lock.release() |
| 227 | return port if ok else self._get_domain_port() |
| 228 | |
| 229 | def alloc_port(self, socket_type): |
pavlo | dd08f6e | 2015-10-08 16:43:56 -0400 | [diff] [blame] | 230 | if socket_type in ('domain', 'abstract'): |
Roger Meier | 41ad434 | 2015-03-24 22:30:40 +0100 | [diff] [blame] | 231 | return self._get_domain_port() |
| 232 | else: |
| 233 | return self._get_tcp_port() |
| 234 | |
| 235 | # static method for inter-process invokation |
| 236 | @staticmethod |
| 237 | @contextlib.contextmanager |
| 238 | def alloc_port_scoped(allocator, socket_type): |
| 239 | port = allocator.alloc_port(socket_type) |
| 240 | yield port |
| 241 | allocator.free_port(socket_type, port) |
| 242 | |
| 243 | def free_port(self, socket_type, port): |
| 244 | self._log.debug('free_port') |
| 245 | self._lock.acquire() |
| 246 | try: |
| 247 | if socket_type == 'domain': |
| 248 | self._dom_ports.remove(port) |
| 249 | path = domain_socket_path(port) |
| 250 | if os.path.exists(path): |
| 251 | os.remove(path) |
pavlo | dd08f6e | 2015-10-08 16:43:56 -0400 | [diff] [blame] | 252 | elif socket_type == 'abstract': |
| 253 | self._dom_ports.remove(port) |
Roger Meier | 41ad434 | 2015-03-24 22:30:40 +0100 | [diff] [blame] | 254 | else: |
| 255 | self._ports.remove(port) |
Nobuaki Sukegawa | 5e432ec | 2016-01-24 01:34:29 +0900 | [diff] [blame] | 256 | except IOError: |
| 257 | self._log.info('Error while freeing port', exc_info=sys.exc_info()) |
Roger Meier | 41ad434 | 2015-03-24 22:30:40 +0100 | [diff] [blame] | 258 | finally: |
| 259 | self._lock.release() |
| 260 | |
| 261 | |
| 262 | class NonAsyncResult(object): |
| 263 | def __init__(self, value): |
| 264 | self._value = value |
| 265 | |
| 266 | def get(self, timeout=None): |
| 267 | return self._value |
| 268 | |
| 269 | def wait(self, timeout=None): |
| 270 | pass |
| 271 | |
| 272 | def ready(self): |
| 273 | return True |
| 274 | |
| 275 | def successful(self): |
| 276 | return self._value == 0 |
| 277 | |
| 278 | |
| 279 | class TestDispatcher(object): |
Nobuaki Sukegawa | bd16530 | 2016-01-19 11:10:07 +0900 | [diff] [blame] | 280 | def __init__(self, testdir, basedir, logdir_rel, concurrency): |
Roger Meier | 41ad434 | 2015-03-24 22:30:40 +0100 | [diff] [blame] | 281 | self._log = multiprocessing.get_logger() |
| 282 | self.testdir = testdir |
Nobuaki Sukegawa | bd16530 | 2016-01-19 11:10:07 +0900 | [diff] [blame] | 283 | self._report = SummaryReporter(basedir, logdir_rel, concurrency > 1) |
| 284 | self.logdir = self._report.testdir |
Roger Meier | 41ad434 | 2015-03-24 22:30:40 +0100 | [diff] [blame] | 285 | # seems needed for python 2.x to handle keyboard interrupt |
| 286 | self._stop = multiprocessing.Event() |
| 287 | self._async = concurrency > 1 |
| 288 | if not self._async: |
| 289 | self._pool = None |
| 290 | global stop |
| 291 | global ports |
| 292 | stop = self._stop |
| 293 | ports = PortAllocator() |
| 294 | else: |
| 295 | self._m = multiprocessing.managers.BaseManager() |
| 296 | self._m.register('ports', PortAllocator) |
| 297 | self._m.start() |
| 298 | self._pool = multiprocessing.Pool(concurrency, self._pool_init, (self._m.address,)) |
Roger Meier | 41ad434 | 2015-03-24 22:30:40 +0100 | [diff] [blame] | 299 | self._log.debug( |
| 300 | 'TestDispatcher started with %d concurrent jobs' % concurrency) |
| 301 | |
| 302 | def _pool_init(self, address): |
| 303 | global stop |
| 304 | global m |
| 305 | global ports |
| 306 | stop = self._stop |
| 307 | m = multiprocessing.managers.BaseManager(address) |
| 308 | m.connect() |
| 309 | ports = m.ports() |
| 310 | |
Nobuaki Sukegawa | 5e432ec | 2016-01-24 01:34:29 +0900 | [diff] [blame] | 311 | def _dispatch_sync(self, test, cont, max_retry): |
| 312 | r = run_test(self.testdir, self.logdir, test, max_retry, False) |
Roger Meier | 41ad434 | 2015-03-24 22:30:40 +0100 | [diff] [blame] | 313 | cont(r) |
| 314 | return NonAsyncResult(r) |
| 315 | |
Nobuaki Sukegawa | 5e432ec | 2016-01-24 01:34:29 +0900 | [diff] [blame] | 316 | def _dispatch_async(self, test, cont, max_retry): |
Nobuaki Sukegawa | 378b727 | 2016-01-03 17:04:50 +0900 | [diff] [blame] | 317 | self._log.debug('_dispatch_async') |
Nobuaki Sukegawa | 5e432ec | 2016-01-24 01:34:29 +0900 | [diff] [blame] | 318 | return self._pool.apply_async(func=run_test, args=(self.testdir, self.logdir, test, max_retry), callback=cont) |
Roger Meier | 41ad434 | 2015-03-24 22:30:40 +0100 | [diff] [blame] | 319 | |
Nobuaki Sukegawa | 5e432ec | 2016-01-24 01:34:29 +0900 | [diff] [blame] | 320 | def dispatch(self, test, max_retry): |
Roger Meier | 41ad434 | 2015-03-24 22:30:40 +0100 | [diff] [blame] | 321 | index = self._report.add_test(test) |
| 322 | |
Nobuaki Sukegawa | 5e432ec | 2016-01-24 01:34:29 +0900 | [diff] [blame] | 323 | def cont(result): |
Roger Meier | 41ad434 | 2015-03-24 22:30:40 +0100 | [diff] [blame] | 324 | if not self._stop.is_set(): |
Nobuaki Sukegawa | 5e432ec | 2016-01-24 01:34:29 +0900 | [diff] [blame] | 325 | retry_count, returncode = result |
Roger Meier | 41ad434 | 2015-03-24 22:30:40 +0100 | [diff] [blame] | 326 | self._log.debug('freeing port') |
| 327 | self._log.debug('adding result') |
Nobuaki Sukegawa | 5e432ec | 2016-01-24 01:34:29 +0900 | [diff] [blame] | 328 | self._report.add_result(index, returncode, returncode == RESULT_TIMEOUT, retry_count) |
Roger Meier | 41ad434 | 2015-03-24 22:30:40 +0100 | [diff] [blame] | 329 | self._log.debug('finish continuation') |
| 330 | fn = self._dispatch_async if self._async else self._dispatch_sync |
Nobuaki Sukegawa | 5e432ec | 2016-01-24 01:34:29 +0900 | [diff] [blame] | 331 | return fn(test, cont, max_retry) |
Roger Meier | 41ad434 | 2015-03-24 22:30:40 +0100 | [diff] [blame] | 332 | |
| 333 | def wait(self): |
| 334 | if self._async: |
| 335 | self._pool.close() |
| 336 | self._pool.join() |
| 337 | self._m.shutdown() |
| 338 | return self._report.end() |
| 339 | |
| 340 | def terminate(self): |
| 341 | self._stop.set() |
| 342 | if self._async: |
| 343 | self._pool.terminate() |
| 344 | self._pool.join() |
| 345 | self._m.shutdown() |