blob: 68bd928690261c4c364de0e7bc848810bcb3729e [file] [log] [blame]
Roger Meier41ad4342015-03-24 22:30:40 +01001#
2# Licensed to the Apache Software Foundation (ASF) under one
3# or more contributor license agreements. See the NOTICE file
4# distributed with this work for additional information
5# regarding copyright ownership. The ASF licenses this file
6# to you under the Apache License, Version 2.0 (the
7# "License"); you may not use this file except in compliance
8# with the License. You may obtain a copy of the License at
9#
10# http://www.apache.org/licenses/LICENSE-2.0
11#
12# Unless required by applicable law or agreed to in writing,
13# software distributed under the License is distributed on an
14# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15# KIND, either express or implied. See the License for the
16# specific language governing permissions and limitations
17# under the License.
18#
19
20import contextlib
21import multiprocessing
22import multiprocessing.managers
23import os
24import platform
25import random
Roger Meier41ad4342015-03-24 22:30:40 +010026import signal
Nobuaki Sukegawaa6ab1f52015-11-28 15:04:39 +090027import socket
Roger Meier41ad4342015-03-24 22:30:40 +010028import subprocess
Nobuaki Sukegawaa6ab1f52015-11-28 15:04:39 +090029import sys
Roger Meier41ad4342015-03-24 22:30:40 +010030import threading
31import time
Roger Meier41ad4342015-03-24 22:30:40 +010032
Nobuaki Sukegawa2de27002015-11-22 01:13:48 +090033from .compat import str_join
34from .test import TestEntry, domain_socket_path
35from .report import ExecReporter, SummaryReporter
Roger Meier41ad4342015-03-24 22:30:40 +010036
37RESULT_TIMEOUT = 128
38RESULT_ERROR = 64
39
40
41class ExecutionContext(object):
42 def __init__(self, cmd, cwd, env, report):
43 self._log = multiprocessing.get_logger()
44 self.report = report
45 self.cmd = cmd
46 self.cwd = cwd
47 self.env = env
48 self.timer = None
49 self.expired = False
Nobuaki Sukegawa2ba79442016-01-12 19:37:55 +090050 self.killed = False
Roger Meier41ad4342015-03-24 22:30:40 +010051
52 def _expire(self):
53 self._log.info('Timeout')
54 self.expired = True
55 self.kill()
56
57 def kill(self):
58 self._log.debug('Killing process : %d' % self.proc.pid)
Nobuaki Sukegawa2ba79442016-01-12 19:37:55 +090059 self.killed = True
Roger Meier41ad4342015-03-24 22:30:40 +010060 if platform.system() != 'Windows':
61 try:
62 os.killpg(self.proc.pid, signal.SIGKILL)
Nobuaki Sukegawa5e432ec2016-01-24 01:34:29 +090063 except Exception:
64 self._log.info('Failed to kill process group', exc_info=sys.exc_info())
Roger Meier41ad4342015-03-24 22:30:40 +010065 try:
66 self.proc.kill()
Nobuaki Sukegawa5e432ec2016-01-24 01:34:29 +090067 except Exception:
68 self._log.info('Failed to kill process', exc_info=sys.exc_info())
Roger Meier41ad4342015-03-24 22:30:40 +010069
70 def _popen_args(self):
71 args = {
72 'cwd': self.cwd,
73 'env': self.env,
74 'stdout': self.report.out,
75 'stderr': subprocess.STDOUT,
76 }
77 # make sure child processes doesn't remain after killing
78 if platform.system() == 'Windows':
79 DETACHED_PROCESS = 0x00000008
80 args.update(creationflags=DETACHED_PROCESS | subprocess.CREATE_NEW_PROCESS_GROUP)
81 else:
82 args.update(preexec_fn=os.setsid)
83 return args
84
85 def start(self, timeout=0):
Nobuaki Sukegawa2de27002015-11-22 01:13:48 +090086 joined = str_join(' ', self.cmd)
Jens Geyeraad06de2015-11-21 14:43:56 +010087 self._log.debug('COMMAND: %s', joined)
Roger Meier41ad4342015-03-24 22:30:40 +010088 self._log.debug('WORKDIR: %s', self.cwd)
89 self._log.debug('LOGFILE: %s', self.report.logpath)
90 self.report.begin()
Nobuaki Sukegawa2de27002015-11-22 01:13:48 +090091 self.proc = subprocess.Popen(self.cmd, **self._popen_args())
Roger Meier41ad4342015-03-24 22:30:40 +010092 if timeout > 0:
93 self.timer = threading.Timer(timeout, self._expire)
94 self.timer.start()
95 return self._scoped()
96
97 @contextlib.contextmanager
98 def _scoped(self):
99 yield self
100 self._log.debug('Killing scoped process')
Nobuaki Sukegawa2ba79442016-01-12 19:37:55 +0900101 if self.proc.poll() is None:
102 self.kill()
103 self.report.killed()
104 else:
105 self._log.debug('Process died unexpectedly')
106 self.report.died()
Roger Meier41ad4342015-03-24 22:30:40 +0100107
108 def wait(self):
109 self.proc.communicate()
110 if self.timer:
111 self.timer.cancel()
112 self.report.end(self.returncode)
113
114 @property
115 def returncode(self):
116 return self.proc.returncode if self.proc else None
117
118
Nobuaki Sukegawa378b7272016-01-03 17:04:50 +0900119def exec_context(port, logdir, test, prog):
120 report = ExecReporter(logdir, test, prog)
Roger Meier41ad4342015-03-24 22:30:40 +0100121 prog.build_command(port)
122 return ExecutionContext(prog.command, prog.workdir, prog.env, report)
123
124
Nobuaki Sukegawa5e432ec2016-01-24 01:34:29 +0900125def run_test(testdir, logdir, test_dict, max_retry, async=True):
Roger Meier41ad4342015-03-24 22:30:40 +0100126 try:
127 logger = multiprocessing.get_logger()
Nobuaki Sukegawa5e432ec2016-01-24 01:34:29 +0900128 max_bind_retry = 3
Roger Meier41ad4342015-03-24 22:30:40 +0100129 retry_count = 0
Nobuaki Sukegawa5e432ec2016-01-24 01:34:29 +0900130 bind_retry_count = 0
Roger Meier41ad4342015-03-24 22:30:40 +0100131 test = TestEntry(testdir, **test_dict)
132 while True:
133 if stop.is_set():
134 logger.debug('Skipping because shutting down')
Nobuaki Sukegawa5e432ec2016-01-24 01:34:29 +0900135 return (retry_count, None)
Roger Meier41ad4342015-03-24 22:30:40 +0100136 logger.debug('Start')
137 with PortAllocator.alloc_port_scoped(ports, test.socket) as port:
138 logger.debug('Start with port %d' % port)
Nobuaki Sukegawa378b7272016-01-03 17:04:50 +0900139 sv = exec_context(port, logdir, test, test.server)
140 cl = exec_context(port, logdir, test, test.client)
Roger Meier41ad4342015-03-24 22:30:40 +0100141
142 logger.debug('Starting server')
143 with sv.start():
144 if test.delay > 0:
145 logger.debug('Delaying client for %.2f seconds' % test.delay)
146 time.sleep(test.delay)
Nobuaki Sukegawa5e432ec2016-01-24 01:34:29 +0900147 connect_retry_count = 0
148 max_connect_retry = 10
149 connect_retry_wait = 0.5
Roger Meier41ad4342015-03-24 22:30:40 +0100150 while True:
151 logger.debug('Starting client')
152 cl.start(test.timeout)
153 logger.debug('Waiting client')
154 cl.wait()
Nobuaki Sukegawa5e432ec2016-01-24 01:34:29 +0900155 if not cl.report.maybe_false_positive() or connect_retry_count >= max_connect_retry:
156 if connect_retry_count > 0 and connect_retry_count < max_connect_retry:
157 logger.warn('[%s]: Connected after %d retry (%.2f sec each)' % (test.server.name, connect_retry_count, connect_retry_wait))
158 # Wait for 50ms to see if server does not die at the end.
Nobuaki Sukegawa2ba79442016-01-12 19:37:55 +0900159 time.sleep(0.05)
Roger Meier41ad4342015-03-24 22:30:40 +0100160 break
Nobuaki Sukegawa5e432ec2016-01-24 01:34:29 +0900161 logger.debug('Server may not be ready, waiting %.2f second...' % connect_retry_wait)
162 time.sleep(connect_retry_wait)
163 connect_retry_count += 1
Roger Meier41ad4342015-03-24 22:30:40 +0100164
Nobuaki Sukegawa5e432ec2016-01-24 01:34:29 +0900165 if sv.report.maybe_false_positive() and bind_retry_count < max_bind_retry:
166 logger.warn('[%s]: Detected socket bind failure, retrying...', test.server.name)
167 bind_retry_count += 1
168 else:
Nobuaki Sukegawa2ba79442016-01-12 19:37:55 +0900169 if cl.expired:
Nobuaki Sukegawa5e432ec2016-01-24 01:34:29 +0900170 result = RESULT_TIMEOUT
Nobuaki Sukegawa2ba79442016-01-12 19:37:55 +0900171 elif not sv.killed and cl.proc.returncode == 0:
172 # Server should be alive at the end.
Nobuaki Sukegawa5e432ec2016-01-24 01:34:29 +0900173 result = RESULT_ERROR
Nobuaki Sukegawa2ba79442016-01-12 19:37:55 +0900174 else:
Nobuaki Sukegawa5e432ec2016-01-24 01:34:29 +0900175 result = cl.proc.returncode
176
177 if result == 0 or retry_count >= max_retry:
178 return (retry_count, result)
179 else:
180 logger.info('[%s-%s]: test failed, retrying...', test.server.name, test.client.name)
181 retry_count += 1
Roger Meier41ad4342015-03-24 22:30:40 +0100182 except (KeyboardInterrupt, SystemExit):
183 logger.info('Interrupted execution')
184 if not async:
185 raise
186 stop.set()
187 return None
Nobuaki Sukegawa2ba79442016-01-12 19:37:55 +0900188 except:
Roger Meier41ad4342015-03-24 22:30:40 +0100189 if not async:
190 raise
Nobuaki Sukegawa2ba79442016-01-12 19:37:55 +0900191 logger.warn('Error executing [%s]', test.name, exc_info=sys.exc_info())
Nobuaki Sukegawa5e432ec2016-01-24 01:34:29 +0900192 return (retry_count, RESULT_ERROR)
Roger Meier41ad4342015-03-24 22:30:40 +0100193
194
195class PortAllocator(object):
196 def __init__(self):
197 self._log = multiprocessing.get_logger()
198 self._lock = multiprocessing.Lock()
199 self._ports = set()
200 self._dom_ports = set()
201 self._last_alloc = 0
202
203 def _get_tcp_port(self):
204 sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
205 sock.bind(('127.0.0.1', 0))
206 port = sock.getsockname()[1]
207 self._lock.acquire()
208 try:
209 ok = port not in self._ports
210 if ok:
211 self._ports.add(port)
212 self._last_alloc = time.time()
213 finally:
214 self._lock.release()
215 sock.close()
216 return port if ok else self._get_tcp_port()
217
218 def _get_domain_port(self):
219 port = random.randint(1024, 65536)
220 self._lock.acquire()
221 try:
222 ok = port not in self._dom_ports
223 if ok:
224 self._dom_ports.add(port)
225 finally:
226 self._lock.release()
227 return port if ok else self._get_domain_port()
228
229 def alloc_port(self, socket_type):
pavlodd08f6e2015-10-08 16:43:56 -0400230 if socket_type in ('domain', 'abstract'):
Roger Meier41ad4342015-03-24 22:30:40 +0100231 return self._get_domain_port()
232 else:
233 return self._get_tcp_port()
234
235 # static method for inter-process invokation
236 @staticmethod
237 @contextlib.contextmanager
238 def alloc_port_scoped(allocator, socket_type):
239 port = allocator.alloc_port(socket_type)
240 yield port
241 allocator.free_port(socket_type, port)
242
243 def free_port(self, socket_type, port):
244 self._log.debug('free_port')
245 self._lock.acquire()
246 try:
247 if socket_type == 'domain':
248 self._dom_ports.remove(port)
249 path = domain_socket_path(port)
250 if os.path.exists(path):
251 os.remove(path)
pavlodd08f6e2015-10-08 16:43:56 -0400252 elif socket_type == 'abstract':
253 self._dom_ports.remove(port)
Roger Meier41ad4342015-03-24 22:30:40 +0100254 else:
255 self._ports.remove(port)
Nobuaki Sukegawa5e432ec2016-01-24 01:34:29 +0900256 except IOError:
257 self._log.info('Error while freeing port', exc_info=sys.exc_info())
Roger Meier41ad4342015-03-24 22:30:40 +0100258 finally:
259 self._lock.release()
260
261
262class NonAsyncResult(object):
263 def __init__(self, value):
264 self._value = value
265
266 def get(self, timeout=None):
267 return self._value
268
269 def wait(self, timeout=None):
270 pass
271
272 def ready(self):
273 return True
274
275 def successful(self):
276 return self._value == 0
277
278
279class TestDispatcher(object):
Nobuaki Sukegawabd165302016-01-19 11:10:07 +0900280 def __init__(self, testdir, basedir, logdir_rel, concurrency):
Roger Meier41ad4342015-03-24 22:30:40 +0100281 self._log = multiprocessing.get_logger()
282 self.testdir = testdir
Nobuaki Sukegawabd165302016-01-19 11:10:07 +0900283 self._report = SummaryReporter(basedir, logdir_rel, concurrency > 1)
284 self.logdir = self._report.testdir
Roger Meier41ad4342015-03-24 22:30:40 +0100285 # seems needed for python 2.x to handle keyboard interrupt
286 self._stop = multiprocessing.Event()
287 self._async = concurrency > 1
288 if not self._async:
289 self._pool = None
290 global stop
291 global ports
292 stop = self._stop
293 ports = PortAllocator()
294 else:
295 self._m = multiprocessing.managers.BaseManager()
296 self._m.register('ports', PortAllocator)
297 self._m.start()
298 self._pool = multiprocessing.Pool(concurrency, self._pool_init, (self._m.address,))
Roger Meier41ad4342015-03-24 22:30:40 +0100299 self._log.debug(
300 'TestDispatcher started with %d concurrent jobs' % concurrency)
301
302 def _pool_init(self, address):
303 global stop
304 global m
305 global ports
306 stop = self._stop
307 m = multiprocessing.managers.BaseManager(address)
308 m.connect()
309 ports = m.ports()
310
Nobuaki Sukegawa5e432ec2016-01-24 01:34:29 +0900311 def _dispatch_sync(self, test, cont, max_retry):
312 r = run_test(self.testdir, self.logdir, test, max_retry, False)
Roger Meier41ad4342015-03-24 22:30:40 +0100313 cont(r)
314 return NonAsyncResult(r)
315
Nobuaki Sukegawa5e432ec2016-01-24 01:34:29 +0900316 def _dispatch_async(self, test, cont, max_retry):
Nobuaki Sukegawa378b7272016-01-03 17:04:50 +0900317 self._log.debug('_dispatch_async')
Nobuaki Sukegawa5e432ec2016-01-24 01:34:29 +0900318 return self._pool.apply_async(func=run_test, args=(self.testdir, self.logdir, test, max_retry), callback=cont)
Roger Meier41ad4342015-03-24 22:30:40 +0100319
Nobuaki Sukegawa5e432ec2016-01-24 01:34:29 +0900320 def dispatch(self, test, max_retry):
Roger Meier41ad4342015-03-24 22:30:40 +0100321 index = self._report.add_test(test)
322
Nobuaki Sukegawa5e432ec2016-01-24 01:34:29 +0900323 def cont(result):
Roger Meier41ad4342015-03-24 22:30:40 +0100324 if not self._stop.is_set():
Nobuaki Sukegawa5e432ec2016-01-24 01:34:29 +0900325 retry_count, returncode = result
Roger Meier41ad4342015-03-24 22:30:40 +0100326 self._log.debug('freeing port')
327 self._log.debug('adding result')
Nobuaki Sukegawa5e432ec2016-01-24 01:34:29 +0900328 self._report.add_result(index, returncode, returncode == RESULT_TIMEOUT, retry_count)
Roger Meier41ad4342015-03-24 22:30:40 +0100329 self._log.debug('finish continuation')
330 fn = self._dispatch_async if self._async else self._dispatch_sync
Nobuaki Sukegawa5e432ec2016-01-24 01:34:29 +0900331 return fn(test, cont, max_retry)
Roger Meier41ad4342015-03-24 22:30:40 +0100332
333 def wait(self):
334 if self._async:
335 self._pool.close()
336 self._pool.join()
337 self._m.shutdown()
338 return self._report.end()
339
340 def terminate(self):
341 self._stop.set()
342 if self._async:
343 self._pool.terminate()
344 self._pool.join()
345 self._m.shutdown()