blob: f522bb19e67cf5bea5effa9cac765b2fa503ca56 [file] [log] [blame]
Roger Meier41ad4342015-03-24 22:30:40 +01001#
2# Licensed to the Apache Software Foundation (ASF) under one
3# or more contributor license agreements. See the NOTICE file
4# distributed with this work for additional information
5# regarding copyright ownership. The ASF licenses this file
6# to you under the Apache License, Version 2.0 (the
7# "License"); you may not use this file except in compliance
8# with the License. You may obtain a copy of the License at
9#
10# http://www.apache.org/licenses/LICENSE-2.0
11#
12# Unless required by applicable law or agreed to in writing,
13# software distributed under the License is distributed on an
14# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15# KIND, either express or implied. See the License for the
16# specific language governing permissions and limitations
17# under the License.
18#
19
20import contextlib
21import multiprocessing
22import multiprocessing.managers
23import os
24import platform
25import random
Roger Meier41ad4342015-03-24 22:30:40 +010026import signal
Nobuaki Sukegawaa6ab1f52015-11-28 15:04:39 +090027import socket
Roger Meier41ad4342015-03-24 22:30:40 +010028import subprocess
Nobuaki Sukegawaa6ab1f52015-11-28 15:04:39 +090029import sys
Roger Meier41ad4342015-03-24 22:30:40 +010030import threading
31import time
Roger Meier41ad4342015-03-24 22:30:40 +010032
Nobuaki Sukegawa2de27002015-11-22 01:13:48 +090033from .compat import str_join
34from .test import TestEntry, domain_socket_path
35from .report import ExecReporter, SummaryReporter
Roger Meier41ad4342015-03-24 22:30:40 +010036
37RESULT_TIMEOUT = 128
38RESULT_ERROR = 64
39
Nobuaki Sukegawad479e232016-02-28 11:28:19 +090040# globals
41ports = None
42stop = None
43
Roger Meier41ad4342015-03-24 22:30:40 +010044
45class ExecutionContext(object):
Nobuaki Sukegawa10308cb2016-02-03 01:57:03 +090046 def __init__(self, cmd, cwd, env, report):
47 self._log = multiprocessing.get_logger()
48 self.report = report
49 self.cmd = cmd
50 self.cwd = cwd
51 self.env = env
52 self.timer = None
53 self.expired = False
54 self.killed = False
Nobuaki Sukegawa59310f52016-02-18 01:41:46 +090055 self.proc = None
Roger Meier41ad4342015-03-24 22:30:40 +010056
Nobuaki Sukegawa10308cb2016-02-03 01:57:03 +090057 def _expire(self):
58 self._log.info('Timeout')
59 self.expired = True
60 self.kill()
Roger Meier41ad4342015-03-24 22:30:40 +010061
Nobuaki Sukegawa10308cb2016-02-03 01:57:03 +090062 def kill(self):
63 self._log.debug('Killing process : %d' % self.proc.pid)
64 self.killed = True
65 if platform.system() != 'Windows':
66 try:
67 os.killpg(self.proc.pid, signal.SIGKILL)
68 except Exception:
69 self._log.info('Failed to kill process group', exc_info=sys.exc_info())
70 try:
71 self.proc.kill()
72 except Exception:
73 self._log.info('Failed to kill process', exc_info=sys.exc_info())
Roger Meier41ad4342015-03-24 22:30:40 +010074
Nobuaki Sukegawa10308cb2016-02-03 01:57:03 +090075 def _popen_args(self):
76 args = {
77 'cwd': self.cwd,
78 'env': self.env,
79 'stdout': self.report.out,
80 'stderr': subprocess.STDOUT,
81 }
82 # make sure child processes doesn't remain after killing
83 if platform.system() == 'Windows':
84 DETACHED_PROCESS = 0x00000008
85 args.update(creationflags=DETACHED_PROCESS | subprocess.CREATE_NEW_PROCESS_GROUP)
86 else:
87 args.update(preexec_fn=os.setsid)
88 return args
Roger Meier41ad4342015-03-24 22:30:40 +010089
Nobuaki Sukegawa10308cb2016-02-03 01:57:03 +090090 def start(self, timeout=0):
91 joined = str_join(' ', self.cmd)
92 self._log.debug('COMMAND: %s', joined)
93 self._log.debug('WORKDIR: %s', self.cwd)
94 self._log.debug('LOGFILE: %s', self.report.logpath)
95 self.report.begin()
96 self.proc = subprocess.Popen(self.cmd, **self._popen_args())
97 if timeout > 0:
98 self.timer = threading.Timer(timeout, self._expire)
99 self.timer.start()
100 return self._scoped()
Roger Meier41ad4342015-03-24 22:30:40 +0100101
Nobuaki Sukegawa10308cb2016-02-03 01:57:03 +0900102 @contextlib.contextmanager
103 def _scoped(self):
104 yield self
105 self._log.debug('Killing scoped process')
106 if self.proc.poll() is None:
107 self.kill()
108 self.report.killed()
109 else:
110 self._log.debug('Process died unexpectedly')
111 self.report.died()
Roger Meier41ad4342015-03-24 22:30:40 +0100112
Nobuaki Sukegawa10308cb2016-02-03 01:57:03 +0900113 def wait(self):
114 self.proc.communicate()
115 if self.timer:
116 self.timer.cancel()
117 self.report.end(self.returncode)
Roger Meier41ad4342015-03-24 22:30:40 +0100118
Nobuaki Sukegawa10308cb2016-02-03 01:57:03 +0900119 @property
120 def returncode(self):
121 return self.proc.returncode if self.proc else None
Roger Meier41ad4342015-03-24 22:30:40 +0100122
123
Nobuaki Sukegawa378b7272016-01-03 17:04:50 +0900124def exec_context(port, logdir, test, prog):
Nobuaki Sukegawa10308cb2016-02-03 01:57:03 +0900125 report = ExecReporter(logdir, test, prog)
126 prog.build_command(port)
127 return ExecutionContext(prog.command, prog.workdir, prog.env, report)
Roger Meier41ad4342015-03-24 22:30:40 +0100128
129
Nobuaki Sukegawa5e432ec2016-01-24 01:34:29 +0900130def run_test(testdir, logdir, test_dict, max_retry, async=True):
Nobuaki Sukegawa59310f52016-02-18 01:41:46 +0900131 logger = multiprocessing.get_logger()
132
133 def ensure_socket_open(proc, port, max_delay):
134 sleeped = 0.1
135 time.sleep(sleeped)
Nobuaki Sukegawa59310f52016-02-18 01:41:46 +0900136 sleep_step = 0.2
Nobuaki Sukegawae9b32342016-02-27 03:44:02 +0900137 while True:
138 # Create sockets every iteration because refused sockets cannot be
139 # reused on some systems.
140 sock4 = socket.socket()
141 sock6 = socket.socket(family=socket.AF_INET6)
142 try:
143 if sock4.connect_ex(('127.0.0.1', port)) == 0 \
144 or sock6.connect_ex(('::1', port)) == 0:
145 return True
Nobuaki Sukegawa59310f52016-02-18 01:41:46 +0900146 if proc.poll() is not None:
147 logger.warn('server process is exited')
148 return False
149 if sleeped > max_delay:
150 logger.warn('sleeped for %f seconds but server port is not open' % sleeped)
151 return False
152 time.sleep(sleep_step)
153 sleeped += sleep_step
Nobuaki Sukegawae9b32342016-02-27 03:44:02 +0900154 finally:
155 sock4.close()
156 sock6.close()
157 logger.debug('waited %f sec for server port open' % sleeped)
158 return True
Nobuaki Sukegawa59310f52016-02-18 01:41:46 +0900159
Nobuaki Sukegawa10308cb2016-02-03 01:57:03 +0900160 try:
Nobuaki Sukegawa10308cb2016-02-03 01:57:03 +0900161 max_bind_retry = 3
162 retry_count = 0
163 bind_retry_count = 0
164 test = TestEntry(testdir, **test_dict)
165 while True:
166 if stop.is_set():
167 logger.debug('Skipping because shutting down')
168 return (retry_count, None)
169 logger.debug('Start')
170 with PortAllocator.alloc_port_scoped(ports, test.socket) as port:
171 logger.debug('Start with port %d' % port)
172 sv = exec_context(port, logdir, test, test.server)
173 cl = exec_context(port, logdir, test, test.client)
Roger Meier41ad4342015-03-24 22:30:40 +0100174
Nobuaki Sukegawa10308cb2016-02-03 01:57:03 +0900175 logger.debug('Starting server')
176 with sv.start():
Nobuaki Sukegawa59310f52016-02-18 01:41:46 +0900177 if test.socket in ('domain', 'abstract'):
178 time.sleep(0.1)
Nobuaki Sukegawa103974e2016-02-28 10:07:30 +0900179 port_ok = True
Nobuaki Sukegawa59310f52016-02-18 01:41:46 +0900180 else:
Nobuaki Sukegawa103974e2016-02-28 10:07:30 +0900181 port_ok = ensure_socket_open(sv.proc, port, test.delay)
182 if port_ok:
183 connect_retry_count = 0
184 max_connect_retry = 3
185 connect_retry_wait = 0.5
186 while True:
187 if sv.proc.poll() is not None:
188 logger.info('not starting client because server process is absent')
189 break
190 logger.debug('Starting client')
191 cl.start(test.timeout)
192 logger.debug('Waiting client')
193 cl.wait()
194 if not cl.report.maybe_false_positive() or connect_retry_count >= max_connect_retry:
195 if connect_retry_count > 0 and connect_retry_count < max_connect_retry:
196 logger.warn('[%s]: Connected after %d retry (%.2f sec each)' % (test.server.name, connect_retry_count, connect_retry_wait))
197 # Wait for 50ms to see if server does not die at the end.
198 time.sleep(0.05)
199 break
200 logger.debug('Server may not be ready, waiting %.2f second...' % connect_retry_wait)
201 time.sleep(connect_retry_wait)
202 connect_retry_count += 1
Roger Meier41ad4342015-03-24 22:30:40 +0100203
Nobuaki Sukegawa10308cb2016-02-03 01:57:03 +0900204 if sv.report.maybe_false_positive() and bind_retry_count < max_bind_retry:
205 logger.warn('[%s]: Detected socket bind failure, retrying...', test.server.name)
206 bind_retry_count += 1
207 else:
208 if cl.expired:
209 result = RESULT_TIMEOUT
Nobuaki Sukegawa10308cb2016-02-03 01:57:03 +0900210 else:
Nobuaki Sukegawa59310f52016-02-18 01:41:46 +0900211 result = cl.proc.returncode if cl.proc else RESULT_ERROR
212 if not sv.killed:
213 # Server died without being killed.
214 result |= RESULT_ERROR
Nobuaki Sukegawa5e432ec2016-01-24 01:34:29 +0900215
Nobuaki Sukegawa10308cb2016-02-03 01:57:03 +0900216 if result == 0 or retry_count >= max_retry:
217 return (retry_count, result)
218 else:
219 logger.info('[%s-%s]: test failed, retrying...', test.server.name, test.client.name)
220 retry_count += 1
Nobuaki Sukegawa59310f52016-02-18 01:41:46 +0900221 except Exception:
222 if not async:
223 raise
224 logger.warn('Error executing [%s]', test.name, exc_info=True)
225 return (retry_count, RESULT_ERROR)
226 except:
227 logger.info('Interrupted execution', exc_info=True)
Nobuaki Sukegawa10308cb2016-02-03 01:57:03 +0900228 if not async:
229 raise
230 stop.set()
Nobuaki Sukegawa10308cb2016-02-03 01:57:03 +0900231 return (retry_count, RESULT_ERROR)
Roger Meier41ad4342015-03-24 22:30:40 +0100232
233
234class PortAllocator(object):
Nobuaki Sukegawa10308cb2016-02-03 01:57:03 +0900235 def __init__(self):
236 self._log = multiprocessing.get_logger()
237 self._lock = multiprocessing.Lock()
238 self._ports = set()
239 self._dom_ports = set()
240 self._last_alloc = 0
Roger Meier41ad4342015-03-24 22:30:40 +0100241
Nobuaki Sukegawa10308cb2016-02-03 01:57:03 +0900242 def _get_tcp_port(self):
243 sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
Nobuaki Sukegawa59310f52016-02-18 01:41:46 +0900244 sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
245 sock.bind(('', 0))
Nobuaki Sukegawa10308cb2016-02-03 01:57:03 +0900246 port = sock.getsockname()[1]
247 self._lock.acquire()
248 try:
249 ok = port not in self._ports
250 if ok:
251 self._ports.add(port)
252 self._last_alloc = time.time()
253 finally:
254 self._lock.release()
255 sock.close()
256 return port if ok else self._get_tcp_port()
Roger Meier41ad4342015-03-24 22:30:40 +0100257
Nobuaki Sukegawa10308cb2016-02-03 01:57:03 +0900258 def _get_domain_port(self):
259 port = random.randint(1024, 65536)
260 self._lock.acquire()
261 try:
262 ok = port not in self._dom_ports
263 if ok:
264 self._dom_ports.add(port)
265 finally:
266 self._lock.release()
267 return port if ok else self._get_domain_port()
Roger Meier41ad4342015-03-24 22:30:40 +0100268
Nobuaki Sukegawa10308cb2016-02-03 01:57:03 +0900269 def alloc_port(self, socket_type):
270 if socket_type in ('domain', 'abstract'):
271 return self._get_domain_port()
272 else:
273 return self._get_tcp_port()
Roger Meier41ad4342015-03-24 22:30:40 +0100274
Nobuaki Sukegawa10308cb2016-02-03 01:57:03 +0900275 # static method for inter-process invokation
276 @staticmethod
277 @contextlib.contextmanager
278 def alloc_port_scoped(allocator, socket_type):
279 port = allocator.alloc_port(socket_type)
280 yield port
281 allocator.free_port(socket_type, port)
Roger Meier41ad4342015-03-24 22:30:40 +0100282
Nobuaki Sukegawa10308cb2016-02-03 01:57:03 +0900283 def free_port(self, socket_type, port):
284 self._log.debug('free_port')
285 self._lock.acquire()
286 try:
287 if socket_type == 'domain':
288 self._dom_ports.remove(port)
289 path = domain_socket_path(port)
290 if os.path.exists(path):
291 os.remove(path)
292 elif socket_type == 'abstract':
293 self._dom_ports.remove(port)
294 else:
295 self._ports.remove(port)
296 except IOError:
297 self._log.info('Error while freeing port', exc_info=sys.exc_info())
298 finally:
299 self._lock.release()
Roger Meier41ad4342015-03-24 22:30:40 +0100300
301
302class NonAsyncResult(object):
Nobuaki Sukegawa10308cb2016-02-03 01:57:03 +0900303 def __init__(self, value):
304 self._value = value
Roger Meier41ad4342015-03-24 22:30:40 +0100305
Nobuaki Sukegawa10308cb2016-02-03 01:57:03 +0900306 def get(self, timeout=None):
307 return self._value
Roger Meier41ad4342015-03-24 22:30:40 +0100308
Nobuaki Sukegawa10308cb2016-02-03 01:57:03 +0900309 def wait(self, timeout=None):
310 pass
Roger Meier41ad4342015-03-24 22:30:40 +0100311
Nobuaki Sukegawa10308cb2016-02-03 01:57:03 +0900312 def ready(self):
313 return True
Roger Meier41ad4342015-03-24 22:30:40 +0100314
Nobuaki Sukegawa10308cb2016-02-03 01:57:03 +0900315 def successful(self):
316 return self._value == 0
Roger Meier41ad4342015-03-24 22:30:40 +0100317
318
319class TestDispatcher(object):
Nobuaki Sukegawa10308cb2016-02-03 01:57:03 +0900320 def __init__(self, testdir, basedir, logdir_rel, concurrency):
321 self._log = multiprocessing.get_logger()
322 self.testdir = testdir
323 self._report = SummaryReporter(basedir, logdir_rel, concurrency > 1)
324 self.logdir = self._report.testdir
325 # seems needed for python 2.x to handle keyboard interrupt
326 self._stop = multiprocessing.Event()
327 self._async = concurrency > 1
328 if not self._async:
329 self._pool = None
330 global stop
331 global ports
332 stop = self._stop
333 ports = PortAllocator()
334 else:
335 self._m = multiprocessing.managers.BaseManager()
336 self._m.register('ports', PortAllocator)
337 self._m.start()
338 self._pool = multiprocessing.Pool(concurrency, self._pool_init, (self._m.address,))
339 self._log.debug(
340 'TestDispatcher started with %d concurrent jobs' % concurrency)
Roger Meier41ad4342015-03-24 22:30:40 +0100341
Nobuaki Sukegawa10308cb2016-02-03 01:57:03 +0900342 def _pool_init(self, address):
343 global stop
344 global m
345 global ports
346 stop = self._stop
347 m = multiprocessing.managers.BaseManager(address)
348 m.connect()
349 ports = m.ports()
Roger Meier41ad4342015-03-24 22:30:40 +0100350
Nobuaki Sukegawa10308cb2016-02-03 01:57:03 +0900351 def _dispatch_sync(self, test, cont, max_retry):
352 r = run_test(self.testdir, self.logdir, test, max_retry, False)
353 cont(r)
354 return NonAsyncResult(r)
Roger Meier41ad4342015-03-24 22:30:40 +0100355
Nobuaki Sukegawa10308cb2016-02-03 01:57:03 +0900356 def _dispatch_async(self, test, cont, max_retry):
357 self._log.debug('_dispatch_async')
358 return self._pool.apply_async(func=run_test, args=(self.testdir, self.logdir, test, max_retry), callback=cont)
Roger Meier41ad4342015-03-24 22:30:40 +0100359
Nobuaki Sukegawa10308cb2016-02-03 01:57:03 +0900360 def dispatch(self, test, max_retry):
361 index = self._report.add_test(test)
Roger Meier41ad4342015-03-24 22:30:40 +0100362
Nobuaki Sukegawa10308cb2016-02-03 01:57:03 +0900363 def cont(result):
364 if not self._stop.is_set():
Nobuaki Sukegawa59310f52016-02-18 01:41:46 +0900365 if result and len(result) == 2:
366 retry_count, returncode = result
367 else:
368 retry_count = 0
369 returncode = RESULT_ERROR
Nobuaki Sukegawa10308cb2016-02-03 01:57:03 +0900370 self._log.debug('freeing port')
371 self._log.debug('adding result')
372 self._report.add_result(index, returncode, returncode == RESULT_TIMEOUT, retry_count)
373 self._log.debug('finish continuation')
374 fn = self._dispatch_async if self._async else self._dispatch_sync
375 return fn(test, cont, max_retry)
Roger Meier41ad4342015-03-24 22:30:40 +0100376
Nobuaki Sukegawa10308cb2016-02-03 01:57:03 +0900377 def wait(self):
378 if self._async:
379 self._pool.close()
380 self._pool.join()
381 self._m.shutdown()
382 return self._report.end()
Roger Meier41ad4342015-03-24 22:30:40 +0100383
Nobuaki Sukegawa10308cb2016-02-03 01:57:03 +0900384 def terminate(self):
385 self._stop.set()
386 if self._async:
387 self._pool.terminate()
388 self._pool.join()
389 self._m.shutdown()