blob: 2c212e0f4da3f91b96a1a9949c256d14b36ee381 [file] [log] [blame]
Roger Meier41ad4342015-03-24 22:30:40 +01001#
2# Licensed to the Apache Software Foundation (ASF) under one
3# or more contributor license agreements. See the NOTICE file
4# distributed with this work for additional information
5# regarding copyright ownership. The ASF licenses this file
6# to you under the Apache License, Version 2.0 (the
7# "License"); you may not use this file except in compliance
8# with the License. You may obtain a copy of the License at
9#
10# http://www.apache.org/licenses/LICENSE-2.0
11#
12# Unless required by applicable law or agreed to in writing,
13# software distributed under the License is distributed on an
14# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15# KIND, either express or implied. See the License for the
16# specific language governing permissions and limitations
17# under the License.
18#
19
20import contextlib
21import multiprocessing
22import multiprocessing.managers
23import os
24import platform
25import random
Roger Meier41ad4342015-03-24 22:30:40 +010026import signal
Nobuaki Sukegawaa6ab1f52015-11-28 15:04:39 +090027import socket
Roger Meier41ad4342015-03-24 22:30:40 +010028import subprocess
Nobuaki Sukegawaa6ab1f52015-11-28 15:04:39 +090029import sys
Roger Meier41ad4342015-03-24 22:30:40 +010030import threading
31import time
Roger Meier41ad4342015-03-24 22:30:40 +010032
Nobuaki Sukegawa2de27002015-11-22 01:13:48 +090033from .compat import str_join
34from .test import TestEntry, domain_socket_path
35from .report import ExecReporter, SummaryReporter
Roger Meier41ad4342015-03-24 22:30:40 +010036
37RESULT_TIMEOUT = 128
38RESULT_ERROR = 64
39
40
41class ExecutionContext(object):
Nobuaki Sukegawa10308cb2016-02-03 01:57:03 +090042 def __init__(self, cmd, cwd, env, report):
43 self._log = multiprocessing.get_logger()
44 self.report = report
45 self.cmd = cmd
46 self.cwd = cwd
47 self.env = env
48 self.timer = None
49 self.expired = False
50 self.killed = False
Nobuaki Sukegawa59310f52016-02-18 01:41:46 +090051 self.proc = None
Roger Meier41ad4342015-03-24 22:30:40 +010052
Nobuaki Sukegawa10308cb2016-02-03 01:57:03 +090053 def _expire(self):
54 self._log.info('Timeout')
55 self.expired = True
56 self.kill()
Roger Meier41ad4342015-03-24 22:30:40 +010057
Nobuaki Sukegawa10308cb2016-02-03 01:57:03 +090058 def kill(self):
59 self._log.debug('Killing process : %d' % self.proc.pid)
60 self.killed = True
61 if platform.system() != 'Windows':
62 try:
63 os.killpg(self.proc.pid, signal.SIGKILL)
64 except Exception:
65 self._log.info('Failed to kill process group', exc_info=sys.exc_info())
66 try:
67 self.proc.kill()
68 except Exception:
69 self._log.info('Failed to kill process', exc_info=sys.exc_info())
Roger Meier41ad4342015-03-24 22:30:40 +010070
Nobuaki Sukegawa10308cb2016-02-03 01:57:03 +090071 def _popen_args(self):
72 args = {
73 'cwd': self.cwd,
74 'env': self.env,
75 'stdout': self.report.out,
76 'stderr': subprocess.STDOUT,
77 }
78 # make sure child processes doesn't remain after killing
79 if platform.system() == 'Windows':
80 DETACHED_PROCESS = 0x00000008
81 args.update(creationflags=DETACHED_PROCESS | subprocess.CREATE_NEW_PROCESS_GROUP)
82 else:
83 args.update(preexec_fn=os.setsid)
84 return args
Roger Meier41ad4342015-03-24 22:30:40 +010085
Nobuaki Sukegawa10308cb2016-02-03 01:57:03 +090086 def start(self, timeout=0):
87 joined = str_join(' ', self.cmd)
88 self._log.debug('COMMAND: %s', joined)
89 self._log.debug('WORKDIR: %s', self.cwd)
90 self._log.debug('LOGFILE: %s', self.report.logpath)
91 self.report.begin()
92 self.proc = subprocess.Popen(self.cmd, **self._popen_args())
93 if timeout > 0:
94 self.timer = threading.Timer(timeout, self._expire)
95 self.timer.start()
96 return self._scoped()
Roger Meier41ad4342015-03-24 22:30:40 +010097
Nobuaki Sukegawa10308cb2016-02-03 01:57:03 +090098 @contextlib.contextmanager
99 def _scoped(self):
100 yield self
101 self._log.debug('Killing scoped process')
102 if self.proc.poll() is None:
103 self.kill()
104 self.report.killed()
105 else:
106 self._log.debug('Process died unexpectedly')
107 self.report.died()
Roger Meier41ad4342015-03-24 22:30:40 +0100108
Nobuaki Sukegawa10308cb2016-02-03 01:57:03 +0900109 def wait(self):
110 self.proc.communicate()
111 if self.timer:
112 self.timer.cancel()
113 self.report.end(self.returncode)
Roger Meier41ad4342015-03-24 22:30:40 +0100114
Nobuaki Sukegawa10308cb2016-02-03 01:57:03 +0900115 @property
116 def returncode(self):
117 return self.proc.returncode if self.proc else None
Roger Meier41ad4342015-03-24 22:30:40 +0100118
119
Nobuaki Sukegawa378b7272016-01-03 17:04:50 +0900120def exec_context(port, logdir, test, prog):
Nobuaki Sukegawa10308cb2016-02-03 01:57:03 +0900121 report = ExecReporter(logdir, test, prog)
122 prog.build_command(port)
123 return ExecutionContext(prog.command, prog.workdir, prog.env, report)
Roger Meier41ad4342015-03-24 22:30:40 +0100124
125
Nobuaki Sukegawa5e432ec2016-01-24 01:34:29 +0900126def run_test(testdir, logdir, test_dict, max_retry, async=True):
Nobuaki Sukegawa59310f52016-02-18 01:41:46 +0900127 logger = multiprocessing.get_logger()
128
129 def ensure_socket_open(proc, port, max_delay):
130 sleeped = 0.1
131 time.sleep(sleeped)
Nobuaki Sukegawa59310f52016-02-18 01:41:46 +0900132 sleep_step = 0.2
Nobuaki Sukegawae9b32342016-02-27 03:44:02 +0900133 while True:
134 # Create sockets every iteration because refused sockets cannot be
135 # reused on some systems.
136 sock4 = socket.socket()
137 sock6 = socket.socket(family=socket.AF_INET6)
138 try:
139 if sock4.connect_ex(('127.0.0.1', port)) == 0 \
140 or sock6.connect_ex(('::1', port)) == 0:
141 return True
Nobuaki Sukegawa59310f52016-02-18 01:41:46 +0900142 if proc.poll() is not None:
143 logger.warn('server process is exited')
144 return False
145 if sleeped > max_delay:
146 logger.warn('sleeped for %f seconds but server port is not open' % sleeped)
147 return False
148 time.sleep(sleep_step)
149 sleeped += sleep_step
Nobuaki Sukegawae9b32342016-02-27 03:44:02 +0900150 finally:
151 sock4.close()
152 sock6.close()
153 logger.debug('waited %f sec for server port open' % sleeped)
154 return True
Nobuaki Sukegawa59310f52016-02-18 01:41:46 +0900155
Nobuaki Sukegawa10308cb2016-02-03 01:57:03 +0900156 try:
Nobuaki Sukegawa10308cb2016-02-03 01:57:03 +0900157 max_bind_retry = 3
158 retry_count = 0
159 bind_retry_count = 0
160 test = TestEntry(testdir, **test_dict)
161 while True:
162 if stop.is_set():
163 logger.debug('Skipping because shutting down')
164 return (retry_count, None)
165 logger.debug('Start')
166 with PortAllocator.alloc_port_scoped(ports, test.socket) as port:
167 logger.debug('Start with port %d' % port)
168 sv = exec_context(port, logdir, test, test.server)
169 cl = exec_context(port, logdir, test, test.client)
Roger Meier41ad4342015-03-24 22:30:40 +0100170
Nobuaki Sukegawa10308cb2016-02-03 01:57:03 +0900171 logger.debug('Starting server')
172 with sv.start():
Nobuaki Sukegawa59310f52016-02-18 01:41:46 +0900173 if test.socket in ('domain', 'abstract'):
174 time.sleep(0.1)
175 else:
176 if not ensure_socket_open(sv.proc, port, test.delay):
177 break
Nobuaki Sukegawa10308cb2016-02-03 01:57:03 +0900178 connect_retry_count = 0
Nobuaki Sukegawa59310f52016-02-18 01:41:46 +0900179 max_connect_retry = 3
Nobuaki Sukegawa10308cb2016-02-03 01:57:03 +0900180 connect_retry_wait = 0.5
181 while True:
Nobuaki Sukegawa59310f52016-02-18 01:41:46 +0900182 if sv.proc.poll() is not None:
183 logger.info('not starting client because server process is absent')
184 break
Nobuaki Sukegawa10308cb2016-02-03 01:57:03 +0900185 logger.debug('Starting client')
186 cl.start(test.timeout)
187 logger.debug('Waiting client')
188 cl.wait()
189 if not cl.report.maybe_false_positive() or connect_retry_count >= max_connect_retry:
190 if connect_retry_count > 0 and connect_retry_count < max_connect_retry:
191 logger.warn('[%s]: Connected after %d retry (%.2f sec each)' % (test.server.name, connect_retry_count, connect_retry_wait))
192 # Wait for 50ms to see if server does not die at the end.
193 time.sleep(0.05)
194 break
195 logger.debug('Server may not be ready, waiting %.2f second...' % connect_retry_wait)
196 time.sleep(connect_retry_wait)
197 connect_retry_count += 1
Roger Meier41ad4342015-03-24 22:30:40 +0100198
Nobuaki Sukegawa10308cb2016-02-03 01:57:03 +0900199 if sv.report.maybe_false_positive() and bind_retry_count < max_bind_retry:
200 logger.warn('[%s]: Detected socket bind failure, retrying...', test.server.name)
201 bind_retry_count += 1
202 else:
203 if cl.expired:
204 result = RESULT_TIMEOUT
Nobuaki Sukegawa10308cb2016-02-03 01:57:03 +0900205 else:
Nobuaki Sukegawa59310f52016-02-18 01:41:46 +0900206 result = cl.proc.returncode if cl.proc else RESULT_ERROR
207 if not sv.killed:
208 # Server died without being killed.
209 result |= RESULT_ERROR
Nobuaki Sukegawa5e432ec2016-01-24 01:34:29 +0900210
Nobuaki Sukegawa10308cb2016-02-03 01:57:03 +0900211 if result == 0 or retry_count >= max_retry:
212 return (retry_count, result)
213 else:
214 logger.info('[%s-%s]: test failed, retrying...', test.server.name, test.client.name)
215 retry_count += 1
Nobuaki Sukegawa59310f52016-02-18 01:41:46 +0900216 except Exception:
217 if not async:
218 raise
219 logger.warn('Error executing [%s]', test.name, exc_info=True)
220 return (retry_count, RESULT_ERROR)
221 except:
222 logger.info('Interrupted execution', exc_info=True)
Nobuaki Sukegawa10308cb2016-02-03 01:57:03 +0900223 if not async:
224 raise
225 stop.set()
Nobuaki Sukegawa10308cb2016-02-03 01:57:03 +0900226 return (retry_count, RESULT_ERROR)
Roger Meier41ad4342015-03-24 22:30:40 +0100227
228
229class PortAllocator(object):
Nobuaki Sukegawa10308cb2016-02-03 01:57:03 +0900230 def __init__(self):
231 self._log = multiprocessing.get_logger()
232 self._lock = multiprocessing.Lock()
233 self._ports = set()
234 self._dom_ports = set()
235 self._last_alloc = 0
Roger Meier41ad4342015-03-24 22:30:40 +0100236
Nobuaki Sukegawa10308cb2016-02-03 01:57:03 +0900237 def _get_tcp_port(self):
238 sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
Nobuaki Sukegawa59310f52016-02-18 01:41:46 +0900239 sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
240 sock.bind(('', 0))
Nobuaki Sukegawa10308cb2016-02-03 01:57:03 +0900241 port = sock.getsockname()[1]
242 self._lock.acquire()
243 try:
244 ok = port not in self._ports
245 if ok:
246 self._ports.add(port)
247 self._last_alloc = time.time()
248 finally:
249 self._lock.release()
250 sock.close()
251 return port if ok else self._get_tcp_port()
Roger Meier41ad4342015-03-24 22:30:40 +0100252
Nobuaki Sukegawa10308cb2016-02-03 01:57:03 +0900253 def _get_domain_port(self):
254 port = random.randint(1024, 65536)
255 self._lock.acquire()
256 try:
257 ok = port not in self._dom_ports
258 if ok:
259 self._dom_ports.add(port)
260 finally:
261 self._lock.release()
262 return port if ok else self._get_domain_port()
Roger Meier41ad4342015-03-24 22:30:40 +0100263
Nobuaki Sukegawa10308cb2016-02-03 01:57:03 +0900264 def alloc_port(self, socket_type):
265 if socket_type in ('domain', 'abstract'):
266 return self._get_domain_port()
267 else:
268 return self._get_tcp_port()
Roger Meier41ad4342015-03-24 22:30:40 +0100269
Nobuaki Sukegawa10308cb2016-02-03 01:57:03 +0900270 # static method for inter-process invokation
271 @staticmethod
272 @contextlib.contextmanager
273 def alloc_port_scoped(allocator, socket_type):
274 port = allocator.alloc_port(socket_type)
275 yield port
276 allocator.free_port(socket_type, port)
Roger Meier41ad4342015-03-24 22:30:40 +0100277
Nobuaki Sukegawa10308cb2016-02-03 01:57:03 +0900278 def free_port(self, socket_type, port):
279 self._log.debug('free_port')
280 self._lock.acquire()
281 try:
282 if socket_type == 'domain':
283 self._dom_ports.remove(port)
284 path = domain_socket_path(port)
285 if os.path.exists(path):
286 os.remove(path)
287 elif socket_type == 'abstract':
288 self._dom_ports.remove(port)
289 else:
290 self._ports.remove(port)
291 except IOError:
292 self._log.info('Error while freeing port', exc_info=sys.exc_info())
293 finally:
294 self._lock.release()
Roger Meier41ad4342015-03-24 22:30:40 +0100295
296
297class NonAsyncResult(object):
Nobuaki Sukegawa10308cb2016-02-03 01:57:03 +0900298 def __init__(self, value):
299 self._value = value
Roger Meier41ad4342015-03-24 22:30:40 +0100300
Nobuaki Sukegawa10308cb2016-02-03 01:57:03 +0900301 def get(self, timeout=None):
302 return self._value
Roger Meier41ad4342015-03-24 22:30:40 +0100303
Nobuaki Sukegawa10308cb2016-02-03 01:57:03 +0900304 def wait(self, timeout=None):
305 pass
Roger Meier41ad4342015-03-24 22:30:40 +0100306
Nobuaki Sukegawa10308cb2016-02-03 01:57:03 +0900307 def ready(self):
308 return True
Roger Meier41ad4342015-03-24 22:30:40 +0100309
Nobuaki Sukegawa10308cb2016-02-03 01:57:03 +0900310 def successful(self):
311 return self._value == 0
Roger Meier41ad4342015-03-24 22:30:40 +0100312
313
314class TestDispatcher(object):
Nobuaki Sukegawa10308cb2016-02-03 01:57:03 +0900315 def __init__(self, testdir, basedir, logdir_rel, concurrency):
316 self._log = multiprocessing.get_logger()
317 self.testdir = testdir
318 self._report = SummaryReporter(basedir, logdir_rel, concurrency > 1)
319 self.logdir = self._report.testdir
320 # seems needed for python 2.x to handle keyboard interrupt
321 self._stop = multiprocessing.Event()
322 self._async = concurrency > 1
323 if not self._async:
324 self._pool = None
325 global stop
326 global ports
327 stop = self._stop
328 ports = PortAllocator()
329 else:
330 self._m = multiprocessing.managers.BaseManager()
331 self._m.register('ports', PortAllocator)
332 self._m.start()
333 self._pool = multiprocessing.Pool(concurrency, self._pool_init, (self._m.address,))
334 self._log.debug(
335 'TestDispatcher started with %d concurrent jobs' % concurrency)
Roger Meier41ad4342015-03-24 22:30:40 +0100336
Nobuaki Sukegawa10308cb2016-02-03 01:57:03 +0900337 def _pool_init(self, address):
338 global stop
339 global m
340 global ports
341 stop = self._stop
342 m = multiprocessing.managers.BaseManager(address)
343 m.connect()
344 ports = m.ports()
Roger Meier41ad4342015-03-24 22:30:40 +0100345
Nobuaki Sukegawa10308cb2016-02-03 01:57:03 +0900346 def _dispatch_sync(self, test, cont, max_retry):
347 r = run_test(self.testdir, self.logdir, test, max_retry, False)
348 cont(r)
349 return NonAsyncResult(r)
Roger Meier41ad4342015-03-24 22:30:40 +0100350
Nobuaki Sukegawa10308cb2016-02-03 01:57:03 +0900351 def _dispatch_async(self, test, cont, max_retry):
352 self._log.debug('_dispatch_async')
353 return self._pool.apply_async(func=run_test, args=(self.testdir, self.logdir, test, max_retry), callback=cont)
Roger Meier41ad4342015-03-24 22:30:40 +0100354
Nobuaki Sukegawa10308cb2016-02-03 01:57:03 +0900355 def dispatch(self, test, max_retry):
356 index = self._report.add_test(test)
Roger Meier41ad4342015-03-24 22:30:40 +0100357
Nobuaki Sukegawa10308cb2016-02-03 01:57:03 +0900358 def cont(result):
359 if not self._stop.is_set():
Nobuaki Sukegawa59310f52016-02-18 01:41:46 +0900360 if result and len(result) == 2:
361 retry_count, returncode = result
362 else:
363 retry_count = 0
364 returncode = RESULT_ERROR
Nobuaki Sukegawa10308cb2016-02-03 01:57:03 +0900365 self._log.debug('freeing port')
366 self._log.debug('adding result')
367 self._report.add_result(index, returncode, returncode == RESULT_TIMEOUT, retry_count)
368 self._log.debug('finish continuation')
369 fn = self._dispatch_async if self._async else self._dispatch_sync
370 return fn(test, cont, max_retry)
Roger Meier41ad4342015-03-24 22:30:40 +0100371
Nobuaki Sukegawa10308cb2016-02-03 01:57:03 +0900372 def wait(self):
373 if self._async:
374 self._pool.close()
375 self._pool.join()
376 self._m.shutdown()
377 return self._report.end()
Roger Meier41ad4342015-03-24 22:30:40 +0100378
Nobuaki Sukegawa10308cb2016-02-03 01:57:03 +0900379 def terminate(self):
380 self._stop.set()
381 if self._async:
382 self._pool.terminate()
383 self._pool.join()
384 self._m.shutdown()