THRIFT-3642 Speed up cross test runner

This closes #873
diff --git a/test/crossrunner/run.py b/test/crossrunner/run.py
index 18c1623..4b4eb0a 100644
--- a/test/crossrunner/run.py
+++ b/test/crossrunner/run.py
@@ -48,6 +48,7 @@
         self.timer = None
         self.expired = False
         self.killed = False
+        self.proc = None
 
     def _expire(self):
         self._log.info('Timeout')
@@ -123,8 +124,31 @@
 
 
 def run_test(testdir, logdir, test_dict, max_retry, async=True):
+    logger = multiprocessing.get_logger()
+
+    def ensure_socket_open(proc, port, max_delay):
+        sleeped = 0.1
+        time.sleep(sleeped)
+        sock4 = socket.socket()
+        sock6 = socket.socket(family=socket.AF_INET6)
+        sleep_step = 0.2
+        try:
+            while sock4.connect_ex(('127.0.0.1', port)) and sock6.connect_ex(('::1', port)):
+                if proc.poll() is not None:
+                    logger.warn('server process is exited')
+                    return False
+                if sleeped > max_delay:
+                    logger.warn('sleeped for %f seconds but server port is not open' % sleeped)
+                    return False
+                time.sleep(sleep_step)
+                sleeped += sleep_step
+            logger.debug('waited %f sec for server port open' % sleeped)
+            return True
+        finally:
+            sock4.close()
+            sock6.close()
+
     try:
-        logger = multiprocessing.get_logger()
         max_bind_retry = 3
         retry_count = 0
         bind_retry_count = 0
@@ -141,13 +165,18 @@
 
                 logger.debug('Starting server')
                 with sv.start():
-                    if test.delay > 0:
-                        logger.debug('Delaying client for %.2f seconds' % test.delay)
-                        time.sleep(test.delay)
+                    if test.socket in ('domain', 'abstract'):
+                        time.sleep(0.1)
+                    else:
+                        if not ensure_socket_open(sv.proc, port, test.delay):
+                            break
                     connect_retry_count = 0
-                    max_connect_retry = 10
+                    max_connect_retry = 3
                     connect_retry_wait = 0.5
                     while True:
+                        if sv.proc.poll() is not None:
+                            logger.info('not starting client because server process is absent')
+                            break
                         logger.debug('Starting client')
                         cl.start(test.timeout)
                         logger.debug('Waiting client')
@@ -168,27 +197,27 @@
             else:
                 if cl.expired:
                     result = RESULT_TIMEOUT
-                elif not sv.killed and cl.proc.returncode == 0:
-                    # Server should be alive at the end.
-                    result = RESULT_ERROR
                 else:
-                    result = cl.proc.returncode
+                    result = cl.proc.returncode if cl.proc else RESULT_ERROR
+                    if not sv.killed:
+                        # Server died without being killed.
+                        result |= RESULT_ERROR
 
                 if result == 0 or retry_count >= max_retry:
                     return (retry_count, result)
                 else:
                     logger.info('[%s-%s]: test failed, retrying...', test.server.name, test.client.name)
                     retry_count += 1
-    except (KeyboardInterrupt, SystemExit):
-        logger.info('Interrupted execution')
+    except Exception:
+        if not async:
+            raise
+        logger.warn('Error executing [%s]', test.name, exc_info=True)
+        return (retry_count, RESULT_ERROR)
+    except:
+        logger.info('Interrupted execution', exc_info=True)
         if not async:
             raise
         stop.set()
-        return None
-    except:
-        if not async:
-            raise
-        logger.warn('Error executing [%s]', test.name, exc_info=sys.exc_info())
         return (retry_count, RESULT_ERROR)
 
 
@@ -202,7 +231,8 @@
 
     def _get_tcp_port(self):
         sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
-        sock.bind(('127.0.0.1', 0))
+        sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
+        sock.bind(('', 0))
         port = sock.getsockname()[1]
         self._lock.acquire()
         try:
@@ -322,7 +352,11 @@
 
         def cont(result):
             if not self._stop.is_set():
-                retry_count, returncode = result
+                if result and len(result) == 2:
+                    retry_count, returncode = result
+                else:
+                    retry_count = 0
+                    returncode = RESULT_ERROR
                 self._log.debug('freeing port')
                 self._log.debug('adding result')
                 self._report.add_result(index, returncode, returncode == RESULT_TIMEOUT, retry_count)