fixes, fixes, fixes

commit: 46d4f39135afe25c5784f92986ece253cb7c8811 [log] [tgz]
author: koder aka kdanilov <kdanilov@mirantis.com> Fri Apr 24 11:35:00 2015 +0300
committer: koder aka kdanilov <kdanilov@mirantis.com> Fri Apr 24 11:35:00 2015 +0300
tree: 6b541a97bd4234a7d6dfe817561c73e805c558f1
parent: abd6ead9fc5dde4d3d2ba13133fc67b5e130d9ca [diff]
diff --git a/wally/suits/io/agent.py b/wally/suits/io/agent.py
index 5fb835e..336b176 100644
--- a/wally/suits/io/agent.py
+++ b/wally/suits/io/agent.py

@@ -428,52 +428,41 @@
 
 def run_fio(sliced_it, raw_results_func=None):
     sliced_list = list(sliced_it)
-    ok = True
 
-    try:
-        curr_test_num = 0
-        executed_tests = 0
-        result = {}
+    curr_test_num = 0
+    executed_tests = 0
+    result = {}
 
-        for i, test_slice in enumerate(sliced_list):
-            res_cfg_it = do_run_fio(test_slice)
-            res_cfg_it = enumerate(res_cfg_it, curr_test_num)
+    for i, test_slice in enumerate(sliced_list):
+        res_cfg_it = do_run_fio(test_slice)
+        res_cfg_it = enumerate(res_cfg_it, curr_test_num)
 
-            for curr_test_num, (job_output, section) in res_cfg_it:
-                executed_tests += 1
+        for curr_test_num, (job_output, section) in res_cfg_it:
+            executed_tests += 1
 
-                if raw_results_func is not None:
-                    raw_results_func(executed_tests,
-                                     [job_output, section])
+            if raw_results_func is not None:
+                raw_results_func(executed_tests,
+                                 [job_output, section])
 
-                msg = "{0} != {1}".format(section.name, job_output["jobname"])
-                assert section.name == job_output["jobname"], msg
+            msg = "{0} != {1}".format(section.name, job_output["jobname"])
+            assert section.name == job_output["jobname"], msg
 
-                if section.name.startswith('_'):
-                    continue
+            if section.name.startswith('_'):
+                continue
 
-                add_job_results(section, job_output, result)
+            add_job_results(section, job_output, result)
 
-            curr_test_num += 1
-            msg_template = "Done {0} tests from {1}. ETA: {2}"
+        curr_test_num += 1
+        msg_template = "Done {0} tests from {1}. ETA: {2}"
 
-            rest = sliced_list[i:]
-            time_eta = sum(map(calculate_execution_time, rest))
-            test_left = sum(map(len, rest))
-            print msg_template.format(curr_test_num,
-                                      test_left,
-                                      sec_to_str(time_eta))
+        rest = sliced_list[i:]
+        time_eta = sum(map(calculate_execution_time, rest))
+        test_left = sum(map(len, rest))
+        print msg_template.format(curr_test_num,
+                                  test_left,
+                                  sec_to_str(time_eta))
 
-    except (SystemExit, KeyboardInterrupt):
-        raise
-
-    except Exception:
-        print "=========== ERROR ============="
-        traceback.print_exc()
-        print "======== END OF ERROR ========="
-        ok = False
-
-    return result, executed_tests, ok
+    return result, executed_tests
 
 
 def run_benchmark(binary_tp, *argv, **kwargs):
@@ -603,8 +592,8 @@
         rrfunc = raw_res_func if argv_obj.show_raw_results else None
 
         stime = time.time()
-        job_res, num_tests, ok = run_benchmark(argv_obj.type,
-                                               sliced_it, rrfunc)
+        job_res, num_tests = run_benchmark(argv_obj.type,
+                                           sliced_it, rrfunc)
         etime = time.time()
 
         res = {'__meta__': {'raw_cfg': job_cfg, 'params': params},
@@ -622,8 +611,21 @@
             out_fd.write(pprint.pformat(res) + "\n")
         out_fd.write("\n========= END OF RESULTS =========\n")
 
-        return 0 if ok else 1
+        return 0
+    except:
+        out_fd.write("============ ERROR =============\n")
+        out_fd.write(traceback.format_exc() + "\n")
+        out_fd.write("============ END OF ERROR =============\n")
+        return 1
     finally:
+        try:
+            if out_fd is not sys.stdout:
+                out_fd.flush()
+                os.fsync(out_fd)
+                out_fd.close()
+        except Exception:
+            traceback.print_exc()
+
         if argv_obj.pid_file is not None:
             if os.path.exists(argv_obj.pid_file):
                 os.unlink(argv_obj.pid_file)

diff --git a/wally/suits/io/results_loader.py b/wally/suits/io/results_loader.py
index 9005450..3c8d9c5 100644
--- a/wally/suits/io/results_loader.py
+++ b/wally/suits/io/results_loader.py

@@ -29,6 +29,14 @@
 
 
 def parse_output(out_err):
+    err_start_patt = r"(?ims)=+\s+ERROR\s+=+"
+    err_end_patt = r"(?ims)=+\s+END OF ERROR\s+=+"
+
+    for block in re.split(err_start_patt, out_err)[1:]:
+        tb, garbage = re.split(err_end_patt, block)
+        msg = "Test fails with error:\n" + tb.strip() + "\n"
+        raise OSError(msg)
+
     start_patt = r"(?ims)=+\s+RESULTS\(format=json\)\s+=+"
     end_patt = r"(?ims)=+\s+END OF RESULTS\s+=+"
 

diff --git a/wally/suits/itest.py b/wally/suits/itest.py
index 294909b..ad5e8a5 100644
--- a/wally/suits/itest.py
+++ b/wally/suits/itest.py

@@ -120,6 +120,8 @@
 
 
 class IOPerfTest(IPerfTest):
+    tcp_conn_timeout = 30
+    max_pig_timeout = 30
 
     def __init__(self, *dt, **mp):
         IPerfTest.__init__(self, *dt, **mp)
@@ -236,6 +238,67 @@
         elif self.is_primary:
             logger.warning("Prefilling of test files is disabled")
 
+    def get_test_status(self):
+        is_connected = None
+        has_pid_file = None
+        pid = None
+        err = None
+
+        try:
+            conn = connect(self.node.conn_url,
+                           conn_timeout=self.tcp_conn_timeout)
+            with conn:
+                with conn.open_sftp() as sftp:
+                    try:
+                        pid = read_from_remote(sftp, self.pid_file)
+                        has_pid_file = True
+                    except (NameError, IOError) as exc:
+                        pid = None
+                        has_pid_file = False
+
+            is_connected = True
+
+        except (socket.error, SSHException, EOFError) as exc:
+            err = str(exc)
+            is_connected = False
+
+        return is_connected, has_pid_file, pid, err
+
+    def wait_till_finished(self, timeout):
+        conn_id = self.node.get_conn_id()
+        end_of_wait_time = timeout + time.time()
+
+        # time_till_check = random.randint(30, 90)
+        time_till_check = 5
+        pid = None
+        has_pid_file = False
+        pid_get_timeout = self.max_pig_timeout + time.time()
+        curr_connected = True
+
+        while end_of_wait_time > time.time():
+            time.sleep(time_till_check)
+
+            is_connected, has_pid_file, pid, err = self.get_test_status()
+
+            if not has_pid_file:
+                if pid is None and time.time() > pid_get_timeout:
+                    msg = ("On node {0} pid file doesn't " +
+                           "appears in time")
+                    logger.error(msg.format(conn_id))
+                    raise RuntimeError("Start timeout")
+                else:
+                    # execution finished
+                    break
+
+            if is_connected and not curr_connected:
+                msg = "Connection with {0} is restored"
+                logger.debug(msg.format(conn_id))
+            elif not is_connected and curr_connected:
+                msg = "Lost connection with " + conn_id + ". Error: " + err
+                logger.debug(msg)
+
+            curr_connected = is_connected
+
     def run(self, barrier):
         conn_id = self.node.get_conn_id()
 
@@ -286,80 +349,26 @@
                                          wait_till.strftime("%H:%M:%S")))
 
             self.run_over_ssh(cmd)
+
             msg = "Test on node {0} started in screen {1}"
             logger.debug(msg.format(conn_id, screen_name))
 
-            end_of_wait_time = timeout + time.time()
-
-            # time_till_check = random.randint(30, 90)
-            time_till_check = 5
-
-            pid = None
-            no_pid_file = True
-            tcp_conn_timeout = 30
-            pid_get_timeout = 30 + time.time()
-            connection_ok = True
-
             # TODO: add monitoring socket
             if self.node.connection is not Local:
                 self.node.connection.close()
 
-            while end_of_wait_time > time.time():
-                conn = None
-                time.sleep(time_till_check)
-
-                try:
-                    if self.node.connection is not Local:
-                        conn = connect(self.node.conn_url,
-                                       conn_timeout=tcp_conn_timeout)
-                    else:
-                        conn = self.node.connection
-
-                    try:
-                        with conn.open_sftp() as sftp:
-                            try:
-                                pid = read_from_remote(sftp, self.pid_file)
-                                no_pid_file = False
-                            except (NameError, IOError):
-                                no_pid_file = True
-                    finally:
-                        if conn is not Local:
-                            conn.close()
-                            conn = None
-
-                    if no_pid_file:
-                        if pid is None:
-                            if time.time() > pid_get_timeout:
-                                msg = ("On node {0} pid file doesn't " +
-                                       "appears in time")
-                                logger.error(msg.format(conn_id))
-                                raise RuntimeError("Start timeout")
-                        else:
-                            # execution finished
-                            break
-                    if not connection_ok:
-                        msg = "Connection with {0} is restored"
-                        logger.debug(msg.format(conn_id))
-                        connection_ok = True
-
-                except (socket.error, SSHException, EOFError) as exc:
-                    if connection_ok:
-                        connection_ok = False
-                        msg = "Lost connection with " + conn_id
-                        msg += ". Error: " + str(exc)
-                        logger.debug(msg)
-
+            self.wait_till_finished(timeout)
             logger.debug("Done")
 
             if self.node.connection is not Local:
-                timeout = tcp_conn_timeout * 3
+                conn_timeout = self.tcp_conn_timeout * 3
                 self.node.connection = connect(self.node.conn_url,
-                                               conn_timeout=timeout)
+                                               conn_timeout=conn_timeout)
 
             with self.node.connection.open_sftp() as sftp:
                 # try to reboot and then connect
-                out_err = read_from_remote(sftp,
-                                           self.log_fl)
+                out_err = read_from_remote(sftp, self.log_fl)
+
         finally:
             barrier.exit()
 
@@ -369,6 +378,8 @@
         try:
             for data in parse_output(out_err):
                 self.on_result_cb(data)
+        except OSError:
+            raise
         except Exception as exc:
             msg_templ = "Error during postprocessing results: {0!s}"
             raise RuntimeError(msg_templ.format(exc))
commit	46d4f39135afe25c5784f92986ece253cb7c8811	[log] [tgz]
author	koder aka kdanilov <kdanilov@mirantis.com>	Fri Apr 24 11:35:00 2015 +0300
committer	koder aka kdanilov <kdanilov@mirantis.com>	Fri Apr 24 11:35:00 2015 +0300
tree	6b541a97bd4234a7d6dfe817561c73e805c558f1
parent	abd6ead9fc5dde4d3d2ba13133fc67b5e130d9ca [diff]