Check VM's console log before trying to SSH to it. Due to issue described in related bug report, it seems that it may happen sometimes that tempest will start trying to ssh to the instance before ssh key will be really configured in the instance and in such case it may happened that there will be AuthenticationFailure error always there. Even if ssh key will be configured properly later during the test. To workaround that issue and avoid failures of tests, this patch adds check if the vm is really booted and ready to ssh. It is done by checking console log of the VM and looking for specific string "login:" which appears at least in case of Cirros and Ubuntu images used in our CI jobs. In case when such string will not be found, test will continue to run and will still try to ssh to the instance. So in worst case it may slow ssh to the instance a bit but shouldn't really have any bad impact on test as before this patch it would probably also wait similar amount of time but on trying to SSH to the instance. Similar workaround was introduced in the neutron-tempest-plugin with [1]. After it was merged number of failed jobs due to ssh "Authentication failure" issues lower down significantly. [1] https://review.opendev.org/#/c/758968/ Change-Id: I84a7c2fc389d181388ef821848ffad4bc67f1213 Related-Bug: #1892861

commit: e3405ba808f97eae57f3a60991000afaa34cbe89 [log] [tgz]
author: Slawek Kaplonski <skaplons@redhat.com> Mon Nov 09 17:24:13 2020 +0100
committer: Slawek Kaplonski <skaplons@redhat.com> Mon Nov 16 15:29:07 2020 +0100
tree: 15bc2e68b5c1a5085b01848ae3176efda84dcf78
parent: ae784996c9cf6cc7de90d20188ea5bc6542c209d [diff]
diff --git a/tempest/common/compute.py b/tempest/common/compute.py
index edb9d16..da3a4a9 100644
--- a/tempest/common/compute.py
+++ b/tempest/common/compute.py

@@ -64,7 +64,7 @@
 def create_test_server(clients, validatable=False, validation_resources=None,
                        tenant_network=None, wait_until=None,
                        volume_backed=False, name=None, flavor=None,
-                       image_id=None, **kwargs):
+                       image_id=None, wait_for_sshable=True, **kwargs):
     """Common wrapper utility returning a test server.
 
     This method is a common wrapper returning a test server that can be
@@ -100,6 +100,8 @@
         CONF.compute.flavor_ref will be used instead.
     :param image_id: ID of the image to be used to provision the server. If not
         defined, CONF.compute.image_ref will be used instead.
+    :param wait_for_sshable: Check server's console log and wait until it will
+        be ready to login.
     :returns: a tuple
     """
 
@@ -270,6 +272,10 @@
                             LOG.exception('Server %s failed to delete in time',
                                           server['id'])
 
+    if (validatable and CONF.compute_feature_enabled.console_output and
+            wait_for_sshable):
+        waiters.wait_for_guest_os_boot(clients.servers_client, server['id'])
+
     return body, servers
 
 

diff --git a/tempest/common/waiters.py b/tempest/common/waiters.py
index 789daaf..17796df 100644
--- a/tempest/common/waiters.py
+++ b/tempest/common/waiters.py

@@ -437,3 +437,20 @@
                        'the required time (%s s)' % (port_id, server_id,
                                                      client.build_timeout))
             raise lib_exc.TimeoutException(message)
+
+
+def wait_for_guest_os_boot(client, server_id):
+    start_time = int(time.time())
+    while True:
+        console_output = client.get_console_output(server_id)['output']
+        for line in console_output.split('\n'):
+            if 'login:' in line.lower():
+                return
+        if int(time.time()) - start_time >= client.build_timeout:
+            LOG.info("Guest OS on server %s probably isn't ready or its "
+                     "console log can't be parsed properly. If guest OS "
+                     "isn't ready, that may cause problems with SSH to "
+                     "the server.",
+                     server_id)
+            return
+        time.sleep(client.build_interval)

diff --git a/tempest/tests/common/test_waiters.py b/tempest/tests/common/test_waiters.py
index 73924bd..f03c7cc 100755
--- a/tempest/tests/common/test_waiters.py
+++ b/tempest/tests/common/test_waiters.py

@@ -131,6 +131,36 @@
                                           mock.call('server_id')])
         sleep.assert_called_once_with(client.build_interval)
 
+    def test_wait_for_guest_os_boot(self):
+        get_console_output = mock.Mock(
+            side_effect=[
+                {'output': 'os not ready yet\n'},
+                {'output': 'login:\n'}
+            ])
+        client = self.mock_client(get_console_output=get_console_output)
+        self.patch('time.time', return_value=0.)
+        sleep = self.patch('time.sleep')
+
+        with mock.patch.object(waiters.LOG, "info") as log_info:
+            waiters.wait_for_guest_os_boot(client, 'server_id')
+
+        get_console_output.assert_has_calls([
+            mock.call('server_id'), mock.call('server_id')])
+        sleep.assert_called_once_with(client.build_interval)
+        log_info.assert_not_called()
+
+    def test_wait_for_guest_os_boot_timeout(self):
+        get_console_output = mock.Mock(
+            return_value={'output': 'os not ready yet\n'})
+        client = self.mock_client(get_console_output=get_console_output)
+        self.patch('time.time', side_effect=[0., client.build_timeout + 1.])
+        self.patch('time.sleep')
+
+        with mock.patch.object(waiters.LOG, "info") as log_info:
+            waiters.wait_for_guest_os_boot(client, 'server_id')
+
+        log_info.assert_called_once()
+
 
 class TestVolumeWaiters(base.TestCase):
     vol_migrating_src_host = {
commit	e3405ba808f97eae57f3a60991000afaa34cbe89	[log] [tgz]
author	Slawek Kaplonski <skaplons@redhat.com>	Mon Nov 09 17:24:13 2020 +0100
committer	Slawek Kaplonski <skaplons@redhat.com>	Mon Nov 16 15:29:07 2020 +0100
tree	15bc2e68b5c1a5085b01848ae3176efda84dcf78
parent	ae784996c9cf6cc7de90d20188ea5bc6542c209d [diff]