Check VM's console log before trying to SSH to it.
Due to issue described in related bug report, it seems that it may
happen sometimes that tempest will start trying to ssh to the instance
before ssh key will be really configured in the instance and in such
case it may happened that there will be AuthenticationFailure error
always there. Even if ssh key will be configured properly later during
the test.
To workaround that issue and avoid failures of tests, this patch adds
check if the vm is really booted and ready to ssh. It is done by
checking console log of the VM and looking for specific string "login:"
which appears at least in case of Cirros and Ubuntu images used in our
CI jobs.
In case when such string will not be found, test will continue to run
and will still try to ssh to the instance. So in worst case it may slow
ssh to the instance a bit but shouldn't really have any bad impact on
test as before this patch it would probably also wait similar amount of
time but on trying to SSH to the instance.
Similar workaround was introduced in the neutron-tempest-plugin with
[1]. After it was merged number of failed jobs due to ssh "Authentication
failure" issues lower down significantly.
[1] https://review.opendev.org/#/c/758968/
Change-Id: I84a7c2fc389d181388ef821848ffad4bc67f1213
Related-Bug: #1892861
diff --git a/tempest/common/compute.py b/tempest/common/compute.py
index edb9d16..da3a4a9 100644
--- a/tempest/common/compute.py
+++ b/tempest/common/compute.py
@@ -64,7 +64,7 @@
def create_test_server(clients, validatable=False, validation_resources=None,
tenant_network=None, wait_until=None,
volume_backed=False, name=None, flavor=None,
- image_id=None, **kwargs):
+ image_id=None, wait_for_sshable=True, **kwargs):
"""Common wrapper utility returning a test server.
This method is a common wrapper returning a test server that can be
@@ -100,6 +100,8 @@
CONF.compute.flavor_ref will be used instead.
:param image_id: ID of the image to be used to provision the server. If not
defined, CONF.compute.image_ref will be used instead.
+ :param wait_for_sshable: Check server's console log and wait until it will
+ be ready to login.
:returns: a tuple
"""
@@ -270,6 +272,10 @@
LOG.exception('Server %s failed to delete in time',
server['id'])
+ if (validatable and CONF.compute_feature_enabled.console_output and
+ wait_for_sshable):
+ waiters.wait_for_guest_os_boot(clients.servers_client, server['id'])
+
return body, servers
diff --git a/tempest/common/waiters.py b/tempest/common/waiters.py
index 789daaf..17796df 100644
--- a/tempest/common/waiters.py
+++ b/tempest/common/waiters.py
@@ -437,3 +437,20 @@
'the required time (%s s)' % (port_id, server_id,
client.build_timeout))
raise lib_exc.TimeoutException(message)
+
+
+def wait_for_guest_os_boot(client, server_id):
+ start_time = int(time.time())
+ while True:
+ console_output = client.get_console_output(server_id)['output']
+ for line in console_output.split('\n'):
+ if 'login:' in line.lower():
+ return
+ if int(time.time()) - start_time >= client.build_timeout:
+ LOG.info("Guest OS on server %s probably isn't ready or its "
+ "console log can't be parsed properly. If guest OS "
+ "isn't ready, that may cause problems with SSH to "
+ "the server.",
+ server_id)
+ return
+ time.sleep(client.build_interval)
diff --git a/tempest/tests/common/test_waiters.py b/tempest/tests/common/test_waiters.py
index 73924bd..f03c7cc 100755
--- a/tempest/tests/common/test_waiters.py
+++ b/tempest/tests/common/test_waiters.py
@@ -131,6 +131,36 @@
mock.call('server_id')])
sleep.assert_called_once_with(client.build_interval)
+ def test_wait_for_guest_os_boot(self):
+ get_console_output = mock.Mock(
+ side_effect=[
+ {'output': 'os not ready yet\n'},
+ {'output': 'login:\n'}
+ ])
+ client = self.mock_client(get_console_output=get_console_output)
+ self.patch('time.time', return_value=0.)
+ sleep = self.patch('time.sleep')
+
+ with mock.patch.object(waiters.LOG, "info") as log_info:
+ waiters.wait_for_guest_os_boot(client, 'server_id')
+
+ get_console_output.assert_has_calls([
+ mock.call('server_id'), mock.call('server_id')])
+ sleep.assert_called_once_with(client.build_interval)
+ log_info.assert_not_called()
+
+ def test_wait_for_guest_os_boot_timeout(self):
+ get_console_output = mock.Mock(
+ return_value={'output': 'os not ready yet\n'})
+ client = self.mock_client(get_console_output=get_console_output)
+ self.patch('time.time', side_effect=[0., client.build_timeout + 1.])
+ self.patch('time.sleep')
+
+ with mock.patch.object(waiters.LOG, "info") as log_info:
+ waiters.wait_for_guest_os_boot(client, 'server_id')
+
+ log_info.assert_called_once()
+
class TestVolumeWaiters(base.TestCase):
vol_migrating_src_host = {