workaround: Wait for instance.host=None when shelving
There is a race in Nova where an instance being shelved is set to the
SHELVED_OFFLOADED vm_state before the instance.host is cleared. It
appears to be non-trivial to fix given the comments in that code
explicitly say the updates have to be in that order.
This works around the issue by waiting until the instance.host is None
before considering the shelve offload to be completed.
Related-Bug: #2045785
Related-Issue: PRODX-52273 PRODX-51683 PRODX-54963
Supersedes Ia45247a7aa14eb5f0038d4512a0b4ebe6af5a573
Change-Id: Ic0177a46192810a21be0d02a221c83df28ec16fe
diff --git a/tempest/api/compute/admin/test_servers_on_multinodes.py b/tempest/api/compute/admin/test_servers_on_multinodes.py
index 591abbc..efea266 100644
--- a/tempest/api/compute/admin/test_servers_on_multinodes.py
+++ b/tempest/api/compute/admin/test_servers_on_multinodes.py
@@ -155,17 +155,10 @@
"Less than 2 compute nodes, skipping multi-nodes test.")
def _shelve_offload_then_unshelve_to_host(self, server, host):
- compute.shelve_server(self.servers_client, server['id'],
- force_shelve_offload=True)
-
- # Work around https://bugs.launchpad.net/nova/+bug/2045785
- # This can be removed when ^ is fixed.
- def _check_server_host_is_none():
- server_details = self.os_admin.servers_client.show_server(
- server['id'])
- self.assertIsNone(server_details['server']['OS-EXT-SRV-ATTR:host'])
-
- self.wait_for(_check_server_host_is_none)
+ compute.shelve_server(self.servers_client,
+ server['id'],
+ force_shelve_offload=True,
+ admin_clients=self.os_admin.servers_client)
self.os_admin.servers_client.unshelve_server(
server['id'],
diff --git a/tempest/api/compute/base.py b/tempest/api/compute/base.py
index 313f73d..0f2cdd9 100644
--- a/tempest/api/compute/base.py
+++ b/tempest/api/compute/base.py
@@ -13,8 +13,6 @@
# License for the specific language governing permissions and limitations
# under the License.
-import time
-
from oslo_log import log as logging
from tempest.common import compute
@@ -302,21 +300,6 @@
body['id'])
return body
- def wait_for(self, condition, *args):
- """Repeatedly calls condition() until a timeout."""
- start_time = int(time.time())
- while True:
- try:
- condition(*args)
- except Exception:
- pass
- else:
- return
- if int(time.time()) - start_time >= self.build_timeout:
- condition(*args)
- return
- time.sleep(self.build_interval)
-
@classmethod
def prepare_instance_network(cls):
if (CONF.validation.auth_method != 'disabled' and
diff --git a/tempest/api/compute/servers/test_delete_server.py b/tempest/api/compute/servers/test_delete_server.py
index 178be16..6a50856 100644
--- a/tempest/api/compute/servers/test_delete_server.py
+++ b/tempest/api/compute/servers/test_delete_server.py
@@ -28,6 +28,7 @@
class DeleteServersTestJSON(base.BaseV2ComputeTest):
"""Test deleting servers in various states"""
create_default_network = True
+ credentials = ['primary', 'admin']
if CONF.compute_feature_enabled.volume_multiattach:
min_microversion = '2.60'
@@ -39,6 +40,7 @@
def setup_clients(cls):
super(DeleteServersTestJSON, cls).setup_clients()
cls.client = cls.servers_client
+ cls.admin_servers_client = cls.os_admin.servers_client
@decorators.idempotent_id('9e6e0c87-3352-42f7-9faf-5d6210dbd159')
def test_delete_server_while_in_building_state(self):
@@ -91,7 +93,8 @@
def test_delete_server_while_in_shelved_state(self):
"""Test deleting a server while it's VM state is Shelved"""
server = self.create_test_server(wait_until='ACTIVE')
- compute.shelve_server(self.client, server['id'])
+ compute.shelve_server(self.client, server['id'],
+ admin_clients=self.admin_servers_client)
self.client.delete_server(server['id'])
waiters.wait_for_server_termination(self.client, server['id'])
diff --git a/tempest/api/compute/servers/test_server_actions.py b/tempest/api/compute/servers/test_server_actions.py
index 47f97b0..fcd4493 100644
--- a/tempest/api/compute/servers/test_server_actions.py
+++ b/tempest/api/compute/servers/test_server_actions.py
@@ -37,6 +37,8 @@
class ServerActionsBase(base.BaseV2ComputeTest):
"""Test server actions"""
+ credentials = ['primary', 'admin']
+
image_id = None
if CONF.compute_feature_enabled.volume_multiattach:
min_microversion = '2.60'
@@ -64,6 +66,7 @@
def setup_clients(cls):
super(ServerActionsBase, cls).setup_clients()
cls.client = cls.servers_client
+ cls.admin_servers_client = cls.os_admin.servers_client
def _test_reboot_server(self, server_id, reboot_type):
if CONF.validation.run_validation:
@@ -112,7 +115,7 @@
# may be assigned a bit later.
# as we compare network addresses before and after rebuild,
# need to ensure we remember the floating one too.
- self.wait_for(floating_ip_ready)
+ waiters.wait_for(floating_ip_ready)
# Get the IPs the server has before rebuilding it
original_addresses = (self.client.show_server(server_id)['server']
@@ -287,7 +290,7 @@
# "console-log" API.
# The detail is https://bugs.launchpad.net/nova/+bug/1251920
self.reboot_server(self.server_id, type='HARD')
- self.wait_for(self._get_output, self.server_id)
+ waiters.wait_for(self._get_output, self.server_id)
@decorators.idempotent_id('bd61a9fd-062f-4670-972b-2d6c3e3b9e73')
@testtools.skipUnless(CONF.compute_feature_enabled.pause,
@@ -625,7 +628,7 @@
self.assertGreater(lines, 3, "Cannot get enough console log "
"length. (lines: %s)" % lines)
- self.wait_for(_check_full_length_console_log)
+ waiters.wait_for(_check_full_length_console_log)
@decorators.skip_because(bug='2028851')
@decorators.idempotent_id('5b65d4e7-4ecd-437c-83c0-d6b79d927568')
@@ -640,7 +643,7 @@
self.client.stop_server(self.server_id)
waiters.wait_for_server_status(self.client, self.server_id, 'SHUTOFF')
- self.wait_for(self._get_output, self.server_id)
+ waiters.wait_for(self._get_output, self.server_id)
@decorators.idempotent_id('8cf9f450-a871-42cf-9bef-77eba189c0b0')
@decorators.related_bug('1745529')
@@ -654,7 +657,8 @@
waiters.wait_for_server_status(self.client, self.server_id, 'PAUSED')
# Check if Shelve operation is successful on paused server.
compute.shelve_server(self.client, self.server_id,
- force_shelve_offload=True)
+ force_shelve_offload=True,
+ admin_clients=self.admin_servers_client)
@decorators.idempotent_id('c6bc11bf-592e-4015-9319-1c98dc64daf5')
@testtools.skipUnless(CONF.compute_feature_enabled.vnc_console,
@@ -707,7 +711,8 @@
raise lib_exc.InvalidConfiguration(
'api_v2 must be True in [image-feature-enabled].')
compute.shelve_server(self.client, self.server_id,
- force_shelve_offload=True)
+ force_shelve_offload=True,
+ admin_clients=self.admin_servers_client)
server = self.client.show_server(self.server_id)['server']
image_name = server['name'] + '-shelved'
diff --git a/tempest/api/compute/servers/test_servers_negative.py b/tempest/api/compute/servers/test_servers_negative.py
index 23568aa..18f76bb 100644
--- a/tempest/api/compute/servers/test_servers_negative.py
+++ b/tempest/api/compute/servers/test_servers_negative.py
@@ -33,6 +33,7 @@
"""Negative tests of servers"""
create_default_network = True
+ credentials = ['primary', 'admin']
def setUp(self):
super(ServersNegativeTestJSON, self).setUp()
@@ -53,6 +54,7 @@
def setup_clients(cls):
super(ServersNegativeTestJSON, cls).setup_clients()
cls.client = cls.servers_client
+ cls.admin_servers_client = cls.os_admin.servers_client
@classmethod
def resource_setup(cls):
@@ -503,7 +505,8 @@
@decorators.attr(type=['negative'])
def test_shelve_shelved_server(self):
"""Shelving a shelved server should fail"""
- compute.shelve_server(self.client, self.server_id)
+ compute.shelve_server(self.client, self.server_id,
+ admin_clients=self.admin_servers_client)
def _unshelve_server():
server_info = self.client.show_server(self.server_id)['server']
diff --git a/tempest/common/compute.py b/tempest/common/compute.py
index a8aafe9..6c1c942 100644
--- a/tempest/common/compute.py
+++ b/tempest/common/compute.py
@@ -365,7 +365,8 @@
return body, created_servers
-def shelve_server(servers_client, server_id, force_shelve_offload=False):
+def shelve_server(servers_client, server_id, force_shelve_offload=False,
+ admin_clients=None):
"""Common wrapper utility to shelve server.
This method is a common wrapper to make server in 'SHELVED'
@@ -376,23 +377,39 @@
:param force_shelve_offload: Forcefully offload shelve server if it
is configured not to offload server
automatically after offload time.
+ :param admin_clients: Compute servers admin client to wait for
+ proper SHELVED_OFFLOADED state.
"""
+ offload_time = CONF.compute.shelved_offload_time
+ if (force_shelve_offload or offload_time >= 0) and admin_clients is None:
+ raise ValueError("Need admin client to wait for "
+ "SHELVED_OFFLOADED state")
+
body = servers_client.shelve_server(server_id)
request_id = body.response['x-openstack-request-id']
offload_time = CONF.compute.shelved_offload_time
- if offload_time >= 0:
- waiters.wait_for_server_status(servers_client, server_id,
- 'SHELVED_OFFLOADED',
- extra_timeout=offload_time,
- request_id=request_id)
- else:
+ if offload_time < 0:
waiters.wait_for_server_status(servers_client, server_id, 'SHELVED')
- if force_shelve_offload:
- servers_client.shelve_offload_server(server_id)
- waiters.wait_for_server_status(servers_client, server_id,
- 'SHELVED_OFFLOADED',
- request_id=request_id)
+
+ if not force_shelve_offload:
+ return
+ servers_client.shelve_offload_server(server_id)
+
+ waiters.wait_for_server_status(
+ servers_client, server_id,
+ 'SHELVED_OFFLOADED',
+ extra_timeout=0 if offload_time < 0 else offload_time,
+ request_id=request_id)
+
+ # Work around https://bugs.launchpad.net/nova/+bug/2045785
+ # This can be removed when ^ is fixed.
+ def _check_server_host_is_none():
+ server_details = admin_clients.show_server(server_id)
+ host = server_details['server']['OS-EXT-SRV-ATTR:host']
+ assert host is None
+
+ waiters.wait_for(_check_server_host_is_none)
def create_websocket(url):
diff --git a/tempest/common/waiters.py b/tempest/common/waiters.py
index a2f65cc..660a427 100644
--- a/tempest/common/waiters.py
+++ b/tempest/common/waiters.py
@@ -741,3 +741,19 @@
return len(ports) == number
test_utils.call_until_true(
_wait_ports, delay, interval, network_id, number)
+
+
+def wait_for(condition, *args):
+ """Repeatedly calls condition() until a timeout."""
+ start_time = int(time.time())
+ while True:
+ try:
+ condition(*args)
+ except Exception:
+ pass
+ else:
+ return
+ if int(time.time()) - start_time >= CONF.compute.build_timeout:
+ condition(*args)
+ return
+ time.sleep(CONF.compute.build_interval)
diff --git a/tempest/scenario/test_shelve_instance.py b/tempest/scenario/test_shelve_instance.py
index 3ba301a..f740937 100644
--- a/tempest/scenario/test_shelve_instance.py
+++ b/tempest/scenario/test_shelve_instance.py
@@ -52,8 +52,10 @@
raise cls.skipException("Shelve is not available.")
def _shelve_then_unshelve_server(self, server):
- compute.shelve_server(self.servers_client, server['id'],
- force_shelve_offload=True)
+ compute.shelve_server(self.servers_client,
+ server['id'],
+ force_shelve_offload=True,
+ admin_clients=self.admin_servers_client)
self.servers_client.unshelve_server(server['id'])
waiters.wait_for_server_status(self.servers_client, server['id'],