Validate network downtime during live migration
This patch adds the ability to measure network downtime during the live
migration process.
A fixture has been added to start and stop a background pinger
process and also read status from it.
The downtime measurement has 0.2 seconds granularity.
In order to reduce overall traffic, the ping payload size is set to
the minimal value.
Change-Id: I83c6a5d49f5d4da05deb677907e5048ecdd2242b
diff --git a/releasenotes/notes/measure-downtime-during-live-migration-5e8305be270de680.yaml b/releasenotes/notes/measure-downtime-during-live-migration-5e8305be270de680.yaml
new file mode 100644
index 0000000..9f4abd1
--- /dev/null
+++ b/releasenotes/notes/measure-downtime-during-live-migration-5e8305be270de680.yaml
@@ -0,0 +1,9 @@
+---
+features:
+ - |
+ Added a new module net_downtime including the fixture NetDowntimeMeter that
+ can be used to measure how long the connectivity with an IP is lost
+ during certain operations like a server live migration.
+ The configuration option allowed_network_downtime has been added with a
+ default value of 5.0 seconds, which would be the maximum time that
+ the connectivity downtime is expected to last.
diff --git a/tempest/common/utils/net_downtime.py b/tempest/common/utils/net_downtime.py
new file mode 100644
index 0000000..9675ec8
--- /dev/null
+++ b/tempest/common/utils/net_downtime.py
@@ -0,0 +1,111 @@
+# Copyright 2022 OpenStack Foundation
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+import signal
+import subprocess
+
+import fixtures
+
+from oslo_log import log
+
+
+LOG = log.getLogger(__name__)
+
+
+class NetDowntimeMeter(fixtures.Fixture):
+    """Measure connectivity loss towards an IP with a background ping.
+
+    While the fixture is set up, a quiet ``ping`` process keeps probing
+    ``dest_ip`` every ``interval`` seconds.  The downtime is estimated as
+    the number of unanswered probes multiplied by the interval, so its
+    granularity equals ``interval``.
+    """
+
+    def __init__(self, dest_ip, interval='0.2'):
+        """Store the ping target and the probing interval.
+
+        :param dest_ip: IP address to ping.
+        :param interval: seconds between probes; anything accepted by both
+            ``ping -i`` and ``float()`` (string or number).
+        """
+        super().__init__()
+        self.dest_ip = dest_ip
+        # Note: for intervals lower than 0.2 ping requires root privileges
+        self.interval = interval
+        self.ping_process = None
+
+    def _setUp(self):
+        """Start the pinger as soon as the fixture is set up."""
+        self.start_background_pinger()
+
+    def start_background_pinger(self):
+        """Spawn the background ``ping`` and register its cleanup."""
+        # -q: only the summary is printed; -s1: minimal payload to reduce
+        # the overall traffic.
+        cmd = ['ping', '-q', '-s1', '-i{}'.format(self.interval),
+               self.dest_ip]
+        LOG.debug("Starting background pinger to '%s' with interval %s",
+                  self.dest_ip, self.interval)
+        self.ping_process = subprocess.Popen(
+            cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        self.addCleanup(self.cleanup)
+
+    def cleanup(self):
+        """Stop the pinger, reap it and close its pipes.
+
+        Safe to call more than once and when the pinger never started.
+        """
+        proc, self.ping_process = self.ping_process, None
+        if proc is None:
+            return
+        if proc.poll() is None:
+            LOG.debug('Terminating background pinger with pid %s', proc.pid)
+            proc.terminate()
+            try:
+                # Reap the child so it does not linger as a zombie.
+                proc.wait(timeout=5)
+            except subprocess.TimeoutExpired:
+                proc.kill()
+                proc.wait()
+        for stream in (proc.stdout, proc.stderr):
+            if stream is not None:
+                stream.close()
+
+    def get_downtime(self):
+        """Return the downtime accumulated so far, in seconds.
+
+        SIGQUIT makes ping print a one-line status report on stderr without
+        stopping it, so this method can be called several times.  The call
+        blocks until that line is available.
+
+        :returns: lost probes multiplied by the interval as a float, or
+            None when the pinger is not running or its report could not be
+            parsed.
+        """
+        if self.ping_process is None:
+            LOG.warning('Background pinger is not running, downtime '
+                        'cannot be measured')
+            return None
+        self.ping_process.send_signal(signal.SIGQUIT)
+        # Example of the expected output:
+        # 264/274 packets, 3% loss
+        output = self.ping_process.stderr.readline().strip().decode('utf-8')
+        try:
+            succ, total = (int(n) for n in output.split()[0].split('/'))
+        except (IndexError, ValueError):
+            # Empty line, wrong number of fields or non-numeric counters.
+            LOG.warning('Unexpected output obtained from the pinger: %s',
+                        output)
+            return None
+        return (total - succ) * float(self.interval)
diff --git a/tempest/config.py b/tempest/config.py
index ebde421..4098f32 100644
--- a/tempest/config.py
+++ b/tempest/config.py
@@ -965,6 +965,12 @@
default='ecdsa',
help='Type of key to use for ssh connections. '
'Valid types are rsa, ecdsa'),
+    cfg.FloatOpt('allowed_network_downtime',
+                 default=5.0,
+                 help="Allowed VM network connection downtime during live "
+                      "migration, in seconds. "
+                      "When the measured downtime exceeds this value, an "
+                      "exception is raised."),
]
volume_group = cfg.OptGroup(name='volume',
diff --git a/tempest/scenario/test_network_advanced_server_ops.py b/tempest/scenario/test_network_advanced_server_ops.py
index b48ac3c..1c00212 100644
--- a/tempest/scenario/test_network_advanced_server_ops.py
+++ b/tempest/scenario/test_network_advanced_server_ops.py
@@ -15,7 +15,9 @@
import testtools
+from oslo_log import log
from tempest.common import utils
+from tempest.common.utils import net_downtime
from tempest.common import waiters
from tempest import config
from tempest.lib import decorators
@@ -23,6 +25,8 @@
CONF = config.CONF
+LOG = log.getLogger(__name__)
+
class TestNetworkAdvancedServerOps(manager.NetworkScenarioTest):
"""Check VM connectivity after some advanced instance operations executed:
@@ -252,6 +256,11 @@
block_migration = (CONF.compute_feature_enabled.
block_migration_for_live_migration)
old_host = self.get_host_for_server(server['id'])
+
+ downtime_meter = net_downtime.NetDowntimeMeter(
+ floating_ip['floating_ip_address'])
+ self.useFixture(downtime_meter)
+
self.admin_servers_client.live_migrate_server(
server['id'], host=None, block_migration=block_migration,
disk_over_commit=False)
@@ -261,6 +270,16 @@
new_host = self.get_host_for_server(server['id'])
self.assertNotEqual(old_host, new_host, 'Server did not migrate')
+ downtime = downtime_meter.get_downtime()
+ self.assertIsNotNone(downtime)
+ LOG.debug("Downtime seconds measured with downtime_meter = %r",
+ downtime)
+ allowed_downtime = CONF.validation.allowed_network_downtime
+ self.assertLess(
+ downtime, allowed_downtime,
+ "Downtime of {} seconds is higher than expected '{}'".format(
+ downtime, allowed_downtime))
+
self._wait_server_status_and_check_network_connectivity(
server, keypair, floating_ip)