Merge "Validate network downtime during live migration"
diff --git a/releasenotes/notes/measure-downtime-during-live-migration-5e8305be270de680.yaml b/releasenotes/notes/measure-downtime-during-live-migration-5e8305be270de680.yaml
new file mode 100644
index 0000000..9f4abd1
--- /dev/null
+++ b/releasenotes/notes/measure-downtime-during-live-migration-5e8305be270de680.yaml
@@ -0,0 +1,9 @@
+---
+features:
+  - |
+    Added a new module ``net_downtime`` with the fixture ``NetDowntimeMeter``,
+    which measures how long connectivity to an IP address is lost during
+    operations such as a server live migration.
+    The configuration option ``[validation]/allowed_network_downtime`` has
+    been added with a default value of 5.0 seconds; it is the maximum
+    connectivity downtime the live migration test tolerates.
diff --git a/tempest/common/utils/net_downtime.py b/tempest/common/utils/net_downtime.py
new file mode 100644
index 0000000..9675ec8
--- /dev/null
+++ b/tempest/common/utils/net_downtime.py
@@ -0,0 +1,100 @@
+# Copyright 2022 OpenStack Foundation
+# All Rights Reserved.
+#
+#    Licensed under the Apache License, Version 2.0 (the "License"); you may
+#    not use this file except in compliance with the License. You may obtain
+#    a copy of the License at
+#
+#         http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+#    License for the specific language governing permissions and limitations
+#    under the License.
+import signal
+import subprocess
+
+import fixtures
+
+from oslo_log import log
+
+
+LOG = log.getLogger(__name__)
+
+
+class NetDowntimeMeter(fixtures.Fixture):
+    """Fixture measuring how long connectivity to an IP address is lost.
+
+    A quiet ``ping`` is started in the background when the fixture is set
+    up. get_downtime() sends it SIGQUIT and parses the packet counters
+    that ping then prints on stderr; the downtime is estimated as the
+    number of unanswered packets multiplied by the ping interval.
+    """
+
+    # Seconds to wait for ping to exit after SIGTERM before killing it.
+    _TERMINATE_TIMEOUT = 5
+
+    def __init__(self, dest_ip, interval='0.2'):
+        """Store the ping parameters; the pinger starts on fixture setup.
+
+        :param dest_ip: IP address to ping.
+        :param interval: seconds between pings, passed to ``ping -i`` and
+            converted with ``float()`` when computing the downtime.
+        """
+        super().__init__()
+        self.dest_ip = dest_ip
+        # Note: for intervals lower than 0.2 ping requires root privileges
+        self.interval = interval
+        self.ping_process = None
+
+    def _setUp(self):
+        self.start_background_pinger()
+
+    def start_background_pinger(self):
+        """Launch the background ping and register its cleanup."""
+        cmd = ['ping', '-q', '-s1']
+        cmd.append('-i{}'.format(self.interval))
+        cmd.append(self.dest_ip)
+        LOG.debug("Starting background pinger to '%s' with interval %s",
+                  self.dest_ip, self.interval)
+        self.ping_process = subprocess.Popen(
+            cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        self.addCleanup(self.cleanup)
+
+    def cleanup(self):
+        """Stop the pinger, reap the child process and close its pipes."""
+        proc, self.ping_process = self.ping_process, None
+        if proc is None:
+            return
+        if proc.poll() is None:
+            LOG.debug('Terminating background pinger with pid %s', proc.pid)
+            proc.terminate()
+        # communicate() waits for the child and closes both pipes, so no
+        # zombie process or open file descriptor is left behind.
+        try:
+            proc.communicate(timeout=self._TERMINATE_TIMEOUT)
+        except subprocess.TimeoutExpired:
+            proc.kill()
+            proc.communicate()
+
+    def get_downtime(self):
+        """Return the estimated downtime in seconds, or None.
+
+        None is returned (after logging a warning) when the pinger has not
+        been started or its statistics line cannot be parsed.
+        """
+        if self.ping_process is None:
+            LOG.warning('Background pinger is not running')
+            return None
+        self.ping_process.send_signal(signal.SIGQUIT)
+        # Example of the expected output:
+        # 264/274 packets, 3% loss
+        output = self.ping_process.stderr.readline().strip().decode('utf-8')
+        try:
+            succ, total = output.split()[0].split('/')
+            return (int(total) - int(succ)) * float(self.interval)
+        except (IndexError, ValueError):
+            LOG.warning('Unexpected output obtained from the pinger: %s',
+                        output)
+            return None
diff --git a/tempest/config.py b/tempest/config.py
index ebde421..4098f32 100644
--- a/tempest/config.py
+++ b/tempest/config.py
@@ -965,6 +965,12 @@
                default='ecdsa',
                help='Type of key to use for ssh connections. '
                     'Valid types are rsa, ecdsa'),
+    cfg.FloatOpt('allowed_network_downtime',
+                 default=5.0,
+                 help="Allowed VM network connection downtime during live "
+                      "migration, in seconds. "
+                      "When the measured downtime exceeds this value, an "
+                      "exception is raised."),
 ]
 
 volume_group = cfg.OptGroup(name='volume',
diff --git a/tempest/scenario/test_network_advanced_server_ops.py b/tempest/scenario/test_network_advanced_server_ops.py
index b48ac3c..1c00212 100644
--- a/tempest/scenario/test_network_advanced_server_ops.py
+++ b/tempest/scenario/test_network_advanced_server_ops.py
@@ -15,7 +15,9 @@
 
 import testtools
 
+from oslo_log import log
 from tempest.common import utils
+from tempest.common.utils import net_downtime
 from tempest.common import waiters
 from tempest import config
 from tempest.lib import decorators
@@ -23,6 +25,8 @@
 
 CONF = config.CONF
 
+LOG = log.getLogger(__name__)
+
 
 class TestNetworkAdvancedServerOps(manager.NetworkScenarioTest):
     """Check VM connectivity after some advanced instance operations executed:
@@ -252,6 +256,11 @@
         block_migration = (CONF.compute_feature_enabled.
                            block_migration_for_live_migration)
         old_host = self.get_host_for_server(server['id'])
+
+        downtime_meter = net_downtime.NetDowntimeMeter(
+            floating_ip['floating_ip_address'])
+        self.useFixture(downtime_meter)
+
         self.admin_servers_client.live_migrate_server(
             server['id'], host=None, block_migration=block_migration,
             disk_over_commit=False)
@@ -261,6 +270,16 @@
         new_host = self.get_host_for_server(server['id'])
         self.assertNotEqual(old_host, new_host, 'Server did not migrate')
 
+        downtime = downtime_meter.get_downtime()
+        self.assertIsNotNone(downtime)
+        LOG.debug("Downtime seconds measured with downtime_meter = %r",
+                  downtime)
+        allowed_downtime = CONF.validation.allowed_network_downtime
+        self.assertLess(
+            downtime, allowed_downtime,
+            "Downtime of {} seconds is higher than expected '{}'".format(
+                downtime, allowed_downtime))
+
         self._wait_server_status_and_check_network_connectivity(
             server, keypair, floating_ip)