Merge "Add steps to install exim4"
diff --git a/tcp_tests/fixtures/ceph_fixtures.py b/tcp_tests/fixtures/ceph_fixtures.py
index c294542..0b2ef50 100644
--- a/tcp_tests/fixtures/ceph_fixtures.py
+++ b/tcp_tests/fixtures/ceph_fixtures.py
@@ -40,7 +40,7 @@
@pytest.fixture(scope='function')
def ceph_deployed(revert_snapshot, request, config,
hardware, underlay, common_services_deployed,
- ceph_actions):
+ salt_deployed, ceph_actions):
"""Fixture to get or install Ceph services on environment
:param revert_snapshot: fixture that reverts snapshot that is specified
@@ -72,6 +72,7 @@
commands = underlay.read_template(steps_path)
ceph_actions.install(commands)
hardware.create_snapshot(ext.SNAPSHOT.ceph_deployed)
+ salt_deployed.sync_time()
else:
# 1. hardware environment created and powered on
diff --git a/tcp_tests/fixtures/common_fixtures.py b/tcp_tests/fixtures/common_fixtures.py
index 9d6ccba..31f0f1e 100644
--- a/tcp_tests/fixtures/common_fixtures.py
+++ b/tcp_tests/fixtures/common_fixtures.py
@@ -52,6 +52,7 @@
finish_step = "FINISH {} TEST. TOOK {} min {} sec".format(
step_name, minutes, seconds
)
+ print("\n\n")
foot = "\n" + "<" * 5 + "#" * 30 + "[ {} ]" + "#" * 30 + ">" * 5
foot = foot.format(finish_step)
LOG.info(foot)
@@ -69,3 +70,10 @@
steps_mark = request.keywords.get('steps', None)
steps = steps_mark.args[0]
return steps
+
+
+@pytest.fixture(scope='function', autouse=True)
+def func_name(request):
+ """Name of the current test function"""
+ return getattr(request.node.function, '_name',
+ request.node.function.__name__)
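+
+
+# Example usage (as in the failover tests in this change; 'test_foo'
+# is an illustrative name):
+#     def test_foo(self, func_name, rally, ...):
+#         rally.run_tempest(pattern='set=smoke', report_prefix=func_name)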
diff --git a/tcp_tests/fixtures/common_services_fixtures.py b/tcp_tests/fixtures/common_services_fixtures.py
index 5d4c56a..7d1c73f 100644
--- a/tcp_tests/fixtures/common_services_fixtures.py
+++ b/tcp_tests/fixtures/common_services_fixtures.py
@@ -71,6 +71,7 @@
commands = underlay.read_template(steps_path)
common_services_actions.install(commands)
hardware.create_snapshot(ext.SNAPSHOT.common_services_deployed)
+ salt_deployed.sync_time()
else:
# 1. hardware environment created and powered on
diff --git a/tcp_tests/fixtures/decapod_fixtures.py b/tcp_tests/fixtures/decapod_fixtures.py
index 7f064c5..8e40b41 100644
--- a/tcp_tests/fixtures/decapod_fixtures.py
+++ b/tcp_tests/fixtures/decapod_fixtures.py
@@ -68,6 +68,7 @@
commands = underlay.read_template(steps_path)
decapod_actions.install(commands)
hardware.create_snapshot(ext.SNAPSHOT.decapod_deployed)
+ salt_deployed.sync_time()
else:
# 1. hardware environment created and powered on
diff --git a/tcp_tests/fixtures/k8s_fixtures.py b/tcp_tests/fixtures/k8s_fixtures.py
index 3cacbaf..356a51b 100644
--- a/tcp_tests/fixtures/k8s_fixtures.py
+++ b/tcp_tests/fixtures/k8s_fixtures.py
@@ -38,7 +38,7 @@
@pytest.mark.revert_snapshot(ext.SNAPSHOT.k8s_deployed)
@pytest.fixture(scope='function')
def k8s_deployed(revert_snapshot, request, config, hardware, underlay,
- common_services_deployed, k8s_actions):
+ common_services_deployed, salt_deployed, k8s_actions):
"""Fixture to get or install k8s on environment
:param revert_snapshot: fixture that reverts snapshot that is specified
@@ -71,6 +71,7 @@
commands = underlay.read_template(steps_path)
k8s_actions.install(commands)
hardware.create_snapshot(ext.SNAPSHOT.k8s_deployed)
+ salt_deployed.sync_time()
# Workaround for keepalived hang issue after env revert from snapshot
# see https://mirantis.jira.com/browse/PROD-12038
diff --git a/tcp_tests/fixtures/openstack_fixtures.py b/tcp_tests/fixtures/openstack_fixtures.py
index 98e367c..8e92e77 100644
--- a/tcp_tests/fixtures/openstack_fixtures.py
+++ b/tcp_tests/fixtures/openstack_fixtures.py
@@ -41,7 +41,7 @@
@pytest.fixture(scope='function')
def openstack_deployed(revert_snapshot, request, config,
hardware, underlay, common_services_deployed,
- openstack_actions):
+ salt_deployed, openstack_actions, rally):
"""Fixture to get or install OpenStack services on environment
:param revert_snapshot: fixture that reverts snapshot that is specified
@@ -52,11 +52,13 @@
:param underlay: fixture provides underlay manager
:param common_services_deployed: fixture provides CommonServicesManager
:param openstack_actions: fixture provides OpenstackManager instance
+ :param rally: fixture provides RallyManager instance
:rtype: OpenstackManager
If config.openstack.openstack_installed is not set, this fixture assumes
that the openstack services were not installed, and does the following:
- install openstack services
+ - [optional] prepare docker with rally container
- make snapshot with name 'openstack_deployed'
- return OpenstackManager instance
@@ -64,15 +66,40 @@
the openstack services were already installed, and does the following:
- return OpenstackManager instance
+ If you want to prepare 'rally', please use mark:
+ @pytest.mark.with_rally(rally_node=<str>,
+ prepare_openstack=<bool>,
+ prepare_tempest=<bool>)
+ :param rally_node: first chars of the node name where rally should
+ be started
+ :param prepare_openstack: if True, prepare OpenStack objects for
+ rally tasks: cirros image, private net04
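+ :param prepare_tempest: if True, call rally.prepare_tempest_task()
+ before starting the container (currently a no-op, see
+ prepare_tempest_task below)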
+
If you want to revert 'openstack_deployed' snapshot, please use mark:
@pytest.mark.revert_snapshot("openstack_deployed")
"""
+
# Deploy Openstack cluster
if not config.openstack.openstack_installed:
steps_path = config.openstack_deploy.openstack_steps_path
commands = underlay.read_template(steps_path)
openstack_actions.install(commands)
+
+ # If @pytest.mark.with_rally() is set, then prepare Rally
+ # container for 'openstack_deployed' snapshot.
+ with_rally = request.keywords.get('with_rally', None)
+ if with_rally:
+ prepare_openstack = with_rally.kwargs.get("prepare_openstack",
+ False)
+ prepare_tempest = with_rally.kwargs.get("prepare_tempest", False)
+ if prepare_openstack:
+ rally.prepare_rally_task(target_node='ctl01')
+ if prepare_tempest:
+ rally.prepare_tempest_task()
+ rally.run_container()
+
hardware.create_snapshot(ext.SNAPSHOT.openstack_deployed)
+ salt_deployed.sync_time()
else:
# 1. hardware environment created and powered on
diff --git a/tcp_tests/fixtures/oss_fixtures.py b/tcp_tests/fixtures/oss_fixtures.py
index d46427b..95bbc54 100644
--- a/tcp_tests/fixtures/oss_fixtures.py
+++ b/tcp_tests/fixtures/oss_fixtures.py
@@ -68,6 +68,7 @@
commands = underlay.read_template(steps_path)
oss_actions.install(commands)
hardware.create_snapshot(ext.SNAPSHOT.oss_deployed)
+ salt_deployed.sync_time()
else:
# 1. hardware environment created and powered on
@@ -78,3 +79,18 @@
pass
return oss_actions
+
+
+@pytest.mark.revert_snapshot(ext.SNAPSHOT.oss_deployed)
+@pytest.fixture(scope='function')
+def oss_sl_os_deployed(revert_snapshot,
+ sl_os_deployed,
+ oss_deployed):
+ """Fixture to get or install SL and OpenStack services on environment
+
+ Uses fixtures openstack_deployed and sl_deployed, with 'sl_deployed'
+ top-level snapshot.
+
+ Returns SLManager instance object
+ """
+ return oss_deployed
diff --git a/tcp_tests/fixtures/rally_fixtures.py b/tcp_tests/fixtures/rally_fixtures.py
index 18fa179..335ab88 100644
--- a/tcp_tests/fixtures/rally_fixtures.py
+++ b/tcp_tests/fixtures/rally_fixtures.py
@@ -18,13 +18,19 @@
@pytest.fixture(scope='function')
-def rally(config, underlay):
+def rally(request, config, underlay):
"""Fixture that provides various actions for TCP
+ :param request: fixture provides pytest data
:param config: fixture provides oslo.config
:param underlay: fixture provides underlay manager
:rtype: RallyManager
For use in tests or fixtures to deploy a custom TCP
"""
- return rallymanager.RallyManager(underlay, config.salt.salt_master_host)
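+ # Sketch of the mark handling: a test decorated with, for example,
+ # @pytest.mark.with_rally(rally_node='cfg01.') gets the rally container
+ # on the node whose name starts with that prefix ('cfg01.' is only an
+ # illustrative value; the default is 'gtw01.').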
+ with_rally = request.keywords.get('with_rally', None)
+ rally_node = "gtw01."
+ if with_rally:
+ rally_node = with_rally.kwargs.get("rally_node", "gtw01.")
+
+ return rallymanager.RallyManager(underlay, rally_node)
diff --git a/tcp_tests/fixtures/salt_fixtures.py b/tcp_tests/fixtures/salt_fixtures.py
index d72b1fc..aff28dc 100644
--- a/tcp_tests/fixtures/salt_fixtures.py
+++ b/tcp_tests/fixtures/salt_fixtures.py
@@ -78,6 +78,7 @@
for n in config.underlay.ssh)]
hardware.create_snapshot(ext.SNAPSHOT.salt_deployed)
+ salt_actions.sync_time()
else:
# 1. hardware environment created and powered on
@@ -87,4 +88,6 @@
# installed TCP API endpoint
pass
+ salt_actions.sync_time()
+
return salt_actions
diff --git a/tcp_tests/fixtures/stacklight_fixtures.py b/tcp_tests/fixtures/stacklight_fixtures.py
index 8028383..c1747b8 100644
--- a/tcp_tests/fixtures/stacklight_fixtures.py
+++ b/tcp_tests/fixtures/stacklight_fixtures.py
@@ -39,7 +39,7 @@
@pytest.fixture(scope='function')
def sl_deployed(revert_snapshot, request, config,
hardware, underlay, common_services_deployed,
- sl_actions):
+ salt_deployed, sl_actions):
"""Fixture to get or install SL services on environment
:param revert_snapshot: fixture that reverts snapshot that is specified
@@ -57,6 +57,7 @@
commands = underlay.read_template(steps_path)
sl_actions.install(commands)
hardware.create_snapshot(ext.SNAPSHOT.sl_deployed)
+ salt_deployed.sync_time()
else:
# 1. hardware environment created and powered on
diff --git a/tcp_tests/fixtures/underlay_fixtures.py b/tcp_tests/fixtures/underlay_fixtures.py
index a1476e3..eacbec9 100644
--- a/tcp_tests/fixtures/underlay_fixtures.py
+++ b/tcp_tests/fixtures/underlay_fixtures.py
@@ -199,7 +199,7 @@
@pytest.fixture(scope='function', autouse=True)
-def grab_versions(request, underlay):
+def grab_versions(request, func_name, underlay):
"""Fixture for grab package versions at the end of test
Marks:
@@ -211,11 +211,10 @@
grab_version = request.keywords.get('grab_versions', None)
def test_fin():
- default_name = getattr(request.node.function, '_name',
- request.node.function.__name__)
- if hasattr(request.node, 'rep_call') and request.node.rep_call.passed \
+ if hasattr(request.node, 'rep_call') and \
+ (request.node.rep_call.passed or request.node.rep_call.failed)\
and grab_version:
artifact_name = utils.extract_name_from_mark(grab_version) or \
- "{}".format(default_name)
+ "{}".format(func_name)
underlay.get_logs(artifact_name)
request.addfinalizer(test_fin)
diff --git a/tcp_tests/helpers/exceptions.py b/tcp_tests/helpers/exceptions.py
index 259880e..f6c2310 100644
--- a/tcp_tests/helpers/exceptions.py
+++ b/tcp_tests/helpers/exceptions.py
@@ -121,3 +121,15 @@
class BaseImageIsNotSet(BaseException):
def __str__(self):
return "Base image for creating VMs is not set!"
+
+
+class SaltPillarError(BaseException):
+ def __init__(self, minion_id, pillar, message=''):
+ super(SaltPillarError, self).__init__()
+ self.minion_id = minion_id
+ self.pillar = pillar
+ self.message = message
+
+ def __str__(self):
+ return ("Salt pillar '{0}' error on minion {1}: {2}"
+ .format(self.pillar, self.minion_id, self.message))
diff --git a/tcp_tests/helpers/log_step.py b/tcp_tests/helpers/log_step.py
index 6ba7164..dbffa58 100644
--- a/tcp_tests/helpers/log_step.py
+++ b/tcp_tests/helpers/log_step.py
@@ -60,7 +60,7 @@
"found!".format(step_num, func.__name__))
test_case_steps = parse_test_doc(func.__doc__)['steps']
try:
- LOG.info(" *** [STEP#{0}] {1} ***".format(
+ LOG.info("\n\n*** [STEP#{0}] {1} ***".format(
step_num,
test_case_steps[step_num - 1]))
except IndexError:
diff --git a/tcp_tests/managers/common_services_manager.py b/tcp_tests/managers/common_services_manager.py
index 658657a..c62114d 100644
--- a/tcp_tests/managers/common_services_manager.py
+++ b/tcp_tests/managers/common_services_manager.py
@@ -12,7 +12,11 @@
# License for the specific language governing permissions and limitations
# under the License.
+from tcp_tests.helpers import exceptions
from tcp_tests.managers.execute_commands import ExecuteCommandsMixin
+from tcp_tests import logger
+
+LOG = logger.logger
class CommonServicesManager(ExecuteCommandsMixin):
@@ -32,3 +36,193 @@
self.execute_commands(commands,
label='Install common services')
self.__config.common_services.common_services_installed = True
+
+ def get_keepalived_vip_minion_id(self, vip):
+ """Get minion ID where keepalived VIP is at the moment"""
+ tgt = 'I@keepalived:cluster:enabled:True'
+ grains = 'ip_interfaces'
+ # Refresh grains first
+ self._salt.run_state(tgt, 'saltutil.refresh_grains')
+ # Get grains
+ result = self._salt.get_grains(tgt=tgt, grains=grains)[0]
+ minion_ids = [
+ minion_id for minion_id, interfaces in result.items()
+ for interface, ips in interfaces.items()
+ for ip in ips
+ if ip == vip
+ ]
+ LOG.debug("VIP '{0}' found on minions {1}".format(vip, minion_ids))
+ if len(minion_ids) != 1:
+ raise Exception("VIP {0} is expected on a single node. Actual "
+ "nodes with VIP: {1}".format(vip, minion_ids))
+ return minion_ids[0]
+
+ def get_keepalived_vips(self):
+ tgt = 'I@keepalived:cluster:enabled:True'
+ pillar = 'keepalived:cluster:instance'
+ return self._salt.get_pillar(tgt=tgt, pillar=pillar)[0]
+
+ def check_keepalived_pillar(self):
+ """Check the keepalived pillars for VIPs
+
+ Check for:
+ - the same VIP is used for the same 'virtual_router_id'
+ - the same password is used for the same 'virtual_router_id'
+ - no 'virtual_router_id' or VIP doubles in different
+ keepalived instances on the same node
+ - no 'priority' doubles inside the same 'virtual_router_id'
+ on different nodes
+
+ Pillar data, as returned by get_keepalived_vips(): dict {
+ <minion_id>: {
+ <keepalived instance>: {
+ <address>: str,
+ <password>: str,
+ <virtual_router_id>: int,
+ <priority>: int
+ },
+ ...
+ },
+ }
+ :return dict: {
+ <str:vip1> : {
+ 'instance_name': <str>
+ 'virtual_router_id': <int>,
+ 'password': <str>,
+ 'nodes' : {<str:node1>: <int:priority>,
+ <str:node2>: <int:priority>,
+ ...},
+ },
+ <str:vip2> : { ...
+ },
+ }
+ """
+
+ def check_single_address(vips, minion_id, instance, data):
+ for vip in vips:
+ if vips[vip]['virtual_router_id'] == data['virtual_router_id']\
+ and (vip != data['address'] or
+ vips[vip]['instance_name'] != instance):
+ message = (
+ "'virtual_router_id': {0} for keepalived instance "
+ "{1}: {2} is already used for {3}: {4} on nodes {5}"
+ .format(data['virtual_router_id'],
+ instance, data['address'],
+ vips[vip]['instance_name'],
+ vip,
+ vips[vip]['nodes'].keys())
+ )
+ raise exceptions.SaltPillarError(
+ minion_id,
+ 'keepalived:cluster:instance',
+ message)
+
+ def check_single_router_id(vips, minion_id, instance, data):
+ for vip in vips:
+ if vips[vip]['virtual_router_id'] != data['virtual_router_id']\
+ and vip == data['address']:
+ message = (
+ "'virtual_router_id': {0} for keepalived instance "
+ "{1}: {2} is not the same as for {3}: {4} on nodes {5}"
+ .format(data['virtual_router_id'],
+ instance, data['address'],
+ vips[vip]['instance_name'],
+ vip,
+ vips[vip]['nodes'].keys())
+ )
+ raise exceptions.SaltPillarError(
+ minion_id,
+ 'keepalived:cluster:instance',
+ message)
+
+ pillar_vips = self.get_keepalived_vips()
+ vips = {}
+ for minion_id in pillar_vips:
+ for instance, data in pillar_vips[minion_id].items():
+ address = data['address']
+ password = data['password']
+ virtual_router_id = data['virtual_router_id']
+ priority = data['priority']
+
+ if address not in vips:
+ # Check that there is the same VIP
+ # for the same virtual_router_id
+ check_single_address(vips, minion_id, instance, data)
+
+ # Add new VIP
+ vips[address] = {
+ 'instance_name': instance,
+ 'virtual_router_id': virtual_router_id,
+ 'password': password,
+ 'nodes': {
+ minion_id: priority,
+ }
+ }
+ else:
+ # Check that there is the same virtual_router_id
+ # for the same VIP
+ check_single_router_id(vips, minion_id, instance, data)
+ if vips[address]['password'] != password:
+ message = (
+ "'password': {0} for keepalived instance "
+ "{1}: {2} is not the same as for {3}: {4} on "
+ "nodes {5}".format(data['password'],
+ instance, data['address'],
+ vips[address]['instance_name'],
+ address,
+ vips[address]['nodes'].keys())
+ )
+ raise exceptions.SaltPillarError(
+ minion_id,
+ 'keepalived:cluster:instance',
+ message)
+
+ if any([priority == prio
+ for node, prio in vips[address]['nodes'].items()]):
+ message = (
+ "'priority': {0} for keepalived instance "
+ "{1}: {2} is the same as for {3}: {4} on "
+ "nodes {5}".format(data['priority'],
+ instance, data['address'],
+ vips[address]['instance_name'],
+ address,
+ vips[address]['nodes'].keys())
+ )
+ raise exceptions.SaltPillarError(
+ minion_id,
+ 'keepalived:cluster:instance',
+ message)
+
+ # Add data to the vips
+ vips[address]['nodes'][minion_id] = priority
+
+ LOG.debug("keepalived pillars check passed: {0}".format(vips))
+ return vips
+
+ def get_haproxy_status(self, tgt):
+ """Get haproxy status for all backends on a specified minion"""
+ cmd = ("echo 'show stat' | "
+ "socat 'UNIX-CONNECT:/run/haproxy/admin.sock' STDIO")
+ # Query the haproxy stats socket on the specified minion
+ res = self._salt.run_state(tgt, 'cmd.run', cmd)
+ output = res[0]['return'][0]
+ assert len(output.keys()) == 1, "Please specify a single minion in tgt"
+ minion_id = output.keys()[0]
+
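+ # 'show stat' output is CSV, one line per <proxy>,<server> pair;
+ # the column indexes below follow the haproxy CSV stats format
+ # (scur=4, smax=5, status=17, rate=33).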
+ haproxy_status = {}
+ for line in output[minion_id].splitlines():
+ if line.startswith("#"):
+ continue
+ status = line.split(",")
+ pxname = status[0]
+ svname = status[1]
+ if pxname not in haproxy_status:
+ haproxy_status[pxname] = {}
+ haproxy_status[pxname][svname] = {
+ 'scur': status[4], # sessions current
+ 'smax': status[5], # sessions max
+ 'status': status[17], # status: UP or DOWN
+ 'rate': status[33], # sessions rate
+ }
+ LOG.debug("Haproxy status: \n{0}".format(haproxy_status))
+ return haproxy_status
diff --git a/tcp_tests/managers/rallymanager.py b/tcp_tests/managers/rallymanager.py
index dcf4309..8282bcc 100644
--- a/tcp_tests/managers/rallymanager.py
+++ b/tcp_tests/managers/rallymanager.py
@@ -14,7 +14,8 @@
import datetime
import json
-from junit_xml import TestSuite, TestCase
+from devops import error
+from functools32 import lru_cache
from tcp_tests import logger
from tcp_tests import settings
@@ -26,147 +27,253 @@
class RallyManager(object):
"""docstring for RallyManager"""
- image_name = 'rallyforge/rally'
- image_version = '0.9.1'
+ image_name = (
+ 'docker-prod-virtual.docker.mirantis.net/'
+ 'mirantis/oscore/rally-tempest')
+ image_version = 'latest'
+ tempest_tag = "16.0.0"
+ designate_tag = "0.2.0"
- def __init__(self, underlay, admin_host):
+ def __init__(self, underlay, rally_node='gtw01.'):
super(RallyManager, self).__init__()
- self._admin_host = admin_host
self._underlay = underlay
+ self._node_name = self.get_target_node(target=rally_node)
- def prepare(self):
- content = """
-sed -i 's|#swift_operator_role = Member|swift_operator_role=SwiftOperator|g' /etc/rally/rally.conf # noqa
-source /home/rally/openrc
-rally-manage db recreate
-rally deployment create --fromenv --name=tempest
-rally verify create-verifier --type tempest --name tempest-verifier
-rally verify configure-verifier
-rally verify configure-verifier --show
-"""
- cmd = "cat > {path} << EOF\n{content}\nEOF".format(
- path='/root/rally/install_tempest.sh', content=content)
- cmd1 = "chmod +x /root/rally/install_tempest.sh"
- cmd2 = "scp ctl01:/root/keystonercv3 /root/rally/openrc"
+ @property
+ @lru_cache(maxsize=None)
+ def image_id(self):
+ LOG.info("Getting image id")
+ cmd = ("docker images | grep {0}| grep {1}| awk '{{print $3}}'"
+ .format(self.image_name, self.image_version))
+ res = self._underlay.check_call(cmd, node_name=self._node_name)
+ image_id = res['stdout'][0].strip()
+ LOG.info("Image ID is {}".format(image_id))
+ return image_id
- with self._underlay.remote(host=self._admin_host) as remote:
- LOG.info("Create rally workdir")
- remote.check_call('mkdir -p /root/rally')
- LOG.info("Create install_tempest.sh")
- remote.check_call(cmd)
- LOG.info("Chmod +x install_tempest.sh")
- remote.check_call(cmd1)
- LOG.info("Copy openstackrc")
- remote.check_call(cmd2)
+ @property
+ @lru_cache(maxsize=None)
+ def docker_id(self):
+ cmd = ("docker ps | grep {image_id} | "
+ "awk '{{print $1}}'| head -1").format(
+ image_id=self.image_id)
+ LOG.info("Getting container id")
+ res = self._underlay.check_call(cmd, node_name=self._node_name)
+ docker_id = res['stdout'][0].strip()
+ LOG.info("Container ID is {}".format(docker_id))
+ return docker_id
- def pull_image(self, version=None):
- version = version or self.image_version
- image = self.image_name
- cmd = ("apt-get -y install docker.io &&"
- " docker pull {image}:{version}".format(image=image,
- version=version))
- with self._underlay.remote(host=self._admin_host) as remote:
- LOG.info("Pull {image}:{version}".format(image=image,
- version=version))
- remote.check_call(cmd)
+ # TODO: move this method to the underlay manager
+ def get_target_node(self, target='gtw01.'):
+ return [node_name for node_name
+ in self._underlay.node_names()
+ if node_name.startswith(target)][0]
- with self._underlay.remote(host=self._admin_host) as remote:
- LOG.info("Getting image id")
- cmd = "docker images | grep {0}| awk '{print $3}'".format(
- self.image_version)
- res = remote.check_call(cmd)
- self.image_id = res['stdout'][0].strip()
- LOG.info("Image ID is {}".format(self.image_id))
+ def _docker_exec(self, cmd, timeout=None, verbose=False):
+ docker_cmd = ('docker exec -i {docker_id} bash -c "{cmd}"'
+ .format(cmd=cmd, docker_id=self.docker_id))
+ LOG.info("Executing: {docker_cmd}".format(docker_cmd=docker_cmd))
+ return self._underlay.check_call(docker_cmd, node_name=self._node_name,
+ verbose=verbose, timeout=timeout)
- def run(self):
- with self._underlay.remote(host=self._admin_host) as remote:
- cmd = ("docker run --net host -v /root/rally:/home/rally "
- "-tid -u root {image_id}".format(image_id=self.image_id))
+ def _run(self):
+ """Start the rally container in the background"""
+ with self._underlay.remote(node_name=self._node_name) as remote:
+ cmd = ("docker run --net host -v /root/rally:/home/rally/.rally "
+ "-v /etc/ssl/certs/:/etc/ssl/certs/ "
+ "-tid -u root --entrypoint /bin/bash {image_id}"
+ .format(image_id=self.image_id))
LOG.info("Run Rally container")
remote.check_call(cmd)
- cmd = ("docker ps | grep {image_id} | "
- "awk '{{print $1}}'| head -1").format(
- image_id=self.image_id)
- LOG.info("Getting container id")
- res = remote.check_call(cmd)
- self.docker_id = res['stdout'][0].strip()
- LOG.info("Container ID is {}".format(self.docker_id))
+ def run_container(self, version=None):
+ """Install docker, configure and run rally container"""
+ version = version or self.image_version
+ image = self.image_name
+ LOG.info("Pull {image}:{version}".format(image=image,
+ version=version))
+ cmd = ("apt-get -y install docker.io &&"
+ " docker pull {image}:{version}".format(image=image,
+ version=version))
+ self._underlay.check_call(cmd, node_name=self._node_name)
- def run_tempest(self, test=''):
- docker_exec = ('docker exec -i {docker_id} bash -c "{cmd}"')
- commands = [
- docker_exec.format(cmd="./install_tempest.sh",
- docker_id=self.docker_id),
- docker_exec.format(
- cmd="source /home/rally/openrc && "
- "rally verify start {test}".format(test=test),
- docker_id=self.docker_id),
- docker_exec.format(
- cmd="rally verify report --type json --to result.json",
- docker_id=self.docker_id),
- docker_exec.format(
- cmd="rally verify report --type html --to result.html",
- docker_id=self.docker_id),
+ LOG.info("Create rally workdir")
+ cmd = 'mkdir -p /root/rally; chown 65500 /root/rally'
+ self._underlay.check_call(cmd, node_name=self._node_name)
+
+ LOG.info("Copy keystonercv3")
+ cmd = "cp /root/keystonercv3 /root/rally/keystonercv3"
+ self._underlay.check_call(cmd, node_name=self._node_name)
+ self._run()
+
+ LOG.info("Create rally deployment")
+ self._docker_exec("rally-manage db recreate")
+ self._docker_exec("source /home/rally/.rally/keystonercv3;"
+ "rally deployment create --fromenv --name=Abathur")
+ self._docker_exec("rally deployment list")
+
+ def prepare_rally_task(self, target_node='ctl01.'):
+ """Prepare cirros image and private network for rally task"""
+ ctl_node_name = self._underlay.get_target_node_names(
+ target=target_node)[0]
+ cmds = [
+ ". keystonercv3 ; openstack flavor create --public m1.tiny",
+ ("wget http://download.cirros-cloud.net/0.3.4/"
+ "cirros-0.3.4-i386-disk.img"),
+ (". /root/keystonercv3; glance --timeout 120 image-create "
+ "--name cirros-disk --visibility public --disk-format qcow2 "
+ "--container-format bare --progress "
+ "< /root/cirros-0.3.4-i386-disk.img"),
+ ". /root/keystonercv3; neutron net-create net04",
]
- with self._underlay.remote(host=self._admin_host) as remote:
- LOG.info("Run tempest inside Rally container")
- for cmd in commands:
- remote.check_call(cmd, verbose=True)
- def get_results(self, store=True, store_file='tempest.xml'):
- LOG.info('Storing tests results...')
- res_file_name = 'result.json'
- file_prefix = 'results_' + datetime.datetime.now().strftime(
- '%Y%m%d_%H%M%S') + '_'
- file_dst = '{0}/{1}{2}'.format(
- settings.LOGS_DIR, file_prefix, res_file_name)
- with self._underlay.remote(host=self._admin_host) as remote:
- remote.download(
- '/root/rally/{0}'.format(res_file_name),
- file_dst)
- res = json.load(remote.open('/root/rally/result.json'))
- if not store:
- return res
+ for cmd in cmds:
+ self._underlay.check_call(cmd, node_name=ctl_node_name)
- formatted_tc = []
- failed_cases = [res['test_cases'][case]
- for case in res['test_cases']
- if res['test_cases'][case]['status']
- in 'fail']
- for case in failed_cases:
- if case:
- tc = TestCase(case['name'])
- tc.add_failure_info(case['traceback'])
- formatted_tc.append(tc)
+ def prepare_tempest_task(self):
+ """Configure rally.conf for tempest tests"""
+ pass
+# LOG.info("Modify rally.conf")
+# cmd = ("sed -i 's|#swift_operator_role = Member|"
+# "swift_operator_role=SwiftOperator|g' "
+# "/etc/rally/rally.conf")
+# self._docker_exec(cmd)
- skipped_cases = [res['test_cases'][case]
- for case in res['test_cases']
- if res['test_cases'][case]['status'] in 'skip']
- for case in skipped_cases:
- if case:
- tc = TestCase(case['name'])
- tc.add_skipped_info(case['reason'])
- formatted_tc.append(tc)
+ def create_rally_task(self, task_path, task_content):
+ """Create a file with rally task definition
- error_cases = [res['test_cases'][case] for case in res['test_cases']
- if res['test_cases'][case]['status'] in 'error']
+ :param task_path: path to JSON or YAML file on target node
+ :task_content: string with json or yaml content to store in file
+ """
+ cmd = "cat > {task_path} << EOF\n{task_content}\nEOF".format(
+ task_path=task_path, task_content=task_content)
+ self._underlay.check_call(cmd, node_name=self._node_name)
- for case in error_cases:
- if case:
- tc = TestCase(case['name'])
- tc.add_error_info(case['traceback'])
- formatted_tc.append(tc)
+ def run_task(self, task='', timeout=None, raise_on_timeout=True,
+ verbose=False):
+ """Run rally task
- success = [res['test_cases'][case] for case in res['test_cases']
- if res['test_cases'][case]['status'] in 'success']
- for case in success:
- if case:
- tc = TestCase(case['name'])
- formatted_tc.append(tc)
-
- ts = TestSuite("tempest", formatted_tc)
- with open(store_file, 'w') as f:
- ts.to_file(f, [ts], prettyprint=False)
-
+ :param task: path to json or yaml file with the task definition
+ :param timeout: timeout in seconds for the task run, passed to the
+ underlay check_call
+ :param raise_on_timeout: bool, ignore TimeoutError if False
+ :param verbose: show rally output to console if True
+ """
+ try:
+ res = self._docker_exec(
+ "rally task start {task}".format(task=task),
+ timeout=timeout,
+ verbose=verbose)
+ except error.TimeoutError:
+ if raise_on_timeout:
+ raise
+ else:
+ res = None
return res
+
+ # Updated to replace the OpenStackManager method run_tempest
+ def run_tempest(self, conf_name='/var/lib/lvm_mcp.conf',
+ pattern='set=smoke', concurrency=0, timeout=None,
+ report_prefix='', report_types=None):
+ """Run tempest tests
+
+ :param conf_name: tempest config placed in the rally container
+ :param pattern: tempest testcase name or one of existing 'set=...'
+ :param concurrency: how many threads to use in parallel. 0 means
+ to take the amount of the cores on the node
+ <self._node_name>.
+ :param timeout: stop tempest tests after specified timeout.
+ :param report_prefix: str, prefix for report filenames. Usually the
+ output of the fixture 'func_name'
+ :param report_types: list of the report types that need to download
+ from the environment: ['html', 'xml', 'json'].
+ None by default.
+ """
+ report_types = report_types or []
+
+ cmd = (
+ "cat > /root/rally/install_tempest.sh << EOF\n"
+ "rally verify create-verifier"
+ " --type tempest "
+ " --name tempest-verifier"
+ " --source /var/lib/tempest"
+ " --version {tempest_tag}"
+ " --system-wide\n"
+ "rally verify add-verifier-ext"
+ " --source /var/lib/designate-tempest-plugin"
+ " --version {designate_tag}\n"
+ "rally verify configure-verifier --extend {tempest_conf}\n"
+ "rally verify configure-verifier --show\n"
+ "EOF".format(tempest_tag=self.tempest_tag,
+ designate_tag=self.designate_tag,
+ tempest_conf=conf_name))
+ with self._underlay.remote(node_name=self._node_name) as remote:
+ LOG.info("Create install_tempest.sh")
+ remote.check_call(cmd)
+ remote.check_call("chmod +x /root/rally/install_tempest.sh")
+
+ LOG.info("Run tempest inside Rally container")
+ self._docker_exec("/home/rally/.rally/install_tempest.sh")
+ self._docker_exec(
+ ("source /home/rally/.rally/keystonercv3 && "
+ "rally verify start --skip-list /var/lib/mcp_skip.list "
+ " --concurrency {concurrency} --pattern {pattern}"
+ .format(concurrency=concurrency, pattern=pattern)),
+ timeout=timeout, verbose=True)
+ if report_prefix:
+ report_filename = '{0}_report_{1}'.format(
+ report_prefix,
+ datetime.datetime.now().strftime('%Y%m%d_%H%M%S'))
+ else:
+ report_filename = 'report_{0}'.format(
+ datetime.datetime.now().strftime('%Y%m%d_%H%M%S'))
+ docker_file_prefix = '/home/rally/.rally/' + report_filename
+
+ # Create reports
+ if 'xml' in report_types:
+ self._docker_exec(
+ "rally verify report --type junit-xml --to {0}.xml"
+ .format(docker_file_prefix))
+ if 'html' in report_types:
+ self._docker_exec(
+ "rally verify report --type html --to {0}.html"
+ .format(docker_file_prefix))
+ # Always create report in JSON to return results into test case
+ # However, it won't be downloaded unless 'json' is in report_types
+ self._docker_exec("rally verify report --type json --to {0}.json"
+ .format(docker_file_prefix))
+
+ # Download reports to the settings.LOGS_DIR
+ file_src_prefix = '/root/rally/{0}'.format(report_filename)
+ file_dst_prefix = '{0}/{1}'.format(settings.LOGS_DIR, report_filename)
+ with self._underlay.remote(node_name=self._node_name) as remote:
+ for suffix in report_types:
+ remote.download(file_src_prefix + '.' + suffix,
+ file_dst_prefix + '.' + suffix)
+ res = json.load(remote.open(file_src_prefix + '.json'))
+
+ # Get the latest verification ID to find the latest testcases in the report
+ vtime = {vdata['finished_at']: vid
+ for vid, vdata in res['verifications'].items()}
+ vlatest_id = vtime[max(vtime.keys())]
+
+ # Each status has the dict with pairs:
+ # <status>: {
+ # <case_name>: <case_details>,
+ # }
+ formatted_tc = {
+ 'success': {},
+ 'fail': {},
+ 'xfail': {},
+ 'skip': {}
+ }
+
+ for tname, tdata in res['tests'].items():
+ status = tdata['by_verification'][vlatest_id]['status']
+ details = tdata['by_verification'][vlatest_id].get('details', '')
+ if status not in formatted_tc:
+ # Fail if tempest returns a new status that may need
+ # to be taken into account in test cases
+ raise Exception("Unknown testcase {0} status: {1} "
+ .format(tname, status))
+ formatted_tc[status][tdata['name']] = details
+ LOG.debug("Formatted testcases: {0}".format(formatted_tc))
+ return formatted_tc
diff --git a/tcp_tests/managers/saltmanager.py b/tcp_tests/managers/saltmanager.py
index 4a58e93..1ff5324 100644
--- a/tcp_tests/managers/saltmanager.py
+++ b/tcp_tests/managers/saltmanager.py
@@ -17,7 +17,8 @@
from collections import defaultdict
from datetime import datetime
-from pepper.libpepper import Pepper
+from pepper import libpepper
+from tcp_tests.helpers import utils
from tcp_tests import settings
from tcp_tests import logger
from tcp_tests.managers.execute_commands import ExecuteCommandsMixin
@@ -94,7 +95,7 @@
url = "http://{host}:{port}".format(
host=self.host, port=self.port)
LOG.info("Connecting to Salt API {0}".format(url))
- self.__api = Pepper(url)
+ self.__api = libpepper.Pepper(url)
self.__session_start = login()
return self.__api
@@ -158,6 +159,10 @@
result = self.local(tgt=tgt, fun='pillar.get', args=pillar)
return result['return']
+ def get_grains(self, tgt, grains):
+ result = self.local(tgt=tgt, fun='grains.get', args=grains)
+ return result['return']
+
def get_ssh_data(self):
"""Generate ssh config for Underlay
@@ -192,3 +197,30 @@
host(k, next(i for i in v['ipv4'] if i in pool_net))
for k, v in hosts.items()
if next(i for i in v['ipv4'] if i in pool_net)]
+
+ def service_status(self, tgt, service):
+ result = self.local(tgt=tgt, fun='service.status', args=service)
+ return result['return']
+
+ def service_restart(self, tgt, service):
+ result = self.local(tgt=tgt, fun='service.restart', args=service)
+ return result['return']
+
+ def service_stop(self, tgt, service):
+ result = self.local(tgt=tgt, fun='service.stop', args=service)
+ return result['return']
+
+ @utils.retry(3, exception=libpepper.PepperException)
+ def sync_time(self, tgt='*'):
+ LOG.info("NTP time sync on the salt minions '{0}'".format(tgt))
+ # Force authentication update on the next API access
+ # because previous authentication most probably is not valid
+ # before or after time sync.
+ self.__api = None
+ self.run_state(
+ tgt,
+ 'cmd.run', 'service ntp stop; ntpd -gq; service ntp start')
+ new_time_res = self.run_state(tgt, 'cmd.run', 'date')
+ for node_name, time in sorted(new_time_res[0]['return'][0].items()):
+ LOG.info("{0}: {1}".format(node_name, time))
+ self.__api = None
diff --git a/tcp_tests/managers/underlay_ssh_manager.py b/tcp_tests/managers/underlay_ssh_manager.py
index 8b3da06..7d3da96 100644
--- a/tcp_tests/managers/underlay_ssh_manager.py
+++ b/tcp_tests/managers/underlay_ssh_manager.py
@@ -363,12 +363,13 @@
with self.remote(node_name=host) as remote:
remote.upload(source, destination)
- def get_random_node(self):
+ def get_random_node(self, node_names=None):
"""Get random node name
+ :param node_names: list of node names to choose from
+ (defaults to all underlay nodes)
:return: str, name of node
"""
- return random.choice(self.node_names())
+ return random.choice(node_names or self.node_names())
def yaml_editor(self, file_path, node_name=None, host=None,
address_pool=None):
@@ -421,21 +422,24 @@
t='{0}_log.tar.gz'.format(artifact_name), d='/var/log'))
minion_nodes = [ssh for ssh in self.config_ssh
if node_role not in ssh['roles']]
- for node in minion_nodes:
- try:
- with self.remote(host=node['host']) as r_node:
- r_node.check_call((
- 'tar '
- '--absolute-names '
- '--warning=no-file-changed '
- '-czf {t} {d}'.format(
- t='{0}.tar.gz'.format(node['node_name']),
- d='/var/log')),
- verbose=True, raise_on_err=False)
- except Exception:
- LOG.info("Can not ssh for node {}".format(node))
+
with self.remote(master_node['node_name']) as r:
for node in minion_nodes:
+ LOG.info("Archiving logs on the node {0}"
+ .format(node['node_name']))
+ r.check_call((
+ "salt '{n}*' cmd.run "
+ "'tar "
+ "--absolute-names "
+ "--warning=no-file-changed "
+ "-czf {t} {d}'".format(
+ n=node['node_name'],
+ t='{0}.tar.gz'.format(node['node_name']),
+ d='/var/log')),
+ raise_on_err=False)
+
+ LOG.info("Copying logs from {0} to {1}"
+ .format(node['node_name'], master_node['node_name']))
packages_minion_cmd = ("salt '{0}*' cmd.run "
"'dpkg -l' > /var/log/"
"{0}_packages.output".format(
@@ -443,9 +447,58 @@
r.check_call(packages_minion_cmd)
r.check_call("rsync {0}:/root/*.tar.gz "
"/var/log/".format(node['node_name']),
- verbose=True, raise_on_err=False)
- r.check_call(cmd)
+ raise_on_err=False)
+ r.check_call(cmd)
r.check_call(tar_cmd)
- r.download(destination='{0}_log.tar.gz'.format(artifact_name),
- target=os.getcwd())
+
+ destination_name = '{0}_log.tar.gz'.format(artifact_name)
+ LOG.info("Downloading the artifact {0}".format(destination_name))
+ r.download(destination=destination_name, target=os.getcwd())
+
+ def delayed_call(
+ self, cmd,
+ node_name=None, host=None, address_pool=None,
+ verbose=True, timeout=5,
+ delay_min=None, delay_max=None):
+ """Delayed call of the specified command in background
+
+ :param delay_min: minimum delay in minutes before run
+ the command
+ :param delay_max: maximum delay in minutes before run
+ the command
+ The command will be started at random time in the range
+ from delay_min to delay_max in minutes from 'now'
+ using the command 'at'.
+
+ 'now' is rounded down to whole minutes by the 'at' command, i.e.:
+ now(28 min 59 sec) == 28 min 00 sec.
+
+ So, if delay_min=1, the command may start anywhere in the range
+ from 1 sec to 60 sec after the call.
+
+ If delay_min and delay_max are None, then the command will
+ be executed in the background right now.
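+
+ Example (as used in the failover tests in this change):
+ underlay.delayed_call(
+ "salt 'ctl*' service.restart keepalived",
+ host=config.salt.salt_master_host,
+ delay_min=2, delay_max=3)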
+ """
+ time_min = delay_min or delay_max
+ time_max = delay_max or delay_min
+
+ delay = None
+ if time_min is not None and time_max is not None:
+ delay = random.randint(time_min, time_max)
+
+ delay_str = ''
+ if delay:
+ delay_str = " + {0} min".format(delay)
+
+ delay_cmd = "cat << EOF | at now {0}\n{1}\nEOF".format(delay_str, cmd)
+
+ self.check_call(delay_cmd, node_name=node_name, host=host,
+ address_pool=address_pool, verbose=verbose,
+ timeout=timeout)
+
+ def get_target_node_names(self, target='gtw01.'):
+ """Get all node names which names starts with <target>"""
+ return [node_name for node_name
+ in self.node_names()
+ if node_name.startswith(target)]
diff --git a/tcp_tests/tests/system/conftest.py b/tcp_tests/tests/system/conftest.py
index a4a72a2..64288ab 100644
--- a/tcp_tests/tests/system/conftest.py
+++ b/tcp_tests/tests/system/conftest.py
@@ -32,6 +32,7 @@
'show_step',
'revert_snapshot',
'snapshot',
+ 'func_name',
# config_fixtures
'config',
# underlay_fixtures
@@ -51,6 +52,7 @@
# oss_fixtures
'oss_actions',
'oss_deployed',
+ 'oss_sl_os_deployed',
# decapod_fixtures
'decapod_actions',
'decapod_deployed',
diff --git a/tcp_tests/tests/system/test_failover_openstack_services.py b/tcp_tests/tests/system/test_failover_openstack_services.py
new file mode 100644
index 0000000..37cff72
--- /dev/null
+++ b/tcp_tests/tests/system/test_failover_openstack_services.py
@@ -0,0 +1,602 @@
+# Copyright 2017 Mirantis, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+import pytest
+
+from tcp_tests import logger
+
+LOG = logger.logger
+
+
+def rally_load_task(times=10, concurrency=2):
+ return """{{
+ "NovaServers.boot_and_delete_server": [
+ {{
+ "args": {{
+ "flavor": {{
+ "name": "m1.tiny"
+ }},
+ "image": {{
+ "name": "^cirros.*-disk$"
+ }},
+ "auto_assign_nic": true
+ }},
+ "runner": {{
+ "type": "constant",
+ "times": {times},
+ "concurrency": {concurrency}
+ }},
+ "context": {{
+ "users": {{
+ "tenants": 3,
+ "users_per_tenant": 2
+ }},
+ "network": {{
+ "start_cidr": "10.2.0.0/24",
+ "networks_per_tenant": 2
+ }}
+ }}
+ }}
+ ]
+ }}""".format(times=times, concurrency=concurrency)
+
+
+class TestFailoverOpenStackServices(object):
+ """Test class for testing MCP services failover"""
+
+ def show_failed_msg(self, failed):
+ return "There are failed tempest tests:\n\n {0}".format(
+ '\n\n '.join([(name + ': ' + detail)
+ for name, detail in failed.items()]))
+
+ def create_and_run_rally_load_task(
+ self, rally, times, concurrency, timeout, raise_on_timeout=False):
+
+ rally.create_rally_task('/root/rally/rally_load_task.json',
+ rally_load_task(times, concurrency))
+ LOG.info("Running rally load task: {0} iterations with concurrency {1}"
+ ", timeout: {2} sec".format(times, concurrency, timeout))
+
+ # Run rally task with created task file
+ res = rally.run_task('/home/rally/.rally/rally_load_task.json',
+ timeout=timeout,
+ raise_on_timeout=raise_on_timeout,
+ verbose=False)
+ # LOG only lines related to the task iterations,
+ # skip all other setup/teardown messages
+ for line in res['stdout']:
+ if 'rally.task.runner' in line:
+ LOG.info(line.strip())
+
+ def get_ps_time(self, underlay, process_name, node_names):
+ """Get the started datetime of the process on the specified nodes
+
+ Returns the dict {<node_name>: <str>, } where <str> is the 'ps' output
+ """
+ res = {
+ node_name: underlay.check_call(
+ "ps -eo lstart,cmd|grep [^]]{0}".format(process_name),
+ node_name=node_name, raise_on_err=False)['stdout_str']
+ for node_name in node_names
+ }
+ return res
+
+ @pytest.mark.grab_versions
+ @pytest.mark.fail_snapshot
+ @pytest.mark.with_rally(rally_node="gtw01.", prepare_openstack=True)
+ def test_restart_keepalived(self, func_name, underlay, config,
+ openstack_deployed, sl_os_deployed,
+ common_services_actions,
+ salt_actions, openstack_actions,
+ rally, show_step):
+ """Test restart keepalived on ctl* nodes
+
+ Scenario:
+ 1. Set keepalived to restart on ctl* nodes in few minutes
+ 2. Run rally task to generate load (some tasks should fail
+ because of step 1)
+ 3. Check that keepalived was restarted on ctl* nodes
+ 4. Run tempest smoke after failover
+ 5. Check tempest report for failed tests
+
+ Requirements:
+ - Salt cluster
+ - OpenStack cluster
+ """
+ # TR case #4756965
+ common_services_actions.check_keepalived_pillar()
+ salt = salt_actions
+
+ ctl_node_names = underlay.get_target_node_names(
+ target='ctl')
+
+ # Get the ps output with datetime of the process
+ ps_before = self.get_ps_time(underlay, "keepalived", ctl_node_names)
+ assert all(["keepalived" in p for n, p in ps_before.items()]), (
+ "'keepalived' is not running on some nodes: {0}".format(ps_before))
+
+ # STEP #1
+ show_step(1)
+ underlay.delayed_call(
+ "salt 'ctl*' service.restart keepalived",
+ host=config.salt.salt_master_host,
+ delay_min=2,
+ delay_max=3)
+
+ # STEP #2
+ show_step(2)
+ # Run rally task with created task file
+ self.create_and_run_rally_load_task(
+ rally, times=60, concurrency=6, timeout=900)
+
+ # STEP #3
+ show_step(3)
+ ret = salt.service_status("I@nova:controller:enabled:True",
+ "keepalived")
+ LOG.info(ret)
+ ps_after = self.get_ps_time(underlay, "keepalived", ctl_node_names)
+ for node_name, ps in ps_before.items():
+ assert ps_after[node_name] and (ps != ps_after[node_name]), (
+ "Keepalived wasn't restarted on node {0}".format(node_name))
+
+ # STEP #4
+ show_step(4)
+ results = rally.run_tempest(pattern='set=smoke',
+ report_prefix=func_name,
+ timeout=1800)
+ # Step #5
+ show_step(5)
+ assert not results['fail'], self.show_failed_msg(results['fail'])
+
+ LOG.info("*************** DONE **************")
+
+ @pytest.mark.grab_versions
+ @pytest.mark.fail_snapshot
+ @pytest.mark.with_rally(rally_node="gtw01.", prepare_openstack=True)
+ def test_stop_keepalived(self, func_name, underlay, config,
+ openstack_deployed, sl_os_deployed,
+ common_services_actions,
+ salt_actions, openstack_actions,
+ rally, show_step):
+ """Test stop keepalived on ctl node with VIP under load
+
+ Scenario:
+ 1. Find controller minion id with VIP
+ 2. Set keepalived to stop on the ctl node with VIP in few minutes
+ 3. Run rally task to generate load (some tasks should fail
+ because of step 2)
+ 4. Check that keepalived was stopped on the ctl node with VIP
+ 5. Run tempest smoke after failover
+ 6. Check tempest report for failed tests
+
+ Requirements:
+ - Salt cluster
+ - OpenStack cluster
+ """
+ # TR case #3385682
+ common_services_actions.check_keepalived_pillar()
+ salt = salt_actions
+
+ ctl_node_names = underlay.get_target_node_names(
+ target='ctl')
+
+ # Get the ps output with datetime of the process
+ ps_before = self.get_ps_time(underlay, "keepalived", ctl_node_names)
+ assert all(["keepalived" in p for n, p in ps_before.items()]), (
+ "'keepalived' is not running on some nodes: {0}".format(ps_before))
+
+ # STEP #1
+ show_step(1)
+ ctl_vip_pillar = salt.get_pillar(
+ tgt="I@nova:controller:enabled:True",
+ pillar="_param:cluster_vip_address")[0]
+ vip = [vip for minion_id, vip in ctl_vip_pillar.items()][0]
+ minion_vip = common_services_actions.get_keepalived_vip_minion_id(vip)
+ LOG.info("VIP {0} is on {1}".format(vip, minion_vip))
+
+ # STEP #2
+ show_step(2)
+ underlay.delayed_call(
+ "salt '{0}' service.stop keepalived".format(minion_vip),
+ host=config.salt.salt_master_host,
+ delay_min=2,
+ delay_max=3)
+
+ # STEP #3
+ show_step(3)
+ # Run rally task with created task file
+ self.create_and_run_rally_load_task(
+ rally, times=60, concurrency=6, timeout=900)
+
+ # STEP #4
+ show_step(4)
+ ret = salt.service_status("I@nova:controller:enabled:True",
+ "keepalived")
+ LOG.info(ret)
+ ps_after = self.get_ps_time(underlay, "keepalived", ctl_node_names)
+
+ for node_name, ps in ps_before.items():
+ if node_name == minion_vip:
+ # Check that keepalived actually stopped on <minion_vip> node
+ assert not ps_after[node_name], (
+ "Keepalived was not stopped on node {0}"
+ .format(minion_vip))
+ else:
+ # Check that keepalived on other ctl nodes was not restarted
+ assert ps == ps_after[node_name], (
+ "Keepalived was restarted while it shouldn't!")
+
+ # STEP #5
+ show_step(5)
+ results = rally.run_tempest(pattern='set=smoke',
+ report_prefix=func_name,
+ timeout=1800)
+ # Step #6
+ show_step(6)
+ assert not results['fail'], self.show_failed_msg(results['fail'])
+
+ LOG.info("*************** DONE **************")
+
+ @pytest.mark.grab_versions
+ @pytest.mark.fail_snapshot
+ @pytest.mark.with_rally(rally_node="gtw01.", prepare_openstack=True)
+ def test_kill_keepalived(self, func_name, underlay, config,
+ openstack_deployed, sl_os_deployed,
+ common_services_actions,
+ salt_actions, openstack_actions,
+ rally, show_step):
+ """Test kill keepalived and haproxy on ctl node with VIP under load
+
+ Scenario:
+ 1. Find controller minion id with VIP
+ 2. Set keepalived to be killed on the ctl node with VIP
+ in few minutes, TR case #3385683
+ 3. Run rally task to generate load (some tasks should fail
+ because of step 2)
+ 4. Check that keepalived was killed on the ctl node with VIP
+ 5. Check that SL sent an e-mail notification about the failed
+ keepalived service, and then remove the VIP remaining
+ on the previous VIP node during running rally task with
+ load.
+ 6. Check that VIP was actually migrated on a new node
+ 7. Find controller minion id with migrated VIP
+ 8. Set haproxy to be killed on the ctl node with VIP
+ in few minutes, TR case #4753980
+ 9. Run rally task to generate load (some tasks should fail
+ because of step 8)
+ 10. Check that haproxy was killed on the ctl node with VIP
+ and started again by systemd
+ 11. Run tempest smoke after failover
+ 12. Check tempest report for failed tests
+
+ Requirements:
+ - Salt cluster
+ - OpenStack cluster
+ """
+ common_services_actions.check_keepalived_pillar()
+ salt = salt_actions
+
+ ctl_node_names = underlay.get_target_node_names(
+ target='ctl')
+
+ # Keepalived case
+ # STEP #1
+ show_step(1)
+ # Get the ps output with datetime of the process
+ ps_before = self.get_ps_time(underlay, "keepalived", ctl_node_names)
+ assert all(["keepalived" in p for n, p in ps_before.items()]), (
+ "'keepalived' is not running on some nodes: {0}".format(ps_before))
+
+ ctl_vip_pillar = salt.get_pillar(
+ tgt="I@nova:controller:enabled:True",
+ pillar="_param:cluster_vip_address")[0]
+ vip = [vip for minion_id, vip in ctl_vip_pillar.items()][0]
+ minion_vip = common_services_actions.get_keepalived_vip_minion_id(vip)
+ LOG.info("VIP {0} is on {1}".format(vip, minion_vip))
+
+ # STEP #2
+ show_step(2)
+ underlay.delayed_call(
+ "salt '{0}' cmd.run 'killall -9 keepalived'".format(minion_vip),
+ host=config.salt.salt_master_host,
+ delay_min=2,
+ delay_max=3)
+
+ LOG.info("'at -l':\n" + underlay.check_call(
+ "at -l", host=config.salt.salt_master_host)['stdout_str'])
+
+ # STEP #3
+ show_step(3)
+ # Run rally task with created task file
+ self.create_and_run_rally_load_task(
+ rally, times=60, concurrency=4, timeout=900)
+
+ # STEP #4
+ show_step(4)
+ ret = salt.service_status("I@nova:controller:enabled:True",
+ "keepalived")
+ LOG.info(ret)
+ ps_after = self.get_ps_time(underlay, "keepalived", ctl_node_names)
+
+ for node_name, ps in ps_before.items():
+ if node_name == minion_vip:
+ # Check that keepalived actually stopped on <minion_vip> node
+ assert not ps_after[node_name], (
+ "Keepalived was not stopped on node {0}"
+ .format(minion_vip))
+ else:
+ # Check that keepalived on other ctl nodes was not restarted
+ assert ps == ps_after[node_name], (
+ "Keepalived was restarted while it shouldn't!")
+ # STEP #5
+ show_step(5)
+ # TODO(ddmitriev):
+ # 5. Check that SL sent an e-mail notification about the failed
+ # keepalived service, and then remove the VIP remaining
+ # on the node after killing keepalived.
+ # Alternative: check prometheus alerts list on mon*:
+ # curl http://localhost:15011/api/v1/alerts
+
+ # Remove the VIP address manually because
+ # the killed keepalived cannot do it
+ underlay.delayed_call(
+ "salt '{0}' cmd.run 'ip a d {1}/32 dev ens4'"
+ .format(minion_vip, vip),
+ host=config.salt.salt_master_host,
+ delay_min=2,
+ delay_max=3)
+ # Run rally task with created task file
+ self.create_and_run_rally_load_task(
+ rally, times=60, concurrency=4, timeout=900)
+
+ # STEP #6
+ show_step(6)
+ # Check that VIP has been actually migrated to a new node
+ new_minion_vip = common_services_actions.get_keepalived_vip_minion_id(
+ vip)
+ LOG.info("Migrated VIP {0} is on {1}".format(vip, new_minion_vip))
+ assert new_minion_vip != minion_vip, (
+ "VIP {0} wasn't migrated from {1} after killing keepalived!"
+ .format(vip, new_minion_vip))
+ common_services_actions.check_keepalived_pillar()
+
+ # Haproxy case
+ # STEP #7
+ show_step(7)
+ # Get the ps output with datetime of the process
+ ps_before = self.get_ps_time(underlay, "haproxy", ctl_node_names)
+ assert all(["haproxy" in p for n, p in ps_before.items()]), (
+ "'haproxy' is not running on some nodes: {0}".format(ps_before))
+
+ # STEP #8
+ show_step(8)
+ underlay.delayed_call(
+ "salt '{0}' cmd.run 'killall -9 haproxy'".format(new_minion_vip),
+ host=config.salt.salt_master_host,
+ delay_min=2,
+ delay_max=3)
+
+ LOG.info("'at -l':\n" + underlay.check_call(
+ "at -l", host=config.salt.salt_master_host)['stdout_str'])
+
+ # STEP #9
+ show_step(9)
+ # Run rally task with created task file
+ self.create_and_run_rally_load_task(
+ rally, times=200, concurrency=4, timeout=1800)
+
+ # STEP #10
+ show_step(10)
+ ret = salt.service_status("I@nova:controller:enabled:True",
+ "haproxy")
+ LOG.info(ret)
+ ps_after = self.get_ps_time(underlay, "haproxy", ctl_node_names)
+
+ for node_name, ps in ps_before.items():
+ if node_name == new_minion_vip:
+ # Check that haproxy has been actually restarted
+ # on <new_minion_vip> node
+ assert ps_after[node_name] and (ps != ps_after[node_name]), (
+ "Haproxy wasn't restarted on node {0}: {1}"
+ .format(node_name, ps_after[node_name]))
+ else:
+ # Check that haproxy on other ctl nodes was not restarted
+ assert ps == ps_after[node_name], (
+ "Haproxy was restarted while it shouldn't on node {0}"
+ .format(node_name))
+
+ # STEP #11
+ show_step(11)
+ results = rally.run_tempest(pattern='set=smoke',
+ report_prefix=func_name,
+ timeout=1800)
+ # Step #12
+ show_step(12)
+ assert not results['fail'], self.show_failed_msg(results['fail'])
+
+ LOG.info("*************** DONE **************")
+
+ @pytest.mark.grab_versions
+ @pytest.mark.fail_snapshot
+ @pytest.mark.with_rally(rally_node="gtw01.", prepare_openstack=True)
+ def test_kill_rabbit_galera(self, func_name, underlay, config,
+ openstack_deployed, sl_os_deployed,
+ common_services_actions,
+ salt_actions, openstack_actions,
+ rally, show_step):
+ """Test kill rabbitmq and galera on ctl node with VIP under load
+
+ Scenario:
+ 1. Find controller minion id with VIP
+ 2. Set rabbitmq_server to be killed on a random ctl node
+ in few minutes, TR case #3385677
+ 3. Run rally task to generate load
+ 4. Check that rabbitmq_server was killed on the ctl node with VIP
+ 5. Find controller minion id with Galera which is receiving
+ connections
+ 6. Set mysql server to be killed in few minutes, TR case #4753976
+ 7. Run rally task to generate load
+ 8. Check that mysql was killed and started again by systemd
+ 9. Check galera cluster status and replication
+ 10. Run tempest smoke after failover
+ 11. Check tempest report for failed tests
+
+ Requirements:
+ - Salt cluster
+ - OpenStack cluster
+ """
+ common_services_actions.check_keepalived_pillar()
+ salt = salt_actions
+
+ ctl_node_names = underlay.get_target_node_names(
+ target='ctl')
+
+ # Rabbitmq case
+ # STEP #1
+ show_step(1)
+ # Get the ps output with datetime of the process
+ ps_before = self.get_ps_time(
+ underlay, "rabbitmq_server", ctl_node_names)
+ assert all(["rabbitmq_server" in p for n, p in ps_before.items()]), (
+ "'rabbitmq_server' is not running on some nodes: {0}"
+ .format(ps_before))
+
+ ctl_vip_pillar = salt.get_pillar(
+ tgt="I@nova:controller:enabled:True",
+ pillar="_param:cluster_vip_address")[0]
+ vip = [vip for minion_id, vip in ctl_vip_pillar.items()][0]
+ ctl_minions = ctl_vip_pillar.keys()
+ minion_vip = common_services_actions.get_keepalived_vip_minion_id(vip)
+ LOG.info("VIP {0} is on {1}".format(vip, minion_vip))
+
+ # STEP #2
+ show_step(2)
+
+ ctl_minion = underlay.get_random_node(ctl_minions)
+ ctl_node_name = salt_actions.get_grains(
+ tgt=ctl_minion, grains='fqdn')[0][ctl_minion]
+ LOG.info("Scheduling to kill rabbitmq on the minion {0}"
+ .format(ctl_minion))
+ underlay.delayed_call(
+ "salt '{0}' cmd.run 'killall -9 -u rabbitmq'".format(ctl_minion),
+ host=config.salt.salt_master_host,
+ delay_min=2,
+ delay_max=3)
+
+ LOG.info("'at -l':\n" + underlay.check_call(
+ "at -l", host=config.salt.salt_master_host)['stdout_str'])
+
+ # STEP #3
+ show_step(3)
+ # Run rally task with created task file
+ self.create_and_run_rally_load_task(
+ rally, times=60, concurrency=4, timeout=900)
+
+ # STEP #4
+ show_step(4)
+ ps_after = self.get_ps_time(underlay,
+ "rabbitmq_server",
+ ctl_node_names)
+
+ for node_name, ps in ps_before.items():
+ if node_name == ctl_node_name:
+ # Check that rabbitmq_server has been actually stopped
+ # on the <ctl_node_name> node where it was killed
+ assert not ps_after[node_name], (
+ "'rabbitmq_server' was not stopped on node {0}"
+ .format(ctl_node_name))
+ else:
+ # Check that rabbitmq_server on other ctl nodes
+ # was not restarted
+ assert ps == ps_after[node_name], (
+ "'rabbitmq_server' was restarted while it shouldn't!")
+
+ # Mysql case
+ # STEP #5
+ show_step(5)
+ # At first, ensure that mysql is running on all controllers
+ ps_before = self.get_ps_time(
+ underlay, "mysqld", ctl_node_names)
+ assert all(["mysqld" in p for n, p in ps_before.items()]), (
+ "'mysqld' is not running on some nodes: {0}"
+ .format(ps_before))
+
+ # Check haproxy status on the node with VIP and find the mysql backend
+ # which is receiving the connections
+ haproxy_status = common_services_actions.get_haproxy_status(minion_vip)
+ mysql_status = haproxy_status['mysql_cluster']
+ mysql_tgt = ''
+ scur = 0
+ for svname in mysql_status.keys():
+ if svname == "FRONTEND" or svname == "BACKEND":
+ continue
+ snew = int(mysql_status[svname]['scur'])
+ if scur < snew:
+ scur = snew
+ mysql_tgt = svname + '*'
+ assert scur > 0, ("No sessions to 'mysql_cluster' haproxy backend on "
+ "the node with VIP, something wrong with cluster.")
+
+ # STEP #6
+ show_step(6)
+ LOG.info("Scheduling to kill mysqld on the minion {0}"
+ .format(mysql_tgt))
+ underlay.delayed_call(
+ "salt '{0}' cmd.run 'killall -9 -u mysql'".format(mysql_tgt),
+ host=config.salt.salt_master_host,
+ delay_min=2,
+ delay_max=3)
+
+ LOG.info("'at -l':\n" + underlay.check_call(
+ "at -l", host=config.salt.salt_master_host)['stdout_str'])
+
+ # STEP #7
+ show_step(7)
+ # Run rally task with created task file
+ self.create_and_run_rally_load_task(
+ rally, times=60, concurrency=4, timeout=900)
+
+ # STEP #8
+ show_step(8)
+ ret = salt.service_status("I@nova:controller:enabled:True",
+ "mysql")
+ LOG.info(ret)
+ ps_after = self.get_ps_time(underlay, "mysqld", ctl_node_names)
+
+ for node_name, ps in ps_before.items():
+ if node_name == minion_vip:
+ # Check that mysql actually restarted on <minion_vip> node
+ assert ps_after[node_name] and (ps != ps_after[node_name]), (
+ "Mysql wasn't restarted on node {0}: {1}"
+ .format(node_name, ps_after[node_name]))
+ else:
+ # Check that Mysql on other ctl nodes was not restarted
+ assert ps == ps_after[node_name], (
+ "Mysql was restarted while it shouldn't on node {0}"
+ .format(node_name))
+
+ # STEP #9
+ show_step(9)
+ # TODO(ddmitriev): check galera cluster status and replication
+ # like it was checked in OSTF.
+
+ # STEP #10
+ show_step(10)
+ results = rally.run_tempest(pattern='set=smoke',
+ report_prefix=func_name,
+ timeout=1800)
+ # Step #11
+ show_step(11)
+ assert not results['fail'], self.show_failed_msg(results['fail'])
+
+ LOG.info("*************** DONE **************")