Testcases for services failover

- keepalived restart # 4756965
- keepalived stop # 3385682

Changes:
- RallyManager refactored to use updated rally container with tempest
- Added 'rally.create_rally_task' and 'rally.run_task' methods to
  generate load on the OpenStack cluster with the specified task config
- new mark for test cases that configure 'rally' fixture:
  @pytest.mark.with_rally(rally_node=<str>,
                          prepare_openstack=<bool>,
                          prepare_tempest=<bool>)
- a new method common_services_deployed.check_keepalived_pillar()
  to check the keepalived pillar settings consistency
- a new fixture 'func_name' returns the current test function name
- a new method 'underlay.get_target_node_names(target='ctl')' to get
  a list of all nodes which name starts with the specified target string
- a new method underlay.delayed_call() which can postpone the specified
  shell command to run several minutes later in the background
  on the specified node
- fixture 'grab_versions' now works also for failed tests

Change-Id: Icede63163ae0b3569e8463563cb548e2d314899d
diff --git a/tcp_tests/fixtures/common_fixtures.py b/tcp_tests/fixtures/common_fixtures.py
index 9d6ccba..31f0f1e 100644
--- a/tcp_tests/fixtures/common_fixtures.py
+++ b/tcp_tests/fixtures/common_fixtures.py
@@ -52,6 +52,7 @@
     finish_step = "FINISH {} TEST. TOOK {} min {} sec".format(
         step_name, minutes, seconds
     )
+    print("\n\n")
     foot = "\n" + "<" * 5 + "#" * 30 + "[ {} ]" + "#" * 30 + ">" * 5
     foot = foot.format(finish_step)
     LOG.info(foot)
@@ -69,3 +70,10 @@
     steps_mark = request.keywords.get('steps', None)
     steps = steps_mark.args[0]
     return steps
+
+
+@pytest.fixture(scope='function', autouse=True)
+def func_name(request):
+    """Name of the current test function"""
+    return getattr(request.node.function, '_name',
+                   request.node.function.__name__)
diff --git a/tcp_tests/fixtures/openstack_fixtures.py b/tcp_tests/fixtures/openstack_fixtures.py
index 98e367c..1926299 100644
--- a/tcp_tests/fixtures/openstack_fixtures.py
+++ b/tcp_tests/fixtures/openstack_fixtures.py
@@ -41,7 +41,7 @@
 @pytest.fixture(scope='function')
 def openstack_deployed(revert_snapshot, request, config,
                        hardware, underlay, common_services_deployed,
-                       openstack_actions):
+                       openstack_actions, rally):
     """Fixture to get or install OpenStack services on environment
 
     :param revert_snapshot: fixture that reverts snapshot that is specified
@@ -52,11 +52,13 @@
     :param underlay: fixture provides underlay manager
     :param common_services_deployed: fixture provides CommonServicesManager
     :param openstack_actions: fixture provides OpenstackManager instance
+    :param rally: fixture provides RallyManager instance
     :rtype: OpenstackManager
 
     If config.openstack.openstack_installed is not set, this fixture assumes
     that the openstack services were not installed, and do the following:
     - install openstack services
+    - [optional] prepare docker with rally container
     - make snapshot with name 'openstack_deployed'
     - return OpenstackManager instance
 
@@ -64,14 +66,38 @@
     the openstack services were already installed, and do the following:
     - return OpenstackManager instance
 
+    If you want to prepare 'rally', please use mark:
+    @pytest.mark.with_rally(rally_node=<str>,
+                            prepare_openstack=<bool>,
+                            prepare_tempest=<bool>)
+    :param rally_node: first chars of the node name where rally should
+                       be started
+    :param prepare_openstack: if True, prepare OpenStack objects for
+                              rally tasks: cirros image, private net04
+
     If you want to revert 'openstack_deployed' snapshot, please use mark:
     @pytest.mark.revert_snapshot("openstack_deployed")
     """
+
     # Deploy Openstack cluster
     if not config.openstack.openstack_installed:
         steps_path = config.openstack_deploy.openstack_steps_path
         commands = underlay.read_template(steps_path)
         openstack_actions.install(commands)
+
+        # If @pytest.mark.with_rally() is set, then prepare Rally
+        # container for 'openstack_deployed' snapshot.
+        with_rally = request.keywords.get('with_rally', None)
+        if with_rally:
+            prepare_openstack = with_rally.kwargs.get("prepare_openstack",
+                                                      False)
+            prepare_tempest = with_rally.kwargs.get("prepare_tempest", False)
+            if prepare_openstack:
+                rally.prepare_rally_task(target_node='ctl01')
+            if prepare_tempest:
+                rally.prepare_tempest_task()
+            rally.run_container()
+
         hardware.create_snapshot(ext.SNAPSHOT.openstack_deployed)
 
     else:
diff --git a/tcp_tests/fixtures/rally_fixtures.py b/tcp_tests/fixtures/rally_fixtures.py
index 18fa179..335ab88 100644
--- a/tcp_tests/fixtures/rally_fixtures.py
+++ b/tcp_tests/fixtures/rally_fixtures.py
@@ -18,13 +18,19 @@
 
 
 @pytest.fixture(scope='function')
-def rally(config, underlay):
+def rally(request, config, underlay):
     """Fixture that provides various actions for TCP
 
+    :param request: fixture provides pytest data
     :param config: fixture provides oslo.config
     :param underlay: fixture provides underlay manager
     :rtype: RallyManager
 
     For use in tests or fixtures to deploy a custom TCP
     """
-    return rallymanager.RallyManager(underlay, config.salt.salt_master_host)
+    with_rally = request.keywords.get('with_rally', None)
+    rally_node = "gtw01."
+    if with_rally:
+        rally_node = with_rally.kwargs.get("rally_node", "gtw01.")
+
+    return rallymanager.RallyManager(underlay, rally_node)
diff --git a/tcp_tests/fixtures/underlay_fixtures.py b/tcp_tests/fixtures/underlay_fixtures.py
index a1476e3..eacbec9 100644
--- a/tcp_tests/fixtures/underlay_fixtures.py
+++ b/tcp_tests/fixtures/underlay_fixtures.py
@@ -199,7 +199,7 @@
 
 
 @pytest.fixture(scope='function', autouse=True)
-def grab_versions(request, underlay):
+def grab_versions(request, func_name, underlay):
     """Fixture for grab package versions at the end of test
 
     Marks:
@@ -211,11 +211,10 @@
     grab_version = request.keywords.get('grab_versions', None)
 
     def test_fin():
-        default_name = getattr(request.node.function, '_name',
-                               request.node.function.__name__)
-        if hasattr(request.node, 'rep_call') and request.node.rep_call.passed \
+        if hasattr(request.node, 'rep_call') and \
+                (request.node.rep_call.passed or request.node.rep_call.failed)\
                 and grab_version:
             artifact_name = utils.extract_name_from_mark(grab_version) or \
-                "{}".format(default_name)
+                "{}".format(func_name)
             underlay.get_logs(artifact_name)
     request.addfinalizer(test_fin)
diff --git a/tcp_tests/helpers/exceptions.py b/tcp_tests/helpers/exceptions.py
index 259880e..f6c2310 100644
--- a/tcp_tests/helpers/exceptions.py
+++ b/tcp_tests/helpers/exceptions.py
@@ -121,3 +121,15 @@
 class BaseImageIsNotSet(BaseException):
     def __str__(self):
         return "Base image for creating VMs is not set!"
+
+
+class SaltPillarError(BaseException):
+    def __init__(self, minion_id, pillar, message=''):
+        super(SaltPillarError, self).__init__()
+        self.minion_id = minion_id
+        self.pillar = pillar
+        self.message = message
+
+    def __str__(self):
+        return ("Salt pillar '{0}' error on minion {1}: {2}"
+                .format(self.pillar, self.minion_id, self.message))
diff --git a/tcp_tests/helpers/log_step.py b/tcp_tests/helpers/log_step.py
index 6ba7164..dbffa58 100644
--- a/tcp_tests/helpers/log_step.py
+++ b/tcp_tests/helpers/log_step.py
@@ -60,7 +60,7 @@
                   "found!".format(step_num, func.__name__))
     test_case_steps = parse_test_doc(func.__doc__)['steps']
     try:
-        LOG.info(" *** [STEP#{0}] {1} ***".format(
+        LOG.info("\n\n*** [STEP#{0}] {1} ***".format(
             step_num,
             test_case_steps[step_num - 1]))
     except IndexError:
diff --git a/tcp_tests/managers/common_services_manager.py b/tcp_tests/managers/common_services_manager.py
index 658657a..e29cdd6 100644
--- a/tcp_tests/managers/common_services_manager.py
+++ b/tcp_tests/managers/common_services_manager.py
@@ -12,7 +12,11 @@
 #    License for the specific language governing permissions and limitations
 #    under the License.
 
+from tcp_tests.helpers import exceptions
 from tcp_tests.managers.execute_commands import ExecuteCommandsMixin
+from tcp_tests import logger
+
+LOG = logger.logger
 
 
 class CommonServicesManager(ExecuteCommandsMixin):
@@ -32,3 +36,162 @@
         self.execute_commands(commands,
                               label='Install common services')
         self.__config.common_services.common_services_installed = True
+
+    def get_keepalived_vip_minion_id(self, vip):
+        """Get minion ID where keepalived VIP is at the moment"""
+        tgt = 'I@keepalived:cluster:enabled:True'
+        grains = 'ip_interfaces'
+        result = self._salt.get_grains(tgt=tgt, grains=grains)[0]
+        minion_ids = [
+            minion_id for minion_id, interfaces in result.items()
+            for interface, ips in interfaces.items()
+            for ip in ips
+            if ip == vip
+        ]
+        LOG.debug("VIP '{0}' found on minions {1}".format(vip, minion_ids))
+        if len(minion_ids) != 1:
+            raise Exception("VIP {0} is expected on a single node. Actual "
+                            "nodes with VIP: {1}".format(vip, minion_ids))
+        return minion_ids[0]
+
+    def get_keepalived_vips(self):
+        tgt = 'I@keepalived:cluster:enabled:True'
+        pillar = 'keepalived:cluster:instance'
+        return self._salt.get_pillar(tgt=tgt, pillar=pillar)[0]
+
+    def check_keepalived_pillar(self):
+        """Check the keepalived pillars for VIPs
+
+        Check for:
+        - the same VIP is used for the same 'virtual_router_id'
+        - the same password is used for the same 'virtual_router_id'
+        - no 'virtual_router_id' or VIP doubles in different
+          keepalived instances on the same node
+        - no 'priority' doubles inside the same 'virtual_router_id'
+          on different nodes
+
+        :param pillar_vips: dict {
+            <minion_id>: {
+                <keepalived instance>: {
+                    <address>: str,
+                    <password>: str,
+                    <virtual_router_id>: int,
+                    <priority>: int
+                },
+                ...
+            },
+        }
+        :return dict: {
+            <str:vip1> : {
+                'instance_name': <str>
+                'virtual_router_id': <int>,
+                'password': <str>,
+                'nodes' : {<str:node1>: <int:priority>,
+                           <str:node2>: <int:priority>,
+                           ...},
+            },
+            <str:vip2> : { ...
+            },
+        }
+        """
+
+        def check_single_address(vips, minion_id, instance, data):
+            for vip in vips:
+                if vips[vip]['virtual_router_id'] == data['virtual_router_id']\
+                        and (vip != data['address'] or
+                             vips[vip]['instance_name'] != instance):
+                    message = (
+                        "'virtual_router_id': {0} for keepalived instance "
+                        "{1}: {2} is already used for {3}: {4} on nodes {5}"
+                        .format(data['virtual_router_id'],
+                                instance, data['address'],
+                                vips[vip]['instance_name'],
+                                vip,
+                                vips[vip]['nodes'].keys())
+                    )
+                    raise exceptions.SaltPillarError(
+                        minion_id,
+                        'keepalived:cluster:instance',
+                        message)
+
+        def check_single_router_id(vips, minion_id, instance, data):
+            for vip in vips:
+                if vips[vip]['virtual_router_id'] != data['virtual_router_id']\
+                        and vip == data['address']:
+                    message = (
+                        "'virtual_router_id': {0} for keepalived instance "
+                        "{1}: {2} is not the same as for {3}: {4} on nodes {5}"
+                        .format(data['virtual_router_id'],
+                                instance, data['address'],
+                                vips[vip]['instance_name'],
+                                vip,
+                                vips[vip]['nodes'].keys())
+                    )
+                    raise exceptions.SaltPillarError(
+                        minion_id,
+                        'keepalived:cluster:instance',
+                        message)
+
+        pillar_vips = self.get_keepalived_vips()
+        vips = {}
+        for minion_id in pillar_vips:
+            for instance, data in pillar_vips[minion_id].items():
+                address = data['address']
+                password = data['password']
+                virtual_router_id = data['virtual_router_id']
+                priority = data['priority']
+
+                if address not in vips:
+                    # Check that there is the same VIP
+                    # for the same virtual_router_id
+                    check_single_address(vips, minion_id, instance, data)
+
+                    # Add new VIP
+                    vips[address] = {
+                        'instance_name': instance,
+                        'virtual_router_id': virtual_router_id,
+                        'password': password,
+                        'nodes': {
+                            minion_id: priority,
+                        }
+                    }
+                else:
+                    # Check that there is the same virtual_router_id
+                    # for the same VIP
+                    check_single_router_id(vips, minion_id, instance, data)
+                    if vips[address]['password'] != password:
+                        message = (
+                            "'password': {0} for keepalived instance "
+                            "{1}: {2} is not the same as for {3}: {4} on "
+                            "nodes {5}".format(data['password'],
+                                               instance, data['address'],
+                                               vips[address]['instance_name'],
+                                               address,
+                                               vips[address]['nodes'].keys())
+                        )
+                        raise exceptions.SaltPillarError(
+                            minion_id,
+                            'keepalived:cluster:instance',
+                            message)
+
+                    if any([priority == prio
+                            for node, prio in vips[address]['nodes'].items()]):
+                        message = (
+                            "'priority': {0} for keepalived instance "
+                            "{1}: {2} is the same as for {3}: {4} on "
+                            "nodes {5}".format(data['priority'],
+                                               instance, data['address'],
+                                               vips[address]['instance_name'],
+                                               address,
+                                               vips[address]['nodes'].keys())
+                        )
+                        raise exceptions.SaltPillarError(
+                            minion_id,
+                            'keepalived:cluster:instance',
+                            message)
+
+                    # Add data to the vips
+                    vips[address]['nodes'][minion_id] = priority
+
+        LOG.debug("keepalived pillars check passed: {0}".format(vips))
+        return vips
diff --git a/tcp_tests/managers/rallymanager.py b/tcp_tests/managers/rallymanager.py
index dcf4309..87f8805 100644
--- a/tcp_tests/managers/rallymanager.py
+++ b/tcp_tests/managers/rallymanager.py
@@ -14,7 +14,8 @@
 import datetime
 import json
 
-from junit_xml import TestSuite, TestCase
+from devops import error
+from functools32 import lru_cache
 
 from tcp_tests import logger
 from tcp_tests import settings
@@ -26,147 +27,247 @@
 class RallyManager(object):
     """docstring for RallyManager"""
 
-    image_name = 'rallyforge/rally'
-    image_version = '0.9.1'
+    image_name = (
+        'docker-prod-virtual.docker.mirantis.net/'
+        'mirantis/oscore/rally-tempest')
+    image_version = 'latest'
+    tempest_tag = "16.0.0"
+    designate_tag = "0.2.0"
 
-    def __init__(self, underlay, admin_host):
+    def __init__(self, underlay, rally_node='gtw01.'):
         super(RallyManager, self).__init__()
-        self._admin_host = admin_host
         self._underlay = underlay
+        self._node_name = self.get_target_node(target=rally_node)
 
-    def prepare(self):
-        content = """
-sed -i 's|#swift_operator_role = Member|swift_operator_role=SwiftOperator|g' /etc/rally/rally.conf  # noqa
-source /home/rally/openrc
-rally-manage db recreate
-rally deployment create --fromenv --name=tempest
-rally verify create-verifier --type tempest --name tempest-verifier
-rally verify configure-verifier
-rally verify configure-verifier --show
-"""
-        cmd = "cat > {path} << EOF\n{content}\nEOF".format(
-            path='/root/rally/install_tempest.sh', content=content)
-        cmd1 = "chmod +x /root/rally/install_tempest.sh"
-        cmd2 = "scp ctl01:/root/keystonercv3 /root/rally/openrc"
+    @property
+    @lru_cache(maxsize=None)
+    def image_id(self):
+        LOG.info("Getting image id")
+        cmd = ("docker images | grep {0}| grep {1}| awk '{{print $3}}'"
+               .format(self.image_name, self.image_version))
+        res = self._underlay.check_call(cmd, node_name=self._node_name)
+        image_id = res['stdout'][0].strip()
+        LOG.info("Image ID is {}".format(image_id))
+        return image_id
 
-        with self._underlay.remote(host=self._admin_host) as remote:
-            LOG.info("Create rally workdir")
-            remote.check_call('mkdir -p /root/rally')
-            LOG.info("Create install_tempest.sh")
-            remote.check_call(cmd)
-            LOG.info("Chmod +x install_tempest.sh")
-            remote.check_call(cmd1)
-            LOG.info("Copy openstackrc")
-            remote.check_call(cmd2)
+    @property
+    @lru_cache(maxsize=None)
+    def docker_id(self):
+        cmd = ("docker ps | grep {image_id} | "
+               "awk '{{print $1}}'| head -1").format(
+                   image_id=self.image_id)
+        LOG.info("Getting container id")
+        res = self._underlay.check_call(cmd, node_name=self._node_name)
+        docker_id = res['stdout'][0].strip()
+        LOG.info("Container ID is {}".format(docker_id))
+        return docker_id
 
-    def pull_image(self, version=None):
-        version = version or self.image_version
-        image = self.image_name
-        cmd = ("apt-get -y install docker.io &&"
-               " docker pull {image}:{version}".format(image=image,
-                                                       version=version))
-        with self._underlay.remote(host=self._admin_host) as remote:
-            LOG.info("Pull {image}:{version}".format(image=image,
-                                                     version=version))
-            remote.check_call(cmd)
+    # Move method to underlay
+    def get_target_node(self, target='gtw01.'):
+        return [node_name for node_name
+                in self._underlay.node_names()
+                if node_name.startswith(target)][0]
 
-        with self._underlay.remote(host=self._admin_host) as remote:
-            LOG.info("Getting image id")
-            cmd = "docker images | grep {0}| awk '{print $3}'".format(
-                self.image_version)
-            res = remote.check_call(cmd)
-            self.image_id = res['stdout'][0].strip()
-            LOG.info("Image ID is {}".format(self.image_id))
+    def _docker_exec(self, cmd, timeout=None, verbose=False):
+        docker_cmd = ('docker exec -i {docker_id} bash -c "{cmd}"'
+                      .format(cmd=cmd, docker_id=self.docker_id))
+        LOG.info("Executing: {docker_cmd}".format(docker_cmd=docker_cmd))
+        self._underlay.check_call(docker_cmd, node_name=self._node_name,
+                                  verbose=verbose, timeout=timeout)
 
-    def run(self):
-        with self._underlay.remote(host=self._admin_host) as remote:
-            cmd = ("docker run --net host -v /root/rally:/home/rally "
-                   "-tid -u root {image_id}".format(image_id=self.image_id))
+    def _run(self):
+        """Start the rally container in the background"""
+        with self._underlay.remote(node_name=self._node_name) as remote:
+            cmd = ("docker run --net host -v /root/rally:/home/rally/.rally "
+                   "-v /etc/ssl/certs/:/etc/ssl/certs/ "
+                   "-tid -u root --entrypoint /bin/bash {image_id}"
+                   .format(image_id=self.image_id))
             LOG.info("Run Rally container")
             remote.check_call(cmd)
 
-            cmd = ("docker ps | grep {image_id} | "
-                   "awk '{{print $1}}'| head -1").format(
-                       image_id=self.image_id)
-            LOG.info("Getting container id")
-            res = remote.check_call(cmd)
-            self.docker_id = res['stdout'][0].strip()
-            LOG.info("Container ID is {}".format(self.docker_id))
+    def run_container(self, version=None):
+        """Install docker, configure and run rally container"""
+        version = version or self.image_version
+        image = self.image_name
+        LOG.info("Pull {image}:{version}".format(image=image,
+                                                 version=version))
+        cmd = ("apt-get -y install docker.io &&"
+               " docker pull {image}:{version}".format(image=image,
+                                                       version=version))
+        self._underlay.check_call(cmd, node_name=self._node_name)
 
-    def run_tempest(self, test=''):
-        docker_exec = ('docker exec -i {docker_id} bash -c "{cmd}"')
-        commands = [
-            docker_exec.format(cmd="./install_tempest.sh",
-                               docker_id=self.docker_id),
-            docker_exec.format(
-                cmd="source /home/rally/openrc && "
-                    "rally verify start {test}".format(test=test),
-                docker_id=self.docker_id),
-            docker_exec.format(
-                cmd="rally verify report --type json --to result.json",
-                docker_id=self.docker_id),
-            docker_exec.format(
-                cmd="rally verify report --type html --to result.html",
-                docker_id=self.docker_id),
+        LOG.info("Create rally workdir")
+        cmd = 'mkdir -p /root/rally; chown 65500 /root/rally'
+        self._underlay.check_call(cmd, node_name=self._node_name)
+
+        LOG.info("Copy keystonercv3")
+        cmd = "cp /root/keystonercv3 /root/rally/keystonercv3"
+        self._underlay.check_call(cmd, node_name=self._node_name)
+        self._run()
+
+        LOG.info("Create rally deployment")
+        self._docker_exec("rally-manage db recreate")
+        self._docker_exec("source /home/rally/.rally/keystonercv3;"
+                          "rally deployment create --fromenv --name=Abathur")
+        self._docker_exec("rally deployment list")
+
+    def prepare_rally_task(self, target_node='ctl01.'):
+        """Prepare cirros image and private network for rally task"""
+        ctl_node_name = self._underlay.get_target_node_names(
+            target=target_node)[0]
+        cmds = [
+            ". keystonercv3 ; openstack flavor create --public m1.tiny",
+            ("wget http://download.cirros-cloud.net/0.3.4/"
+             "cirros-0.3.4-i386-disk.img"),
+            (". /root/keystonercv3; glance --timeout 120 image-create "
+             "--name cirros-disk --visibility public --disk-format qcow2 "
+             "--container-format bare --progress "
+             "< /root/cirros-0.3.4-i386-disk.img"),
+            ". /root/keystonercv3; neutron net-create net04",
         ]
-        with self._underlay.remote(host=self._admin_host) as remote:
-            LOG.info("Run tempest inside Rally container")
-            for cmd in commands:
-                remote.check_call(cmd, verbose=True)
 
-    def get_results(self, store=True, store_file='tempest.xml'):
-        LOG.info('Storing tests results...')
-        res_file_name = 'result.json'
-        file_prefix = 'results_' + datetime.datetime.now().strftime(
-            '%Y%m%d_%H%M%S') + '_'
-        file_dst = '{0}/{1}{2}'.format(
-            settings.LOGS_DIR, file_prefix, res_file_name)
-        with self._underlay.remote(host=self._admin_host) as remote:
-            remote.download(
-                '/root/rally/{0}'.format(res_file_name),
-                file_dst)
-            res = json.load(remote.open('/root/rally/result.json'))
-        if not store:
-            return res
+        for cmd in cmds:
+            self._underlay.check_call(cmd, node_name=ctl_node_name)
 
-        formatted_tc = []
-        failed_cases = [res['test_cases'][case]
-                        for case in res['test_cases']
-                        if res['test_cases'][case]['status']
-                        in 'fail']
-        for case in failed_cases:
-            if case:
-                tc = TestCase(case['name'])
-                tc.add_failure_info(case['traceback'])
-                formatted_tc.append(tc)
+    def prepare_tempest_task(self):
+        """Configure rally.conf for tempest tests"""
+        pass
+#        LOG.info("Modify rally.conf")
+#        cmd = ("sed -i 's|#swift_operator_role = Member|"
+#               "swift_operator_role=SwiftOperator|g' "
+#               "/etc/rally/rally.conf")
+#        self._docker_exec(cmd)
 
-        skipped_cases = [res['test_cases'][case]
-                         for case in res['test_cases']
-                         if res['test_cases'][case]['status'] in 'skip']
-        for case in skipped_cases:
-            if case:
-                tc = TestCase(case['name'])
-                tc.add_skipped_info(case['reason'])
-                formatted_tc.append(tc)
+    def create_rally_task(self, task_path, task_content):
+        """Create a file with rally task definition
 
-        error_cases = [res['test_cases'][case] for case in res['test_cases']
-                       if res['test_cases'][case]['status'] in 'error']
+        :param task_path: path to JSON or YAML file on target node
+        :task_content: string with json or yaml content to store in file
+        """
+        cmd = "cat > {task_path} << EOF\n{task_content}\nEOF".format(
+            task_path=task_path, task_content=task_content)
+        self._underlay.check_call(cmd, node_name=self._node_name)
 
-        for case in error_cases:
-            if case:
-                tc = TestCase(case['name'])
-                tc.add_error_info(case['traceback'])
-                formatted_tc.append(tc)
+    def run_task(self, task='', timeout=None, raise_on_timeout=True):
+        """Run rally task
 
-        success = [res['test_cases'][case] for case in res['test_cases']
-                   if res['test_cases'][case]['status'] in 'success']
-        for case in success:
-            if case:
-                tc = TestCase(case['name'])
-                formatted_tc.append(tc)
+        :param task: path to json or yaml file with the task definition
+        :param raise_on_timeout: bool, ignore TimeoutError if False
+        """
+        try:
+            self._docker_exec("rally task start {task}".format(task=task),
+                              timeout=timeout, verbose=True)
+        except error.TimeoutError:
+            if raise_on_timeout:
+                raise
+            else:
+                pass
 
-        ts = TestSuite("tempest", formatted_tc)
-        with open(store_file, 'w') as f:
-            ts.to_file(f, [ts], prettyprint=False)
+    # Updated to replace the OpenStackManager method run_tempest
+    def run_tempest(self, conf_name='/var/lib/lvm_mcp.conf',
+                    pattern='set=smoke', concurrency=0, timeout=None,
+                    report_prefix='', report_types=None):
+        """Run tempest tests
 
-        return res
+        :param conf_name: tempest config placed in the rally container
+        :param pattern: tempest testcase name or one of existing 'set=...'
+        :param concurrency: how many threads to use in parallel. 0 means
+                            to take the amount of the cores on the node
+                            <self._node_name>.
+        :param timeout: stop tempest tests after specified timeout.
+        :param report_prefix: str, prefix for report filenames. Usually the
+                              output of the fixture 'func_name'
+        :param report_types: list of the report types that need to download
+                             from the environment: ['html', 'xml', 'json'].
+                             None by default.
+        """
+        report_types = report_types or []
+
+        cmd = (
+            "cat > /root/rally/install_tempest.sh << EOF\n"
+            "rally verify create-verifier"
+            "  --type tempest "
+            "  --name tempest-verifier"
+            "  --source /var/lib/tempest"
+            "  --version {tempest_tag}"
+            "  --system-wide\n"
+            "rally verify add-verifier-ext"
+            "  --source /var/lib/designate-tempest-plugin"
+            "  --version {designate_tag}\n"
+            "rally verify configure-verifier --extend {tempest_conf}\n"
+            "rally verify configure-verifier --show\n"
+            "EOF".format(tempest_tag=self.tempest_tag,
+                         designate_tag=self.designate_tag,
+                         tempest_conf=conf_name))
+        with self._underlay.remote(node_name=self._node_name) as remote:
+            LOG.info("Create install_tempest.sh")
+            remote.check_call(cmd)
+            remote.check_call("chmod +x /root/rally/install_tempest.sh")
+
+        LOG.info("Run tempest inside Rally container")
+        self._docker_exec("/home/rally/.rally/install_tempest.sh")
+        self._docker_exec(
+            ("source /home/rally/.rally/keystonercv3 && "
+             "rally verify start --skip-list /var/lib/mcp_skip.list "
+             "  --concurrency {concurrency} --pattern {pattern}"
+             .format(concurrency=concurrency, pattern=pattern)),
+            timeout=timeout, verbose=True)
+        if report_prefix:
+            report_filename = '{0}_report_{1}'.format(
+                report_prefix,
+                datetime.datetime.now().strftime('%Y%m%d_%H%M%S'))
+        else:
+            report_filename = 'report_{0}'.format(
+                datetime.datetime.now().strftime('%Y%m%d_%H%M%S'))
+        docker_file_prefix = '/home/rally/.rally/' + report_filename
+
+        # Create reports
+        if 'xml' in report_types:
+            self._docker_exec(
+                "rally verify report --type junit-xml --to {0}.xml"
+                .format(docker_file_prefix))
+        if 'html' in report_types:
+            self._docker_exec(
+                "rally verify report --type html --to {0}.html"
+                .format(docker_file_prefix))
+        # Always create report in JSON to return results into test case
+        # However, it won't be downloaded until ('json' in report_types)
+        self._docker_exec("rally verify report --type json --to {0}.json"
+                          .format(docker_file_prefix))
+
+        # Download reports to the settings.LOGS_DIR
+        file_src_prefix = '/root/rally/{0}'.format(report_filename)
+        file_dst_prefix = '{0}/{1}'.format(settings.LOGS_DIR, report_filename)
+        with self._underlay.remote(node_name=self._node_name) as remote:
+            for suffix in report_types:
+                remote.download(file_src_prefix + '.' + suffix,
+                                file_dst_prefix + '.' + suffix)
+            res = json.load(remote.open(file_src_prefix + '.json'))
+
+        # Get latest verification ID to find the latest testcases in the report
+        vtime = {vdata['finished_at']: vid
+                 for vid, vdata in res['verifications'].items()}
+        vlatest_id = vtime[max(vtime.keys())]
+
+        # Each status has the dict with pairs:
+        #   <status>: {
+        #       <case_name>: <case_details>,
+        #    }
+        formatted_tc = {
+            'success': {},
+            'fail': {},
+            'xfail': {},
+            'skip': {}
+        }
+
+        for tname, tdata in res['tests'].items():
+            status = tdata['by_verification'][vlatest_id]['status']
+            details = tdata['by_verification'][vlatest_id].get('details', '')
+            if status not in formatted_tc:
+                # Fail if tempest return a new status that may be
+                # necessary to take into account in test cases
+                raise Exception("Unknown testcase {0} status: {1} "
+                                .format(tname, status))
+            formatted_tc[status][tdata['name']] = details
+        LOG.debug("Formatted testcases: {0}".format(formatted_tc))
+        return formatted_tc
diff --git a/tcp_tests/managers/saltmanager.py b/tcp_tests/managers/saltmanager.py
index 4a58e93..5249186 100644
--- a/tcp_tests/managers/saltmanager.py
+++ b/tcp_tests/managers/saltmanager.py
@@ -158,6 +158,10 @@
         result = self.local(tgt=tgt, fun='pillar.get', args=pillar)
         return result['return']
 
+    def get_grains(self, tgt, grains):
+        result = self.local(tgt=tgt, fun='grains.get', args=grains)
+        return result['return']
+
     def get_ssh_data(self):
         """Generate ssh config for Underlay
 
@@ -192,3 +196,15 @@
             host(k, next(i for i in v['ipv4'] if i in pool_net))
             for k, v in hosts.items()
             if next(i for i in v['ipv4'] if i in pool_net)]
+
+    def service_status(self, tgt, service):
+        result = self.local(tgt=tgt, fun='service.status', args=service)
+        return result['return']
+
+    def service_restart(self, tgt, service):
+        result = self.local(tgt=tgt, fun='service.restart', args=service)
+        return result['return']
+
+    def service_stop(self, tgt, service):
+        result = self.local(tgt=tgt, fun='service.stop', args=service)
+        return result['return']
diff --git a/tcp_tests/managers/underlay_ssh_manager.py b/tcp_tests/managers/underlay_ssh_manager.py
index 8b3da06..3f5d712 100644
--- a/tcp_tests/managers/underlay_ssh_manager.py
+++ b/tcp_tests/managers/underlay_ssh_manager.py
@@ -422,6 +422,8 @@
         minion_nodes = [ssh for ssh in self.config_ssh
                         if node_role not in ssh['roles']]
         for node in minion_nodes:
+            LOG.info("Archiving logs on the node {0}"
+                     .format(node['node_name']))
             try:
                 with self.remote(host=node['host']) as r_node:
                     r_node.check_call((
@@ -431,11 +433,13 @@
                         '-czf {t} {d}'.format(
                             t='{0}.tar.gz'.format(node['node_name']),
                             d='/var/log')),
-                            verbose=True, raise_on_err=False)
+                            raise_on_err=False)
             except Exception:
                 LOG.info("Can not ssh for node {}".format(node))
         with self.remote(master_node['node_name']) as r:
             for node in minion_nodes:
+                LOG.info("Copying logs from {0} to {1}"
+                         .format(node['node_name'], master_node['node_name']))
                 packages_minion_cmd = ("salt '{0}*' cmd.run "
                                        "'dpkg -l' > /var/log/"
                                        "{0}_packages.output".format(
@@ -443,9 +447,58 @@
                 r.check_call(packages_minion_cmd)
                 r.check_call("rsync {0}:/root/*.tar.gz "
                              "/var/log/".format(node['node_name']),
-                             verbose=True, raise_on_err=False)
-            r.check_call(cmd)
+                             raise_on_err=False)
 
+            r.check_call(cmd)
             r.check_call(tar_cmd)
-            r.download(destination='{0}_log.tar.gz'.format(artifact_name),
-                       target=os.getcwd())
+
+            destination_name = '{0}_log.tar.gz'.format(artifact_name)
+            LOG.info("Downloading the artifact {0}".format(destination_name))
+            r.download(destination=destination_name, target=os.getcwd())
+
+    def delayed_call(
+            self, cmd,
+            node_name=None, host=None, address_pool=None,
+            verbose=True, timeout=5,
+            delay_min=None, delay_max=None):
+        """Delayed call of the specified command in background
+
+        :param delay_min: minimum delay in minutes before run
+                          the command
+        :param delay_max: maximum delay in minutes before run
+                          the command
+        The command will be started at a random time in the range
+        from delay_min to delay_max in minutes from 'now'
+        using the command 'at'.
+
+        'now' is rounded to an integer by the 'at' command, i.e.:
+          now(28 min 59 sec) == 28 min 00 sec.
+
+        So, if delay_min=1 , the command may start in range from
+        1 sec to 60 sec.
+
+        If delay_min and delay_max are None, then the command will
+        be executed in the background right now.
+        """
+        time_min = delay_min or delay_max
+        time_max = delay_max or delay_min
+
+        delay = None
+        if time_min is not None and time_max is not None:
+            delay = random.randint(time_min, time_max)
+
+        delay_str = ''
+        if delay:
+            delay_str = " + {0} min".format(delay)
+
+        delay_cmd = "cat << EOF | at now {0}\n{1}\nEOF".format(delay_str, cmd)
+
+        self.check_call(delay_cmd, node_name=node_name, host=host,
+                        address_pool=address_pool, verbose=verbose,
+                        timeout=timeout)
+
+    def get_target_node_names(self, target='gtw01.'):
+        """Get all node names which names starts with <target>"""
+        return [node_name for node_name
+                in self.node_names()
+                if node_name.startswith(target)]
diff --git a/tcp_tests/tests/system/conftest.py b/tcp_tests/tests/system/conftest.py
index a4a72a2..ec3846d 100644
--- a/tcp_tests/tests/system/conftest.py
+++ b/tcp_tests/tests/system/conftest.py
@@ -32,6 +32,7 @@
     'show_step',
     'revert_snapshot',
     'snapshot',
+    'func_name',
     # config_fixtures
     'config',
     # underlay_fixtures
diff --git a/tcp_tests/tests/system/test_failover_openstack_services.py b/tcp_tests/tests/system/test_failover_openstack_services.py
new file mode 100644
index 0000000..87159d6
--- /dev/null
+++ b/tcp_tests/tests/system/test_failover_openstack_services.py
@@ -0,0 +1,236 @@
+#    Copyright 2017 Mirantis, Inc.
+#
+#    Licensed under the Apache License, Version 2.0 (the "License"); you may
+#    not use this file except in compliance with the License. You may obtain
+#    a copy of the License at
+#
+#         http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+#    License for the specific language governing permissions and limitations
+#    under the License.
+import pytest
+
+from tcp_tests import logger
+
+LOG = logger.logger
+
+
+def rally_load_task(times=10, concurrency=2):
+    return """{{
+        "NovaServers.boot_and_delete_server": [
+            {{
+                "args": {{
+                    "flavor": {{
+                        "name": "m1.tiny"
+                    }},
+                    "image": {{
+                        "name": "^cirros.*-disk$"
+                    }},
+                    "auto_assign_nic": true
+                }},
+                "runner": {{
+                    "type": "constant",
+                    "times": {times},
+                    "concurrency": {concurrency}
+                }},
+                "context": {{
+                    "users": {{
+                        "tenants": 3,
+                        "users_per_tenant": 2
+                    }},
+                    "network": {{
+                        "start_cidr": "10.2.0.0/24",
+                        "networks_per_tenant": 2
+                    }}
+                }}
+            }}
+        ]
+    }}""".format(times=times, concurrency=concurrency)
+
+
+class TestFailoverOpenStackServices(object):
+    """Test class for testing MCP services failover"""
+
+    def show_failed_msg(self, failed):
+        return "There are failed tempest tests:\n\n  {0}".format(
+            '\n\n  '.join([(name + ': ' + detail)
+                           for name, detail in failed.items()]))
+
+    @pytest.mark.grab_versions
+    @pytest.mark.fail_snapshot
+    @pytest.mark.with_rally(rally_node="gtw01.", prepare_openstack=True)
+    def test_restart_keepalived(self, func_name, underlay, config,
+                                openstack_deployed, common_services_actions,
+                                salt_actions, openstack_actions,
+                                rally, show_step):
+        """Test restart keepalived on ctl* nodes
+
+        Scenario:
+            1. Set keepalived to restart on ctl* nodes in a few minutes
+            2. Run rally task to generate load (some tasks should fail
+               because of step 1)
+            3. Check that keepalived was restarted on ctl* nodes
+            4. Run tempest smoke after failover
+            5. Check tempest report for failed tests
+
+        Requirements:
+            - Salt cluster
+            - OpenStack cluster
+        """
+        # TR case #4756965
+        common_services_actions.check_keepalived_pillar()
+        salt = salt_actions
+
+        ctl_node_names = underlay.get_target_node_names(
+            target='ctl')
+
+        # Get the ps output with datetime of the process
+        ps_before = {
+            node_name: underlay.check_call(
+                "ps -eo lstart,cmd|grep [^]]keepalived",
+                node_name=node_name)['stdout_str']
+            for node_name in ctl_node_names
+        }
+
+        # STEP #1
+        show_step(1)
+        underlay.delayed_call(
+            "salt 'ctl*' service.restart keepalived",
+            host=config.salt.salt_master_host,
+            delay_min=2,
+            delay_max=3)
+
+        # STEP #2
+        show_step(2)
+        # Create a task file in the directory that will be mounted to rally
+        rally.create_rally_task('/root/rally/rally_load_task.json',
+                                rally_load_task(times=60, concurrency=6))
+        # Run rally task with created task file
+        rally.run_task('/home/rally/.rally/rally_load_task.json', timeout=900,
+                       raise_on_timeout=False)
+
+        # STEP #3
+        show_step(3)
+        ret = salt.service_status("I@nova:controller:enabled:True",
+                                  "keepalived")
+        LOG.info(ret)
+        ps_after = {
+            node_name: underlay.check_call(
+                "ps -eo lstart,cmd|grep [^]]keepalived",
+                node_name=node_name)['stdout_str']
+            for node_name in ctl_node_names
+        }
+
+        for node_name, ps in ps_before.items():
+            assert ps != ps_after[node_name], "Keepalived wasn't restarted!"
+
+        # STEP #4
+        show_step(4)
+        results = rally.run_tempest(pattern='set=smoke',
+                                    report_prefix=func_name,
+                                    timeout=1800)
+        # Step #5
+        show_step(5)
+        assert not results['fail'], self.show_failed_msg(results['fail'])
+
+        LOG.info("*************** DONE **************")
+
+    @pytest.mark.grab_versions
+    @pytest.mark.fail_snapshot
+    @pytest.mark.with_rally(rally_node="gtw01.", prepare_openstack=True)
+    def test_stop_keepalived(self, func_name, underlay, config,
+                             openstack_deployed, common_services_actions,
+                             salt_actions, openstack_actions,
+                             rally, show_step):
+        """Test stop keepalived on ctl node with VIP under load
+
+        Scenario:
+            1. Find controller minion id with VIP
+            2. Set keepalived to stop on the ctl node with VIP in a few minutes
+            3. Run rally task to generate load (some tasks should fail
+               because of step 2)
+            4. Check that keepalived was stopped on the ctl node with VIP
+            5. Run tempest smoke after failover
+            6. Check tempest report for failed tests
+
+        Requirements:
+            - Salt cluster
+            - OpenStack cluster
+        """
+        # TR case #3385682
+        common_services_actions.check_keepalived_pillar()
+        salt = salt_actions
+
+        ctl_node_names = underlay.get_target_node_names(
+            target='ctl')
+
+        # Get the ps output with datetime of the process
+        ps_before = {
+            node_name: underlay.check_call(
+                "ps -eo lstart,cmd|grep [^]]keepalived",
+                node_name=node_name)['stdout_str']
+            for node_name in ctl_node_names
+        }
+
+        # STEP #1
+        show_step(1)
+        ctl_vip_pillar = salt.get_pillar(
+            tgt="I@nova:controller:enabled:True",
+            pillar="_param:cluster_vip_address")[0]
+        vip = [vip for minion_id, vip in ctl_vip_pillar.items()][0]
+        minion_vip = common_services_actions.get_keepalived_vip_minion_id(vip)
+        LOG.info("VIP {0} is on {1}".format(vip, minion_vip))
+
+        # STEP #2
+        show_step(2)
+        underlay.delayed_call(
+            "salt '{0}' service.stop keepalived".format(minion_vip),
+            host=config.salt.salt_master_host,
+            delay_min=2,
+            delay_max=3)
+
+        # STEP #3
+        show_step(3)
+        # Create a task file in the directory that will be mounted to rally
+        rally.create_rally_task('/root/rally/rally_load_task.json',
+                                rally_load_task(times=60, concurrency=6))
+        # Run rally task with created task file
+        rally.run_task('/home/rally/.rally/rally_load_task.json', timeout=900,
+                       raise_on_timeout=False)
+
+        # STEP #4
+        show_step(4)
+        ret = salt.service_status("I@nova:controller:enabled:True",
+                                  "keepalived")
+        LOG.info(ret)
+        ps_after = {
+            node_name: underlay.check_call(
+                "ps -eo lstart,cmd|grep [^]]keepalived",
+                node_name=node_name, raise_on_err=False)['stdout_str']
+            for node_name in ctl_node_names
+        }
+
+        for node_name, ps in ps_before.items():
+            if node_name == minion_vip:
+                # Check that keepalived actually stopped on <minion_vip> node
+                assert not ps_after[node_name], (
+                    "Keepalived was not stopped on node {0}"
+                    .format(minion_vip))
+            else:
+                # Check that keepalived on other ctl nodes was not restarted
+                assert ps == ps_after[node_name], (
+                   "Keepalived was restarted while it shouldn't!")
+
+        # STEP #5
+        show_step(5)
+        results = rally.run_tempest(pattern='set=smoke',
+                                    report_prefix=func_name,
+                                    timeout=1800)
+        # Step #6
+        show_step(6)
+        assert not results['fail'], self.show_failed_msg(results['fail'])
+
+        LOG.info("*************** DONE **************")