Ceph failover HA

Change-Id: I41945fa48f5de97c472392b22e0efc6319b5aefb
diff --git a/tcp_tests/managers/openstack_manager.py b/tcp_tests/managers/openstack_manager.py
index 2780d9b..bac459a 100644
--- a/tcp_tests/managers/openstack_manager.py
+++ b/tcp_tests/managers/openstack_manager.py
@@ -12,6 +12,7 @@
 #    License for the specific language governing permissions and limitations
 #    under the License.
 import os
+import requests
 
 from tcp_tests.managers.execute_commands import ExecuteCommandsMixin
 from tcp_tests import logger
@@ -39,6 +40,55 @@
         self.execute_commands(commands,
                               label='Install OpenStack services')
         self.__config.openstack.openstack_installed = True
+        h_data = self.get_horizon_data()
+        self.__config.openstack.horizon_host = h_data['horizon_host']
+        self.__config.openstack.horizon_port = h_data['horizon_port']
+        self.__config.openstack.horizon_user = h_data['horizon_user']
+        self.__config.openstack.horizon_password = h_data['horizon_password']
+        self.auth_in_horizon(
+            h_data['horizon_host'],
+            h_data['horizon_port'],
+            h_data['horizon_user'],
+            h_data['horizon_password'])
+
+    def get_horizon_data(self):
+        horizon_data = {}
+        tgt = 'I@nginx:server and not cfg*'
+        pillar_host = ('nginx:server:site:nginx_ssl_redirect'
+                       '_openstack_web:host:name')
+        pillar_port = ('nginx:server:site:nginx_ssl_redirect'
+                       '_openstack_web:host:port')
+        hosts = self._salt.get_pillar(tgt=tgt, pillar=pillar_host)
+        host = set([ip for item in hosts for node, ip
+                    in item.items() if ip])
+        if host:
+            host = host.pop()
+        ports = self._salt.get_pillar(tgt=tgt, pillar=pillar_port)
+
+        port = set([port for item in ports for node, port
+                    in item.items() if port])
+        if port:
+            port = port.pop()
+        tgt = 'I@keystone:server and ctl01*'
+        pillar_user = 'keystone:server:admin_name'
+        pillar_password = 'keystone:server:admin_password'
+        users = self._salt.get_pillar(tgt=tgt, pillar=pillar_user)
+        user = set([user for item in users for node, user
+                    in item.items() if user])
+        if user:
+            user = user.pop()
+        passwords = self._salt.get_pillar(tgt=tgt, pillar=pillar_password)
+        pwd = set([pwd for item in passwords for node, pwd
+                   in item.items() if pwd])
+        if pwd:
+            pwd = pwd.pop()
+        horizon_data.update({'horizon_host': host})
+        horizon_data.update({'horizon_port': port})
+        horizon_data.update({'horizon_user': user})
+        horizon_data.update({'horizon_password': pwd})
+        LOG.info("Data from pillars {}".format(horizon_data))
+
+        return horizon_data
 
     def run_tempest(
             self,
@@ -121,3 +171,37 @@
         LOG.info('Reboot (warm restart) nodes {0}'.format(node_names))
         self.warm_shutdown_openstack_nodes(node_names, timeout=timeout)
         self.warm_start_nodes(node_names)
+
+    def auth_in_horizon(self, host, port, user, password):
+        client = requests.session()
+        url = "http://{0}:{1}".format(
+            self.__config.openstack.horizon_host,
+            self.__config.openstack.horizon_port)
+        # Retrieve the CSRF token first
+        client.get(url, verify=False)  # sets cookie
+        if not len(client.cookies):
+            login_data = dict(
+                username=self.__config.openstack.horizon_user,
+                password=self.__config.openstack.horizon_password,
+                next='/')
+            resp = client.post(url, data=login_data,
+                               headers=dict(Referer=url), verify=False)
+            LOG.debug("Horizon resp {}".format(resp))
+            assert 200 == resp.status_code, ("Failed to auth in "
+                                             "horizon. Response "
+                                             "{0}".format(resp.status_code))
+        else:
+            login_data = dict(
+                username=self.__config.openstack.horizon_user,
+                password=self.__config.openstack.horizon_password,
+                next='/')
+            csrftoken = client.cookies.get('csrftoken', None)
+            if csrftoken:
+                login_data['csrfmiddlewaretoken'] = csrftoken
+
+            resp = client.post(url, data=login_data,
+                               headers=dict(Referer=url), verify=False)
+            LOG.debug("Horizon resp {}".format(resp))
+            assert 200 == resp.status_code, ("Failed to auth in "
+                                             "horizon. Response "
+                                             "{0}".format(resp.status_code))
diff --git a/tcp_tests/managers/rallymanager.py b/tcp_tests/managers/rallymanager.py
index fc56afa..ae72941 100644
--- a/tcp_tests/managers/rallymanager.py
+++ b/tcp_tests/managers/rallymanager.py
@@ -173,7 +173,8 @@
     # Updated to replace the OpenStackManager method run_tempest
     def run_tempest(self, conf_name='/var/lib/lvm_mcp.conf',
                     pattern='set=smoke', concurrency=0, timeout=None,
-                    report_prefix='', report_types=None):
+                    report_prefix='', report_types=None,
+                    designate_plugin=True):
         """Run tempest tests
 
         :param conf_name: tempest config placed in the rally container
@@ -182,6 +183,7 @@
                             to take the amount of the cores on the node
                             <self._node_name>.
         :param timeout: stop tempest tests after specified timeout.
+        :param designate_plugin: bool, install the designate tempest plugin verifier extension (enabled by default)
         :param report_prefix: str, prefix for report filenames. Usually the
                               output of the fixture 'func_name'
         :param report_types: list of the report types that need to download
@@ -189,23 +191,36 @@
                              None by default.
         """
         report_types = report_types or []
-
-        cmd = (
-            "cat > /root/rally/install_tempest.sh << EOF\n"
-            "rally verify create-verifier"
-            "  --type tempest "
-            "  --name tempest-verifier"
-            "  --source /var/lib/tempest"
-            "  --version {tempest_tag}"
-            "  --system-wide\n"
-            "rally verify add-verifier-ext"
-            "  --source /var/lib/designate-tempest-plugin"
-            "  --version {designate_tag}\n"
-            "rally verify configure-verifier --extend {tempest_conf}\n"
-            "rally verify configure-verifier --show\n"
-            "EOF".format(tempest_tag=self.tempest_tag,
-                         designate_tag=self.designate_tag,
-                         tempest_conf=conf_name))
+        if not designate_plugin:
+            cmd = (
+                "cat > /root/rally/install_tempest.sh << EOF\n"
+                "rally verify create-verifier"
+                "  --type tempest "
+                "  --name tempest-verifier"
+                "  --source /var/lib/tempest"
+                "  --version {tempest_tag}"
+                "  --system-wide\n"
+                "rally verify configure-verifier --extend {tempest_conf}\n"
+                "rally verify configure-verifier --show\n"
+                "EOF".format(tempest_tag=self.tempest_tag,
+                             tempest_conf=conf_name))
+        else:
+            cmd = (
+                "cat > /root/rally/install_tempest.sh << EOF\n"
+                "rally verify create-verifier"
+                "  --type tempest "
+                "  --name tempest-verifier"
+                "  --source /var/lib/tempest"
+                "  --version {tempest_tag}"
+                "  --system-wide\n"
+                "rally verify add-verifier-ext"
+                "  --source /var/lib/designate-tempest-plugin"
+                "  --version {designate_tag}\n"
+                "rally verify configure-verifier --extend {tempest_conf}\n"
+                "rally verify configure-verifier --show\n"
+                "EOF".format(tempest_tag=self.tempest_tag,
+                             designate_tag=self.designate_tag,
+                             tempest_conf=conf_name))
         with self._underlay.remote(node_name=self._node_name) as remote:
             LOG.info("Create install_tempest.sh")
             remote.check_call(cmd)
diff --git a/tcp_tests/settings_oslo.py b/tcp_tests/settings_oslo.py
index dacdb22..721d3a5 100644
--- a/tcp_tests/settings_oslo.py
+++ b/tcp_tests/settings_oslo.py
@@ -174,6 +174,14 @@
     ct.Cfg('openstack_steps_path', ct.String(),
            help="Path to YAML with steps to deploy openstack",
            default=_default_openstack_steps),
+    ct.Cfg('horizon_host', ct.IPAddress(),
+           help="IP address of the Horizon dashboard", default='0.0.0.0'),
+    ct.Cfg('horizon_port', ct.String(),
+           help="TCP port of the Horizon dashboard", default='5000'),
+    ct.Cfg('horizon_user', ct.String(),
+           help="Username for Horizon authentication", default='admin'),
+    ct.Cfg('horizon_password', ct.String(),
+           help="Password for Horizon authentication", default='workshop'),
 ]
 openstack_opts = [
     ct.Cfg('openstack_installed', ct.Boolean(),
diff --git a/tcp_tests/tests/system/test_failover_ceph.py b/tcp_tests/tests/system/test_failover_ceph.py
new file mode 100644
index 0000000..4a68705
--- /dev/null
+++ b/tcp_tests/tests/system/test_failover_ceph.py
@@ -0,0 +1,244 @@
+#    Copyright 2017 Mirantis, Inc.
+#
+#    Licensed under the Apache License, Version 2.0 (the "License"); you may
+#    not use this file except in compliance with the License. You may obtain
+#    a copy of the License at
+#
+#         http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+#    License for the specific language governing permissions and limitations
+#    under the License.
+import pytest
+
+from tcp_tests import logger
+
+LOG = logger.logger
+
+
+class TestFailoverCeph(object):
+    """Test class for testing MCP ceph failover"""
+
+    def get_ceph_health(self, underlay, node_names):
+        """Get ceph health on the specified nodes
+
+        Returns the dict {<node_name>: <str>, }
+        where <str> is the 'ceph -s' output
+        """
+        res = {
+            node_name: underlay.check_call("ceph -s",
+                                           node_name=node_name,
+                                           raise_on_err=False)['stdout_str']
+            for node_name in node_names
+        }
+        return res
+
+    def show_failed_msg(self, failed):
+        return "There are failed tempest tests:\n\n  {0}".format(
+            '\n\n  '.join([(name + ': ' + detail)
+                           for name, detail in failed.items()]))
+
+    @pytest.mark.grab_versions
+    @pytest.mark.fail_snapshot
+    def test_restart_osd_node(self, func_name, underlay, config,
+                              openstack_deployed, ceph_deployed,
+                              openstack_actions,
+                              rally, show_step):
+        """Test restart ceph osd node
+
+        Scenario:
+            1. Find ceph osd nodes
+            2. Check ceph health before restart
+            3. Restart 1 ceph osd node
+            4. Check ceph health after restart
+            5. Run tempest smoke after failover
+            6. Check tempest report for failed tests
+
+        Requirements:
+            - Salt cluster
+            - OpenStack cluster
+            - Ceph cluster
+        """
+        openstack_actions._salt.local(
+            tgt='*', fun='cmd.run',
+            args='service ntp stop; ntpd -gq; service ntp start')
+        # STEP #1
+        show_step(1)
+        osd_node_names = underlay.get_target_node_names(
+            target='osd')
+
+        # STEP #2
+        show_step(2)
+        # Get the ceph health output before restart
+        health_before = self.get_ceph_health(underlay, osd_node_names)
+        assert all(["OK" in p for n, p in health_before.items()]), (
+            "'Ceph health is not ok from node: {0}".format(health_before))
+
+        # STEP #3
+        show_step(3)
+        openstack_actions.warm_restart_nodes('osd01')
+
+        openstack_actions._salt.local(
+            tgt='*', fun='cmd.run',
+            args='service ntp stop; ntpd -gq; service ntp start')
+
+        # STEP #4
+        show_step(4)
+        # Get the ceph health output after restart
+        health_after = self.get_ceph_health(underlay, osd_node_names)
+        assert all(["OK" in p for n, p in health_before.items()]), (
+            "'Ceph health is not ok from node: {0}".format(health_after))
+
+        rally.run_container()
+
+        # STEP #5
+        show_step(5)
+        results = rally.run_tempest(pattern='set=smoke',
+                                    conf_name='/var/lib/ceph_mcp.conf',
+                                    report_prefix=func_name,
+                                    designate_plugin=False,
+                                    timeout=1800)
+        # Step #6
+        show_step(6)
+        assert not results['fail'], self.show_failed_msg(results['fail'])
+
+        LOG.info("*************** DONE **************")
+
+    @pytest.mark.grab_versions
+    @pytest.mark.fail_snapshot
+    def test_restart_cmn_node(self, func_name, underlay, config,
+                              openstack_deployed, ceph_deployed,
+                              common_services_actions,
+                              salt_actions, openstack_actions,
+                              rally, show_step):
+        """Test restart ceph cmn node
+
+        Scenario:
+            1. Find ceph cmn nodes
+            2. Check ceph health before restart
+            3. Restart 1 ceph cmn node
+            4. Check ceph health after restart
+            5. Run tempest smoke after failover
+            6. Check tempest report for failed tests
+
+        Requirements:
+            - Salt cluster
+            - OpenStack cluster
+            - Ceph cluster
+        """
+        openstack_actions._salt.local(
+            tgt='*', fun='cmd.run',
+            args='service ntp stop; ntpd -gq; service ntp start')
+        # STEP #1
+        show_step(1)
+        cmn_node_names = underlay.get_target_node_names(
+            target='cmn')
+
+        # STEP #2
+        show_step(2)
+        # Get the ceph health output before restart
+        health_before = self.get_ceph_health(underlay, cmn_node_names)
+        assert all(["OK" in p for n, p in health_before.items()]), (
+            "'Ceph health is not ok from node: {0}".format(health_before))
+
+        # STEP #3
+        show_step(3)
+        openstack_actions.warm_restart_nodes('cmn01')
+
+        openstack_actions._salt.local(
+            tgt='*', fun='cmd.run',
+            args='service ntp stop; ntpd -gq; service ntp start')
+
+        # STEP #4
+        show_step(4)
+        # Get the ceph health output after restart
+        health_after = self.get_ceph_health(underlay, cmn_node_names)
+        assert all(["OK" in p for n, p in health_before.items()]), (
+            "'Ceph health is not ok from node: {0}".format(health_after))
+
+        rally.run_container()
+
+        # STEP #5
+        show_step(5)
+        results = rally.run_tempest(pattern='set=smoke',
+                                    conf_name='/var/lib/ceph_mcp.conf',
+                                    report_prefix=func_name,
+                                    designate_plugin=False,
+                                    timeout=1800)
+        # Step #6
+        show_step(6)
+        assert not results['fail'], self.show_failed_msg(results['fail'])
+
+        LOG.info("*************** DONE **************")
+
+    @pytest.mark.grab_versions
+    @pytest.mark.fail_snapshot
+    def test_restart_rgw_node(self, func_name, underlay, config,
+                              openstack_deployed, ceph_deployed,
+                              common_services_actions,
+                              salt_actions, openstack_actions,
+                              rally, show_step):
+        """Test restart ceph rgw node
+
+        Scenario:
+            1. Find ceph rgw nodes
+            2. Check ceph health before restart
+            3. Restart 1 ceph rgw node
+            4. Check ceph health after restart
+            5. Run tempest smoke after failover
+            6. Check tempest report for failed tests
+
+        Requirements:
+            - Salt cluster
+            - OpenStack cluster
+            - Ceph cluster
+        """
+        openstack_actions._salt.local(
+            tgt='*', fun='cmd.run',
+            args='service ntp stop; ntpd -gq; service ntp start')
+
+        # STEP #1
+        show_step(1)
+        rgw_node_names = underlay.get_target_node_names(
+            target='rgw')
+        if not rgw_node_names:
+            pytest.skip('Skip as there are no rgw nodes in the deployment')
+
+        # STEP #2
+        show_step(2)
+        # Get the ceph health output before restart
+        health_before = self.get_ceph_health(underlay, rgw_node_names)
+        assert all(["OK" in p for n, p in health_before.items()]), (
+            "'Ceph health is not ok from node: {0}".format(health_before))
+
+        # STEP #3
+        show_step(3)
+        openstack_actions.warm_restart_nodes('rgw01')
+
+        openstack_actions._salt.local(
+            tgt='*', fun='cmd.run',
+            args='service ntp stop; ntpd -gq; service ntp start')
+
+        # STEP #4
+        show_step(4)
+        # Get the ceph health output after restart
+        health_after = self.get_ceph_health(underlay, rgw_node_names)
+        assert all(["OK" in p for n, p in health_before.items()]), (
+            "'Ceph health is not ok from node: {0}".format(health_after))
+
+        rally.run_container()
+
+        # STEP #5
+        show_step(5)
+        results = rally.run_tempest(pattern='set=smoke',
+                                    conf_name='/var/lib/ceph_mcp.conf',
+                                    designate_plugin=False,
+                                    report_prefix=func_name,
+                                    timeout=1800)
+        # Step #6
+        show_step(6)
+        assert not results['fail'], self.show_failed_msg(results['fail'])
+
+        LOG.info("*************** DONE **************")