Ceph failover HA
Change-Id: I41945fa48f5de97c472392b22e0efc6319b5aefb
diff --git a/tcp_tests/managers/openstack_manager.py b/tcp_tests/managers/openstack_manager.py
index 2780d9b..bac459a 100644
--- a/tcp_tests/managers/openstack_manager.py
+++ b/tcp_tests/managers/openstack_manager.py
@@ -12,6 +12,7 @@
# License for the specific language governing permissions and limitations
# under the License.
import os
+import requests
from tcp_tests.managers.execute_commands import ExecuteCommandsMixin
from tcp_tests import logger
@@ -39,6 +40,55 @@
self.execute_commands(commands,
label='Install OpenStack services')
self.__config.openstack.openstack_installed = True
+ h_data = self.get_horizon_data()
+ self.__config.openstack.horizon_host = h_data['horizon_host']
+ self.__config.openstack.horizon_port = h_data['horizon_port']
+ self.__config.openstack.horizon_user = h_data['horizon_user']
+ self.__config.openstack.horizon_password = h_data['horizon_password']
+ self.auth_in_horizon(
+ h_data['horizon_host'],
+ h_data['horizon_port'],
+ h_data['horizon_user'],
+ h_data['horizon_password'])
+
+ def get_horizon_data(self):
+ horizon_data = {}
+ tgt = 'I@nginx:server and not cfg*'
+ pillar_host = ('nginx:server:site:nginx_ssl_redirect'
+ '_openstack_web:host:name')
+ pillar_port = ('nginx:server:site:nginx_ssl_redirect'
+ '_openstack_web:host:port')
+ hosts = self._salt.get_pillar(tgt=tgt, pillar=pillar_host)
+ host = set([ip for item in hosts for node, ip
+ in item.items() if ip])
+ if host:
+ host = host.pop()
+ ports = self._salt.get_pillar(tgt=tgt, pillar=pillar_port)
+
+ port = set([port for item in ports for node, port
+ in item.items() if port])
+ if port:
+ port = port.pop()
+ tgt = 'I@keystone:server and ctl01*'
+ pillar_user = 'keystone:server:admin_name'
+ pillar_password = 'keystone:server:admin_password'
+ users = self._salt.get_pillar(tgt=tgt, pillar=pillar_user)
+ user = set([user for item in users for node, user
+ in item.items() if user])
+ if user:
+ user = user.pop()
+ passwords = self._salt.get_pillar(tgt=tgt, pillar=pillar_password)
+ pwd = set([pwd for item in passwords for node, pwd
+ in item.items() if pwd])
+ if pwd:
+ pwd = pwd.pop()
+ horizon_data.update({'horizon_host': host})
+ horizon_data.update({'horizon_port': port})
+ horizon_data.update({'horizon_user': user})
+ horizon_data.update({'horizon_password': pwd})
+ LOG.info("Data from pillars {}".format(horizon_data))
+
+ return horizon_data
def run_tempest(
self,
@@ -121,3 +171,37 @@
LOG.info('Reboot (warm restart) nodes {0}'.format(node_names))
self.warm_shutdown_openstack_nodes(node_names, timeout=timeout)
self.warm_start_nodes(node_names)
+
+ def auth_in_horizon(self, host, port, user, password):
+ client = requests.session()
+ url = "http://{0}:{1}".format(
+ self.__config.openstack.horizon_host,
+ self.__config.openstack.horizon_port)
+ # Retrieve the CSRF token first
+ client.get(url, verify=False) # sets cookie
+ if not len(client.cookies):
+ login_data = dict(
+ username=self.__config.openstack.horizon_user,
+ password=self.__config.openstack.horizon_password,
+ next='/')
+ resp = client.post(url, data=login_data,
+ headers=dict(Referer=url), verify=False)
+ LOG.debug("Horizon resp {}".format(resp))
+ assert 200 == resp.status_code, ("Failed to auth in "
+ "horizon. Response "
+ "{0}".format(resp.status_code))
+ else:
+ login_data = dict(
+ username=self.__config.openstack.horizon_user,
+ password=self.__config.openstack.horizon_password,
+ next='/')
+ csrftoken = client.cookies.get('csrftoken', None)
+ if csrftoken:
+ login_data['csrfmiddlewaretoken'] = csrftoken
+
+ resp = client.post(url, data=login_data,
+ headers=dict(Referer=url), verify=False)
+ LOG.debug("Horizon resp {}".format(resp))
+ assert 200 == resp.status_code, ("Failed to auth in "
+ "horizon. Response "
+ "{0}".format(resp.status_code))
diff --git a/tcp_tests/managers/rallymanager.py b/tcp_tests/managers/rallymanager.py
index fc56afa..ae72941 100644
--- a/tcp_tests/managers/rallymanager.py
+++ b/tcp_tests/managers/rallymanager.py
@@ -173,7 +173,8 @@
# Updated to replace the OpenStackManager method run_tempest
def run_tempest(self, conf_name='/var/lib/lvm_mcp.conf',
pattern='set=smoke', concurrency=0, timeout=None,
- report_prefix='', report_types=None):
+ report_prefix='', report_types=None,
+ designate_plugin=True):
"""Run tempest tests
:param conf_name: tempest config placed in the rally container
@@ -182,6 +183,7 @@
to take the amount of the cores on the node
<self._node_name>.
:param timeout: stop tempest tests after specified timeout.
+ :param designate_plugin: enabled by default plugin for designate
:param report_prefix: str, prefix for report filenames. Usually the
output of the fixture 'func_name'
:param report_types: list of the report types that need to download
@@ -189,23 +191,36 @@
None by default.
"""
report_types = report_types or []
-
- cmd = (
- "cat > /root/rally/install_tempest.sh << EOF\n"
- "rally verify create-verifier"
- " --type tempest "
- " --name tempest-verifier"
- " --source /var/lib/tempest"
- " --version {tempest_tag}"
- " --system-wide\n"
- "rally verify add-verifier-ext"
- " --source /var/lib/designate-tempest-plugin"
- " --version {designate_tag}\n"
- "rally verify configure-verifier --extend {tempest_conf}\n"
- "rally verify configure-verifier --show\n"
- "EOF".format(tempest_tag=self.tempest_tag,
- designate_tag=self.designate_tag,
- tempest_conf=conf_name))
+ if not designate_plugin:
+ cmd = (
+ "cat > /root/rally/install_tempest.sh << EOF\n"
+ "rally verify create-verifier"
+ " --type tempest "
+ " --name tempest-verifier"
+ " --source /var/lib/tempest"
+ " --version {tempest_tag}"
+ " --system-wide\n"
+ "rally verify configure-verifier --extend {tempest_conf}\n"
+ "rally verify configure-verifier --show\n"
+ "EOF".format(tempest_tag=self.tempest_tag,
+ tempest_conf=conf_name))
+ else:
+ cmd = (
+ "cat > /root/rally/install_tempest.sh << EOF\n"
+ "rally verify create-verifier"
+ " --type tempest "
+ " --name tempest-verifier"
+ " --source /var/lib/tempest"
+ " --version {tempest_tag}"
+ " --system-wide\n"
+ "rally verify add-verifier-ext"
+ " --source /var/lib/designate-tempest-plugin"
+ " --version {designate_tag}\n"
+ "rally verify configure-verifier --extend {tempest_conf}\n"
+ "rally verify configure-verifier --show\n"
+ "EOF".format(tempest_tag=self.tempest_tag,
+ designate_tag=self.designate_tag,
+ tempest_conf=conf_name))
with self._underlay.remote(node_name=self._node_name) as remote:
LOG.info("Create install_tempest.sh")
remote.check_call(cmd)
diff --git a/tcp_tests/settings_oslo.py b/tcp_tests/settings_oslo.py
index dacdb22..721d3a5 100644
--- a/tcp_tests/settings_oslo.py
+++ b/tcp_tests/settings_oslo.py
@@ -174,6 +174,14 @@
ct.Cfg('openstack_steps_path', ct.String(),
help="Path to YAML with steps to deploy openstack",
default=_default_openstack_steps),
+    ct.Cfg('horizon_host', ct.IPAddress(),
+           help="IP address of the Horizon dashboard endpoint",
+           default='0.0.0.0'),
+    ct.Cfg('horizon_port', ct.String(),
+           help="TCP port of the Horizon dashboard endpoint",
+           default='5000'),
+    ct.Cfg('horizon_user', ct.String(),
+           help="User name for Horizon authentication", default='admin'),
+    ct.Cfg('horizon_password', ct.String(),
+           help="Password for Horizon authentication", default='workshop'),
]
openstack_opts = [
ct.Cfg('openstack_installed', ct.Boolean(),
diff --git a/tcp_tests/tests/system/test_failover_ceph.py b/tcp_tests/tests/system/test_failover_ceph.py
new file mode 100644
index 0000000..4a68705
--- /dev/null
+++ b/tcp_tests/tests/system/test_failover_ceph.py
@@ -0,0 +1,244 @@
+# Copyright 2017 Mirantis, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+import pytest
+
+from tcp_tests import logger
+
+LOG = logger.logger
+
+
+class TestFailoverCeph(object):
+ """Test class for testing MCP ceph failover"""
+
+ def get_ceph_health(self, underlay, node_names):
+ """Get ceph health on the specified nodes
+
+ Returns the dict {<node_name>: <str>, }
+ where <str> is the 'ceph -s' output
+ """
+ res = {
+ node_name: underlay.check_call("ceph -s",
+ node_name=node_name,
+ raise_on_err=False)['stdout_str']
+ for node_name in node_names
+ }
+ return res
+
+ def show_failed_msg(self, failed):
+ return "There are failed tempest tests:\n\n {0}".format(
+ '\n\n '.join([(name + ': ' + detail)
+ for name, detail in failed.items()]))
+
+    @pytest.mark.grab_versions
+    @pytest.mark.fail_snapshot
+    def test_restart_osd_node(self, func_name, underlay, config,
+                              openstack_deployed, ceph_deployed,
+                              openstack_actions,
+                              rally, show_step):
+        """Test restart ceph osd node
+
+        Scenario:
+            1. Find ceph osd nodes
+            2. Check ceph health before restart
+            3. Restart 1 ceph osd node
+            4. Check ceph health after restart
+            5. Run tempest smoke after failover
+            6. Check tempest report for failed tests
+
+        Requirements:
+            - Salt cluster
+            - OpenStack cluster
+            - Ceph cluster
+        """
+        openstack_actions._salt.local(
+            tgt='*', fun='cmd.run',
+            args='service ntp stop; ntpd -gq; service ntp start')
+        # STEP #1
+        show_step(1)
+        osd_node_names = underlay.get_target_node_names(
+            target='osd')
+
+        # STEP #2
+        show_step(2)
+        # Get the ceph health output before restart
+        health_before = self.get_ceph_health(underlay, osd_node_names)
+        assert all(["OK" in p for n, p in health_before.items()]), (
+            "Ceph health is not ok from node: {0}".format(health_before))
+
+        # STEP #3
+        show_step(3)
+        openstack_actions.warm_restart_nodes('osd01')
+
+        openstack_actions._salt.local(
+            tgt='*', fun='cmd.run',
+            args='service ntp stop; ntpd -gq; service ntp start')
+
+        # STEP #4
+        show_step(4)
+        # Get the ceph health output after restart (fixed: was health_before)
+        health_after = self.get_ceph_health(underlay, osd_node_names)
+        assert all(["OK" in p for n, p in health_after.items()]), (
+            "Ceph health is not ok from node: {0}".format(health_after))
+
+        rally.run_container()
+
+        # STEP #5
+        show_step(5)
+        results = rally.run_tempest(pattern='set=smoke',
+                                    conf_name='/var/lib/ceph_mcp.conf',
+                                    report_prefix=func_name,
+                                    designate_plugin=False,
+                                    timeout=1800)
+        # Step #6
+        show_step(6)
+        assert not results['fail'], self.show_failed_msg(results['fail'])
+
+        LOG.info("*************** DONE **************")
+
+    @pytest.mark.grab_versions
+    @pytest.mark.fail_snapshot
+    def test_restart_cmn_node(self, func_name, underlay, config,
+                              openstack_deployed, ceph_deployed,
+                              common_services_actions,
+                              salt_actions, openstack_actions,
+                              rally, show_step):
+        """Test restart ceph cmn node
+
+        Scenario:
+            1. Find ceph cmn nodes
+            2. Check ceph health before restart
+            3. Restart 1 ceph cmn node
+            4. Check ceph health after restart
+            5. Run tempest smoke after failover
+            6. Check tempest report for failed tests
+
+        Requirements:
+            - Salt cluster
+            - OpenStack cluster
+            - Ceph cluster
+        """
+        openstack_actions._salt.local(
+            tgt='*', fun='cmd.run',
+            args='service ntp stop; ntpd -gq; service ntp start')
+        # STEP #1
+        show_step(1)
+        cmn_node_names = underlay.get_target_node_names(
+            target='cmn')
+
+        # STEP #2
+        show_step(2)
+        # Get the ceph health output before restart
+        health_before = self.get_ceph_health(underlay, cmn_node_names)
+        assert all(["OK" in p for n, p in health_before.items()]), (
+            "Ceph health is not ok from node: {0}".format(health_before))
+
+        # STEP #3
+        show_step(3)
+        openstack_actions.warm_restart_nodes('cmn01')
+
+        openstack_actions._salt.local(
+            tgt='*', fun='cmd.run',
+            args='service ntp stop; ntpd -gq; service ntp start')
+
+        # STEP #4
+        show_step(4)
+        # Get the ceph health output after restart (fixed: was health_before)
+        health_after = self.get_ceph_health(underlay, cmn_node_names)
+        assert all(["OK" in p for n, p in health_after.items()]), (
+            "Ceph health is not ok from node: {0}".format(health_after))
+
+        rally.run_container()
+
+        # STEP #5
+        show_step(5)
+        results = rally.run_tempest(pattern='set=smoke',
+                                    conf_name='/var/lib/ceph_mcp.conf',
+                                    report_prefix=func_name,
+                                    designate_plugin=False,
+                                    timeout=1800)
+        # Step #6
+        show_step(6)
+        assert not results['fail'], self.show_failed_msg(results['fail'])
+
+        LOG.info("*************** DONE **************")
+
+    @pytest.mark.grab_versions
+    @pytest.mark.fail_snapshot
+    def test_restart_rgw_node(self, func_name, underlay, config,
+                              openstack_deployed, ceph_deployed,
+                              common_services_actions,
+                              salt_actions, openstack_actions,
+                              rally, show_step):
+        """Test restart ceph rgw node
+
+        Scenario:
+            1. Find ceph rgw nodes
+            2. Check ceph health before restart
+            3. Restart 1 ceph rgw node
+            4. Check ceph health after restart
+            5. Run tempest smoke after failover
+            6. Check tempest report for failed tests
+
+        Requirements:
+            - Salt cluster
+            - OpenStack cluster
+            - Ceph cluster
+        """
+        openstack_actions._salt.local(
+            tgt='*', fun='cmd.run',
+            args='service ntp stop; ntpd -gq; service ntp start')
+
+        # STEP #1
+        show_step(1)
+        rgw_node_names = underlay.get_target_node_names(
+            target='rgw')
+        if not rgw_node_names:
+            pytest.skip('Skip as there are not rgw nodes in deploy')
+
+        # STEP #2
+        show_step(2)
+        # Get the ceph health output before restart
+        health_before = self.get_ceph_health(underlay, rgw_node_names)
+        assert all(["OK" in p for n, p in health_before.items()]), (
+            "Ceph health is not ok from node: {0}".format(health_before))
+
+        # STEP #3
+        show_step(3)
+        openstack_actions.warm_restart_nodes('rgw01')
+
+        openstack_actions._salt.local(
+            tgt='*', fun='cmd.run',
+            args='service ntp stop; ntpd -gq; service ntp start')
+
+        # STEP #4
+        show_step(4)
+        # Get the ceph health output after restart (fixed: was health_before)
+        health_after = self.get_ceph_health(underlay, rgw_node_names)
+        assert all(["OK" in p for n, p in health_after.items()]), (
+            "Ceph health is not ok from node: {0}".format(health_after))
+
+        rally.run_container()
+
+        # STEP #5
+        show_step(5)
+        results = rally.run_tempest(pattern='set=smoke',
+                                    conf_name='/var/lib/ceph_mcp.conf',
+                                    designate_plugin=False,
+                                    report_prefix=func_name,
+                                    timeout=1800)
+        # Step #6
+        show_step(6)
+        assert not results['fail'], self.show_failed_msg(results['fail'])
+
+        LOG.info("*************** DONE **************")