Add Ceph failover HA system tests
Change-Id: I41945fa48f5de97c472392b22e0efc6319b5aefb
diff --git a/tcp_tests/tests/system/test_failover_ceph.py b/tcp_tests/tests/system/test_failover_ceph.py
new file mode 100644
index 0000000..4a68705
--- /dev/null
+++ b/tcp_tests/tests/system/test_failover_ceph.py
@@ -0,0 +1,244 @@
+# Copyright 2017 Mirantis, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+import pytest
+
+from tcp_tests import logger
+
+LOG = logger.logger
+
+
+class TestFailoverCeph(object):
+ """Test class for testing MCP ceph failover"""
+
+ def get_ceph_health(self, underlay, node_names):
+ """Get ceph health on the specified nodes
+
+ Returns the dict {<node_name>: <str>, }
+ where <str> is the 'ceph -s' output
+ """
+ res = {
+ node_name: underlay.check_call("ceph -s",
+ node_name=node_name,
+ raise_on_err=False)['stdout_str']
+ for node_name in node_names
+ }
+ return res
+
+ def show_failed_msg(self, failed):
+ return "There are failed tempest tests:\n\n {0}".format(
+ '\n\n '.join([(name + ': ' + detail)
+ for name, detail in failed.items()]))
+
+ @pytest.mark.grab_versions
+ @pytest.mark.fail_snapshot
+ def test_restart_osd_node(self, func_name, underlay, config,
+ openstack_deployed, ceph_deployed,
+ openstack_actions,
+ rally, show_step):
+ """Test restart ceph osd node
+
+ Scenario:
+ 1. Find ceph osd nodes
+ 2. Check ceph health before restart
+ 3. Restart 1 ceph osd node
+ 4. Check ceph health after restart
+ 5. Run tempest smoke after failover
+ 6. Check tempest report for failed tests
+
+        Requirements:
+ - Salt cluster
+ - OpenStack cluster
+ - Ceph cluster
+ """
+ openstack_actions._salt.local(
+ tgt='*', fun='cmd.run',
+ args='service ntp stop; ntpd -gq; service ntp start')
+ # STEP #1
+ show_step(1)
+ osd_node_names = underlay.get_target_node_names(
+ target='osd')
+
+ # STEP #2
+ show_step(2)
+ # Get the ceph health output before restart
+ health_before = self.get_ceph_health(underlay, osd_node_names)
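+        # 'ceph -s' prints HEALTH_OK on a healthy cluster, so a substring
+        # check on the captured output is sufficient here.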
+ assert all(["OK" in p for n, p in health_before.items()]), (
+ "'Ceph health is not ok from node: {0}".format(health_before))
+
+ # STEP #3
+ show_step(3)
+ openstack_actions.warm_restart_nodes('osd01')
+
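+        # Resync time once the node is back, so the restarted node does
+        # not drift from the Ceph MONs.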
+ openstack_actions._salt.local(
+ tgt='*', fun='cmd.run',
+ args='service ntp stop; ntpd -gq; service ntp start')
+
+ # STEP #4
+ show_step(4)
+ # Get the ceph health output after restart
+ health_after = self.get_ceph_health(underlay, osd_node_names)
+ assert all(["OK" in p for n, p in health_before.items()]), (
+ "'Ceph health is not ok from node: {0}".format(health_after))
+
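+        # Start the Rally container that drives the Tempest run below.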
+ rally.run_container()
+
+ # STEP #5
+ show_step(5)
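+        # Run the Tempest smoke set against the Ceph-specific Tempest
+        # config; the Designate tempest plugin is switched off here.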
+ results = rally.run_tempest(pattern='set=smoke',
+ conf_name='/var/lib/ceph_mcp.conf',
+ report_prefix=func_name,
+ designate_plugin=False,
+ timeout=1800)
+        # STEP #6
+ show_step(6)
+ assert not results['fail'], self.show_failed_msg(results['fail'])
+
+ LOG.info("*************** DONE **************")
+
+ @pytest.mark.grab_versions
+ @pytest.mark.fail_snapshot
+ def test_restart_cmn_node(self, func_name, underlay, config,
+ openstack_deployed, ceph_deployed,
+ common_services_actions,
+ salt_actions, openstack_actions,
+ rally, show_step):
+ """Test restart ceph cmn node
+
+ Scenario:
+ 1. Find ceph cmn nodes
+ 2. Check ceph health before restart
+ 3. Restart 1 ceph cmn node
+ 4. Check ceph health after restart
+ 5. Run tempest smoke after failover
+ 6. Check tempest report for failed tests
+
+        Requirements:
+ - Salt cluster
+ - OpenStack cluster
+ - Ceph cluster
+ """
+ openstack_actions._salt.local(
+ tgt='*', fun='cmd.run',
+ args='service ntp stop; ntpd -gq; service ntp start')
+ # STEP #1
+ show_step(1)
+ cmn_node_names = underlay.get_target_node_names(
+ target='cmn')
+
+ # STEP #2
+ show_step(2)
+ # Get the ceph health output before restart
+ health_before = self.get_ceph_health(underlay, cmn_node_names)
+ assert all(["OK" in p for n, p in health_before.items()]), (
+ "'Ceph health is not ok from node: {0}".format(health_before))
+
+ # STEP #3
+ show_step(3)
+ openstack_actions.warm_restart_nodes('cmn01')
+
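+        # Resync time once the restarted monitor node is back.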
+ openstack_actions._salt.local(
+ tgt='*', fun='cmd.run',
+ args='service ntp stop; ntpd -gq; service ntp start')
+
+ # STEP #4
+ show_step(4)
+ # Get the ceph health output after restart
+ health_after = self.get_ceph_health(underlay, cmn_node_names)
+ assert all(["OK" in p for n, p in health_before.items()]), (
+ "'Ceph health is not ok from node: {0}".format(health_after))
+
+ rally.run_container()
+
+ # STEP #5
+ show_step(5)
+ results = rally.run_tempest(pattern='set=smoke',
+ conf_name='/var/lib/ceph_mcp.conf',
+ report_prefix=func_name,
+ designate_plugin=False,
+ timeout=1800)
+        # STEP #6
+ show_step(6)
+ assert not results['fail'], self.show_failed_msg(results['fail'])
+
+ LOG.info("*************** DONE **************")
+
+ @pytest.mark.grab_versions
+ @pytest.mark.fail_snapshot
+ def test_restart_rgw_node(self, func_name, underlay, config,
+ openstack_deployed, ceph_deployed,
+ common_services_actions,
+ salt_actions, openstack_actions,
+ rally, show_step):
+ """Test restart ceph rgw node
+
+ Scenario:
+ 1. Find ceph rgw nodes
+ 2. Check ceph health before restart
+ 3. Restart 1 ceph rgw node
+ 4. Check ceph health after restart
+ 5. Run tempest smoke after failover
+ 6. Check tempest report for failed tests
+
+        Requirements:
+ - Salt cluster
+ - OpenStack cluster
+ - Ceph cluster
+ """
+ openstack_actions._salt.local(
+ tgt='*', fun='cmd.run',
+ args='service ntp stop; ntpd -gq; service ntp start')
+
+ # STEP #1
+ show_step(1)
+ rgw_node_names = underlay.get_target_node_names(
+ target='rgw')
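+        # rgw is an optional role, so skip the test gracefully when the
+        # deployment has no RADOS Gateway nodes.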
+ if not rgw_node_names:
+            pytest.skip('No rgw nodes in this deployment')
+
+ # STEP #2
+ show_step(2)
+ # Get the ceph health output before restart
+ health_before = self.get_ceph_health(underlay, rgw_node_names)
+ assert all(["OK" in p for n, p in health_before.items()]), (
+ "'Ceph health is not ok from node: {0}".format(health_before))
+
+ # STEP #3
+ show_step(3)
+ openstack_actions.warm_restart_nodes('rgw01')
+
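+        # Resync time once the restarted gateway node is back.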
+ openstack_actions._salt.local(
+ tgt='*', fun='cmd.run',
+ args='service ntp stop; ntpd -gq; service ntp start')
+
+ # STEP #4
+ show_step(4)
+ # Get the ceph health output after restart
+ health_after = self.get_ceph_health(underlay, rgw_node_names)
+ assert all(["OK" in p for n, p in health_before.items()]), (
+ "'Ceph health is not ok from node: {0}".format(health_after))
+
+ rally.run_container()
+
+ # STEP #5
+ show_step(5)
+ results = rally.run_tempest(pattern='set=smoke',
+ conf_name='/var/lib/ceph_mcp.conf',
+ designate_plugin=False,
+ report_prefix=func_name,
+ timeout=1800)
+        # STEP #6
+ show_step(6)
+ assert not results['fail'], self.show_failed_msg(results['fail'])
+
+ LOG.info("*************** DONE **************")