# Copyright 2017 Mirantis, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import pytest

from devops.helpers import helpers
from tcp_tests import logger

LOG = logger.logger


class TestFailoverCeph(object):
    """Test class for MCP Ceph failover scenarios"""

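    # Jenkins jobs, run on the CI/CD ('cid') Jenkins instance, that are
    # used to validate the cluster after each failover scenario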
    TEMPEST_JOB_NAME = 'cvp-tempest'
    TEMPEST_JOB_PARAMETERS = {
        'TEMPEST_ENDPOINT_TYPE': 'internalURL',
        'TEMPEST_TEST_PATTERN': 'set=smoke'
    }

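    # The sanity run excludes test_ceph_health and
    # test_prometheus_alert_count via the pytest '-k' expression passed
    # through EXTRA_PARAMS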
    SANITY_JOB_NAME = 'cvp-sanity'
    SANITY_JOB_PARAMETERS = {
        'EXTRA_PARAMS': {
            'envs': [
                "tests_set=-k "
                "'not test_ceph_health and not test_prometheus_alert_count'"
            ]
        }
    }

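    # Timeouts, in seconds: up to 1 minute for a job to leave the Jenkins
    # queue and up to 15 minutes for its build to finish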
    JENKINS_START_TIMEOUT = 60
    JENKINS_BUILD_TIMEOUT = 60 * 15

    def get_ceph_health(self, ssh, node_names):
        """Get Ceph health status on specified nodes

        :param ssh: UnderlaySSHManager, tcp-qa SSH manager instance
        :param node_names: list, full hostnames of Ceph nodes
        :return: dict, Ceph health status from each node (output of
            'ceph -s' command executed on each node)
        """
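        # Hypothetical example of the returned mapping:
        #   {'osd001.example.local': 'cluster: ... health: HEALTH_OK ...'}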
        return {
            node_name: ssh.check_call(
                "ceph -s",
                node_name=node_name,
                raise_on_err=False)['stdout_str']
            for node_name in node_names
        }

    @pytest.mark.grab_versions
    @pytest.mark.restart_osd_node
    def test_restart_osd_node(
            self,
            salt_actions,
            drivetrain_actions,
            underlay_actions,
            show_step):
| 68 | """Verify that Ceph OSD node is not affected by system restart |
| 69 | |
| 70 | Scenario: |
| 71 | 1. Find Ceph OSD nodes |
| 72 | 2. Check Ceph cluster health before node restart (skipped until |
| 73 | PROD-31374 is fixed) |
| 74 | 3. Restart 1 Ceph OSD node |
| 75 | 4. Check Ceph cluster health after node restart (skipped until |
| 76 | PROD-31374 is fixed) |
| 77 | 5. Run Tempest smoke test suite |
| 78 | 6. Run test_ceph_status.py::test_ceph_osd and |
| 79 | test_services.py::test_check_services[osd] sanity tests |
| 80 | |
| 81 | Duration: ~9 min |
| 82 | """ |
| 83 | salt = salt_actions |
| 84 | ssh = underlay_actions |
Hanna Arhipova | 17b2c10 | 2019-09-06 16:44:17 +0300 | [diff] [blame] | 85 | dt = drivetrain_actions |
Dmitriy Kruglov | a1aecc2 | 2019-08-25 19:26:44 +0200 | [diff] [blame] | 86 | |
| 87 | # Find Ceph OSD nodes |
| 88 | show_step(1) |
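        # "I@ceph:osd" is a Salt compound matcher targeting minions by
        # pillar data, i.e. every node that has the ceph:osd pillar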
| 89 | tgt = "I@ceph:osd" |
| 90 | osd_hosts = salt.local(tgt, "test.ping")['return'][0].keys() |
| 91 | # Select a node for the test |
| 92 | osd_host = osd_hosts[0] |

        # Check Ceph cluster health before node restart
        show_step(2)
        ceph_health = self.get_ceph_health(ssh, osd_hosts)
        # FIXME: uncomment the check once PROD-31374 is fixed
        # status = all(
        #     ["OK" in status for node, status in ceph_health.items()])
        # assert status, "Ceph health is not OK: {0}".format(ceph_health)

        # Restart a Ceph OSD node
        show_step(3)
        LOG.info("Sending reboot command to '{}' node.".format(osd_host))
        remote = ssh.remote(node_name=osd_host)
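        # Reboot asynchronously: the SSH session drops as soon as the node
        # goes down, so do not wait for the command to return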
        remote.execute_async("/sbin/shutdown -r now")

        # Wait for restarted node to boot and become accessible
        helpers.wait_pass(remote.reconnect, timeout=60 * 3, interval=5)
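        # Verify that the Salt minion on the rebooted node is responsive
        # again by round-tripping a test.echo call through the master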
        echo_request = "echo"
        echo_response = salt.local(
            osd_host, "test.echo", echo_request)['return'][0]
        assert echo_request == echo_response[osd_host], (
            "Minion on node '{}' is not responding after node "
            "reboot.".format(osd_host)
        )
        LOG.info("'{}' node is back after reboot.".format(osd_host))

        # Check Ceph cluster health after node restart
        show_step(4)
        ceph_health = self.get_ceph_health(ssh, osd_hosts)  # noqa
        # FIXME: uncomment the check once PROD-31374 is fixed
        # status = all(
        #     ["OK" in status for node, status in ceph_health.items()])
        # assert status, "Ceph health is not OK: {0}".format(ceph_health)

        # Run Tempest smoke test suite
        show_step(5)
        status = dt.start_job_on_cid_jenkins(
            job_name=self.TEMPEST_JOB_NAME,
            job_parameters=self.TEMPEST_JOB_PARAMETERS,
            start_timeout=self.JENKINS_START_TIMEOUT,
            build_timeout=self.JENKINS_BUILD_TIMEOUT
        )

        assert status == 'SUCCESS', (
            "'{0}' job run status is {1} after executing Tempest smoke "
            "tests".format(self.TEMPEST_JOB_NAME, status)
        )

        # Run Sanity test
        show_step(6)
        status = dt.start_job_on_cid_jenkins(
            job_name=self.SANITY_JOB_NAME,
            job_parameters=self.SANITY_JOB_PARAMETERS,
            start_timeout=self.JENKINS_START_TIMEOUT,
            build_timeout=self.JENKINS_BUILD_TIMEOUT
        )
        assert status == 'SUCCESS', (
            "'{0}' job run status is {1} after executing selected sanity "
            "tests".format(self.SANITY_JOB_NAME, status)
        )

    @pytest.mark.grab_versions
    @pytest.mark.restart_cmn_node
    def test_restart_cmn_node(
            self,
            salt_actions,
            drivetrain_actions,
            underlay_actions,
            show_step):
| 164 | """Verify that Ceph CMN node is not affected by system restart |
| 165 | |
| 166 | Scenario: |
| 167 | 1. Find Ceph CMN nodes |
| 168 | 2. Check Ceph cluster health before node restart (skipped until |
| 169 | PROD-31374 is fixed) |
| 170 | 3. Restart 1 Ceph CMN node |
| 171 | 4. Check Ceph cluster health after node restart (skipped until |
| 172 | PROD-31374 is fixed) |
| 173 | 5. Run Tempest smoke test suite |
| 174 | 6. Run test_ceph_status.py::test_ceph_replicas and |
| 175 | test_services.py::test_check_services[cmn] sanity tests |
| 176 | |
| 177 | Duration: ~9 min |
| 178 | """ |
| 179 | salt = salt_actions |
| 180 | ssh = underlay_actions |
Hanna Arhipova | 17b2c10 | 2019-09-06 16:44:17 +0300 | [diff] [blame] | 181 | dt = drivetrain_actions |
Dmitriy Kruglov | a1aecc2 | 2019-08-25 19:26:44 +0200 | [diff] [blame] | 182 | |
| 183 | # Find Ceph CMN nodes |
| 184 | show_step(1) |
| 185 | tgt = "I@ceph:mon" |
| 186 | cmn_hosts = salt.local(tgt, "test.ping")['return'][0].keys() |
| 187 | # Select a node for the test |
| 188 | cmn_host = cmn_hosts[0] |
| 189 | |
| 190 | # Check Ceph cluster health before node restart |
| 191 | show_step(2) |
| 192 | ceph_health = self.get_ceph_health(ssh, cmn_hosts) |
| 193 | # FIXME: uncomment the check once PROD-31374 is fixed |
| 194 | # status = all( |
| 195 | # ["OK" in status for node, status in ceph_health.items()]) |
| 196 | # assert status, "Ceph health is not OK: {0}".format(ceph_health) |
| 197 | |
| 198 | # Restart a Ceph CMN node |
| 199 | show_step(3) |
| 200 | LOG.info("Sending reboot command to '{}' node.".format(cmn_host)) |
| 201 | remote = ssh.remote(node_name=cmn_host) |
| 202 | remote.execute_async("/sbin/shutdown -r now") |
| 203 | |
| 204 | # Wait for restarted node to boot and become accessible |
| 205 | helpers.wait_pass(remote.reconnect, timeout=60 * 3, interval=5) |
| 206 | echo_request = "echo" |
| 207 | echo_response = salt.local( |
| 208 | cmn_host, "test.echo", echo_request)['return'][0] |
| 209 | assert echo_request == echo_response[cmn_host], ( |
| 210 | "Minion on node '{}' node is not responding after node " |
| 211 | "reboot.".format(cmn_host) |
| 212 | ) |
| 213 | LOG.info("'{}' node is back after reboot.".format(cmn_host)) |

        # Check Ceph cluster health after node restart
        show_step(4)
        ceph_health = self.get_ceph_health(ssh, cmn_hosts)  # noqa
        # FIXME: uncomment the check once PROD-31374 is fixed
        # status = all(
        #     ["OK" in status for node, status in ceph_health.items()])
        # assert status, "Ceph health is not OK: {0}".format(ceph_health)

        # Run Tempest smoke test suite
        show_step(5)
        status = dt.start_job_on_cid_jenkins(
            job_name=self.TEMPEST_JOB_NAME,
            job_parameters=self.TEMPEST_JOB_PARAMETERS,
            start_timeout=self.JENKINS_START_TIMEOUT,
            build_timeout=self.JENKINS_BUILD_TIMEOUT
        )

        assert status == 'SUCCESS', (
            "'{0}' job run status is {1} after executing Tempest smoke "
            "tests".format(self.TEMPEST_JOB_NAME, status)
        )

        # Run Sanity test
        show_step(6)
        status = dt.start_job_on_cid_jenkins(
            job_name=self.SANITY_JOB_NAME,
            job_parameters=self.SANITY_JOB_PARAMETERS,
            start_timeout=self.JENKINS_START_TIMEOUT,
            build_timeout=self.JENKINS_BUILD_TIMEOUT
        )
        assert status == 'SUCCESS', (
            "'{0}' job run status is {1} after executing selected sanity "
            "tests".format(self.SANITY_JOB_NAME, status)
        )

    @pytest.mark.grab_versions
    @pytest.mark.restart_rgw_node
    def test_restart_rgw_node(
            self,
            salt_actions,
            drivetrain_actions,
            underlay_actions,
            show_step):
| 260 | """Verify that Ceph RGW node is not affected by system restart |
| 261 | |
| 262 | Scenario: |
| 263 | 1. Find Ceph RGW nodes |
| 264 | 2. Check Ceph cluster health before node restart (skipped until |
| 265 | PROD-31374 is fixed) |
| 266 | 3. Restart 1 Ceph RGW node |
| 267 | 4. Check Ceph cluster health after node restart (skipped until |
| 268 | PROD-31374 is fixed) |
| 269 | 5. Run Tempest smoke test suite |
| 270 | 6. Run test_services.py::test_check_services[rgw] sanity test |
| 271 | |
| 272 | Duration: ~9 min |
| 273 | """ |
| 274 | salt = salt_actions |
| 275 | ssh = underlay_actions |
Hanna Arhipova | 17b2c10 | 2019-09-06 16:44:17 +0300 | [diff] [blame] | 276 | dt = drivetrain_actions |
Dmitriy Kruglov | a1aecc2 | 2019-08-25 19:26:44 +0200 | [diff] [blame] | 277 | |
| 278 | # Find Ceph RGW nodes |
| 279 | show_step(1) |
| 280 | tgt = "I@ceph:radosgw" |
| 281 | rgw_hosts = salt.local(tgt, "test.ping")['return'][0].keys() |
| 282 | # Select a node for the test |
| 283 | rgw_host = rgw_hosts[0] |
| 284 | |
| 285 | # Check Ceph cluster health before node restart |
| 286 | show_step(2) |
| 287 | ceph_health = self.get_ceph_health(ssh, rgw_hosts) |
| 288 | # FIXME: uncomment the check once PROD-31374 is fixed |
| 289 | # status = all( |
| 290 | # ["OK" in status for node, status in ceph_health.items()]) |
| 291 | # assert status, "Ceph health is not OK: {0}".format(ceph_health) |
| 292 | |
| 293 | # Restart a Ceph RGW node |
| 294 | show_step(3) |
| 295 | LOG.info("Sending reboot command to '{}' node.".format(rgw_host)) |
| 296 | remote = ssh.remote(node_name=rgw_host) |
| 297 | remote.execute_async("/sbin/shutdown -r now") |
| 298 | |
| 299 | # Wait for restarted node to boot and become accessible |
| 300 | helpers.wait_pass(remote.reconnect, timeout=60 * 3, interval=5) |
| 301 | echo_request = "echo" |
| 302 | echo_response = salt.local( |
| 303 | rgw_host, "test.echo", echo_request)['return'][0] |
| 304 | assert echo_request == echo_response[rgw_host], ( |
| 305 | "Minion on node '{}' node is not responding after node " |
| 306 | "reboot.".format(rgw_host) |
| 307 | ) |
| 308 | LOG.info("'{}' node is back after reboot.".format(rgw_host)) |

        # Check Ceph cluster health after node restart
        show_step(4)
        ceph_health = self.get_ceph_health(ssh, rgw_hosts)  # noqa
        # FIXME: uncomment the check once PROD-31374 is fixed
        # status = all(
        #     ["OK" in status for node, status in ceph_health.items()])
        # assert status, "Ceph health is not OK: {0}".format(ceph_health)

        # Run Tempest smoke test suite
        show_step(5)
        status = dt.start_job_on_cid_jenkins(
            job_name=self.TEMPEST_JOB_NAME,
            job_parameters=self.TEMPEST_JOB_PARAMETERS,
            start_timeout=self.JENKINS_START_TIMEOUT,
            build_timeout=self.JENKINS_BUILD_TIMEOUT
        )

        assert status == 'SUCCESS', (
            "'{0}' job run status is {1} after executing Tempest smoke "
            "tests".format(self.TEMPEST_JOB_NAME, status)
        )

        # Run Sanity test
        show_step(6)
        status = dt.start_job_on_cid_jenkins(
            job_name=self.SANITY_JOB_NAME,
            job_parameters=self.SANITY_JOB_PARAMETERS,
            start_timeout=self.JENKINS_START_TIMEOUT,
            build_timeout=self.JENKINS_BUILD_TIMEOUT
        )
        assert status == 'SUCCESS', (
            "'{0}' job run status is {1} after executing selected sanity "
            "tests".format(self.SANITY_JOB_NAME, status)
        )

    # #######################################################################
    # ############# Tests for fuel-devops deployed environments #############
    # #######################################################################
    def show_failed_msg(self, failed):
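        """Format a dict of failed Tempest tests into an assertion message.

        Hypothetical example of the output:

            There are failed tempest tests:

              tempest...test_example: Details: expected state not reached
        """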
| 351 | return "There are failed tempest tests:\n\n {0}".format( |
| 352 | '\n\n '.join([(name + ': ' + detail) |
| 353 | for name, detail in failed.items()])) |
| 354 | |
| 355 | @pytest.mark.grab_versions |
| 356 | @pytest.mark.fail_snapshot |
Dmitriy Kruglov | a1aecc2 | 2019-08-25 19:26:44 +0200 | [diff] [blame] | 357 | def _test_restart_osd_node(self, func_name, underlay, config, |
| 358 | openstack_deployed, ceph_deployed, |
| 359 | openstack_actions, hardware, |
| 360 | rally, show_step): |
        """Test restart ceph osd node

        Scenario:
            1. Find ceph osd nodes
            2. Check ceph health before restart
            3. Restart 1 ceph osd node
            4. Check ceph health after restart
            5. Run tempest smoke after failover
            6. Check tempest report for failed tests

        Requirements:
            - Salt cluster
            - OpenStack cluster
            - Ceph cluster
        """
        openstack_actions._salt.local(
            tgt='*', fun='cmd.run',
            args='service ntp stop; ntpd -gq; service ntp start')
        # STEP #1
        show_step(1)
        osd_node_names = underlay.get_target_node_names(
            target='osd')

        # STEP #2
        show_step(2)
        # Get the ceph health output before restart
        health_before = self.get_ceph_health(underlay, osd_node_names)
| 388 | assert all(["OK" in p for n, p in health_before.items()]), ( |
| 389 | "'Ceph health is not ok from node: {0}".format(health_before)) |

        # STEP #3
        show_step(3)
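        # Warm-restart the 'osd01' node via the fuel-devops hardware manager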
        hardware.warm_restart_nodes(underlay, 'osd01')

        openstack_actions._salt.local(
            tgt='*', fun='cmd.run',
            args='service ntp stop; ntpd -gq; service ntp start')

        # STEP #4
        show_step(4)
        # Get the ceph health output after restart
        health_after = self.get_ceph_health(underlay, osd_node_names)
| 403 | assert all(["OK" in p for n, p in health_before.items()]), ( |
| 404 | "'Ceph health is not ok from node: {0}".format(health_after)) |

        rally.run_container()

        # STEP #5
        show_step(5)
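        # Run the Tempest smoke suite against the Ceph-specific Tempest
        # config, allowing up to 30 minutes for the run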
        results = rally.run_tempest(pattern='set=smoke',
                                    conf_name='/var/lib/ceph_mcp.conf',
                                    report_prefix=func_name,
                                    designate_plugin=False,
                                    timeout=1800)
        # Step #6
        show_step(6)
        assert not results['fail'], self.show_failed_msg(results['fail'])

        LOG.info("*************** DONE **************")

    @pytest.mark.grab_versions
    @pytest.mark.fail_snapshot
    def _test_restart_cmn_node(self, func_name, underlay, config,
                               openstack_deployed, ceph_deployed,
                               core_actions,
                               salt_actions, openstack_actions,
                               rally, show_step, hardware):
        """Test restart ceph cmn node

        Scenario:
            1. Find ceph cmn nodes
            2. Check ceph health before restart
            3. Restart 1 ceph cmn node
            4. Check ceph health after restart
            5. Run tempest smoke after failover
            6. Check tempest report for failed tests

        Requirements:
            - Salt cluster
            - OpenStack cluster
            - Ceph cluster
        """
        openstack_actions._salt.local(
            tgt='*', fun='cmd.run',
            args='service ntp stop; ntpd -gq; service ntp start')
        # STEP #1
        show_step(1)
        cmn_node_names = underlay.get_target_node_names(
            target='cmn')

        # STEP #2
        show_step(2)
        # Get the ceph health output before restart
        health_before = self.get_ceph_health(underlay, cmn_node_names)
| 455 | assert all(["OK" in p for n, p in health_before.items()]), ( |
| 456 | "'Ceph health is not ok from node: {0}".format(health_before)) |

        # STEP #3
        show_step(3)
        hardware.warm_restart_nodes(underlay, 'cmn01')

        openstack_actions._salt.local(
            tgt='*', fun='cmd.run',
            args='service ntp stop; ntpd -gq; service ntp start')

        # STEP #4
        show_step(4)
        # Get the ceph health output after restart
        health_after = self.get_ceph_health(underlay, cmn_node_names)
| 470 | assert all(["OK" in p for n, p in health_before.items()]), ( |
| 471 | "'Ceph health is not ok from node: {0}".format(health_after)) |

        rally.run_container()

        # STEP #5
        show_step(5)
        results = rally.run_tempest(pattern='set=smoke',
                                    conf_name='/var/lib/ceph_mcp.conf',
                                    report_prefix=func_name,
                                    designate_plugin=False,
                                    timeout=1800)
        # Step #6
        show_step(6)
        assert not results['fail'], self.show_failed_msg(results['fail'])

        LOG.info("*************** DONE **************")

    @pytest.mark.grab_versions
    @pytest.mark.fail_snapshot
    def _test_restart_rgw_node(self, func_name, underlay, config,
                               openstack_deployed, ceph_deployed,
                               core_actions, hardware,
                               salt_actions, openstack_actions,
                               rally, show_step):
        """Test restart ceph rgw node

        Scenario:
            1. Find ceph rgw nodes
            2. Check ceph health before restart
            3. Restart 1 ceph rgw node
            4. Check ceph health after restart
            5. Run tempest smoke after failover
            6. Check tempest report for failed tests

        Requirements:
            - Salt cluster
            - OpenStack cluster
            - Ceph cluster
        """
        openstack_actions._salt.local(
            tgt='*', fun='cmd.run',
            args='service ntp stop; ntpd -gq; service ntp start')

        # STEP #1
        show_step(1)
        rgw_node_names = underlay.get_target_node_names(
            target='rgw')
        if not rgw_node_names:
            pytest.skip('Skip as there are no rgw nodes in the deployment')

        # STEP #2
        show_step(2)
        # Get the ceph health output before restart
        health_before = self.get_ceph_health(underlay, rgw_node_names)
| 525 | assert all(["OK" in p for n, p in health_before.items()]), ( |
| 526 | "'Ceph health is not ok from node: {0}".format(health_before)) |

        # STEP #3
        show_step(3)
        hardware.warm_restart_nodes(underlay, 'rgw01')

        openstack_actions._salt.local(
            tgt='*', fun='cmd.run',
            args='service ntp stop; ntpd -gq; service ntp start')

        # STEP #4
        show_step(4)
        # Get the ceph health output after restart
        health_after = self.get_ceph_health(underlay, rgw_node_names)
| 540 | assert all(["OK" in p for n, p in health_before.items()]), ( |
| 541 | "'Ceph health is not ok from node: {0}".format(health_after)) |

        rally.run_container()

        # STEP #5
        show_step(5)
        results = rally.run_tempest(pattern='set=smoke',
                                    conf_name='/var/lib/ceph_mcp.conf',
                                    designate_plugin=False,
                                    report_prefix=func_name,
                                    timeout=1800)
        # Step #6
        show_step(6)
        assert not results['fail'], self.show_failed_msg(results['fail'])

        LOG.info("*************** DONE **************")