blob: a89d711350290c5ca603f201edbcfdc8a5bc7691 [file] [log] [blame]
Tatyana Leontovichc72604d2018-01-04 17:58:00 +02001# Copyright 2017 Mirantis, Inc.
2#
3# Licensed under the Apache License, Version 2.0 (the "License"); you may
4# not use this file except in compliance with the License. You may obtain
5# a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12# License for the specific language governing permissions and limitations
13# under the License.
14import pytest
15
Dmitriy Kruglova1aecc22019-08-25 19:26:44 +020016from devops.helpers import helpers
Tatyana Leontovichc72604d2018-01-04 17:58:00 +020017from tcp_tests import logger
18
19LOG = logger.logger
20
21
class TestFailoverCeph(object):
    """Test class for testing MCP Ceph failover

    Two groups of tests live here:

    * ``test_restart_<role>_node`` reboot one Ceph node over SSH, wait for
      its Salt minion to answer and then run the 'cvp-tempest' and
      'cvp-sanity' Jenkins jobs;
    * ``_test_restart_<role>_node`` are the tests for fuel-devops deployed
      environments: they restart a node through the ``hardware`` fixture
      and run the Tempest smoke set through ``rally``. NOTE(review): the
      leading underscore presumably keeps them out of default pytest
      collection - confirm before renaming.
    """

    TEMPEST_JOB_NAME = 'cvp-tempest'
    TEMPEST_JOB_PARAMETERS = {
        'TEMPEST_ENDPOINT_TYPE': 'internalURL',
        'TEMPEST_TEST_PATTERN': 'set=smoke'
    }

    SANITY_JOB_NAME = 'cvp-sanity'
    SANITY_JOB_PARAMETERS = {
        'EXTRA_PARAMS': {
            'envs': [
                "tests_set=-k "
                "'not test_ceph_health and not test_prometheus_alert_count'"
            ]
        }
    }

    JENKINS_START_TIMEOUT = 60
    JENKINS_BUILD_TIMEOUT = 60 * 15

    # Seconds to wait for a rebooted node to accept a reconnect, and the
    # polling interval used while waiting
    NODE_REBOOT_TIMEOUT = 60 * 3
    NODE_REBOOT_INTERVAL = 5

    # Shell command run on every minion by the fuel-devops tests: stops ntp,
    # performs a one-shot 'ntpd -gq' run and starts the service again
    _NTP_RESYNC_CMD = 'service ntp stop; ntpd -gq; service ntp start'

    def get_ceph_health(self, ssh, node_names):
        """Get Ceph health status on specified nodes

        :param ssh: UnderlaySSHManager, tcp-qa SSH manager instance
        :param node_names: list, full hostnames of Ceph OSD nodes
        :return: dict, Ceph health status from each OSD node (output of
            'ceph -s' command executed on each node)
        """
        # raise_on_err=False: a failing 'ceph -s' must not abort the test
        # here; callers inspect the collected output themselves
        return {
            node_name: ssh.check_call(
                "ceph -s",
                node_name=node_name,
                raise_on_err=False)['stdout_str']
            for node_name in node_names
        }

    # #######################################################################
    # ######### Private helpers shared by the SSH-reboot based tests ########
    # #######################################################################
    def _find_minions(self, salt, tgt):
        """Return IDs of the Salt minions matching the target

        :param salt: object providing Salt ``local()`` calls (the
            ``salt_actions`` fixture)
        :param tgt: str, Salt target expression, e.g. "I@ceph:osd"
        :return: list, keys of the 'test.ping' response for the target
        """
        # list() is required on Python 3, where dict.keys() returns a view
        # that does not support indexing
        minions = list(salt.local(tgt, "test.ping")['return'][0].keys())
        assert minions, (
            "No minions matching '{}' target were found.".format(tgt))
        return minions

    def _reboot_node(self, salt, ssh, host):
        """Reboot the node over SSH and wait until its minion responds

        :param salt: object providing Salt ``local()`` calls
        :param ssh: UnderlaySSHManager, tcp-qa SSH manager instance
        :param host: str, name of the node to reboot
        """
        LOG.info("Sending reboot command to '{}' node.".format(host))
        remote = ssh.remote(node_name=host)
        # Asynchronous on purpose: the SSH session dies together with the
        # node, so there is no exit code to wait for
        remote.execute_async("/sbin/shutdown -r now")

        # Wait for restarted node to boot and become accessible
        helpers.wait_pass(
            remote.reconnect,
            timeout=self.NODE_REBOOT_TIMEOUT,
            interval=self.NODE_REBOOT_INTERVAL)
        echo_request = "echo"
        echo_response = salt.local(
            host, "test.echo", echo_request)['return'][0]
        # .get() keeps a silent minion a readable assertion failure instead
        # of a bare KeyError
        assert echo_request == echo_response.get(host), (
            "Minion on node '{}' node is not responding after node "
            "reboot.".format(host)
        )
        LOG.info("'{}' node is back after reboot.".format(host))

    def _run_jenkins_job(self, dt, job_name, job_parameters, purpose):
        """Run a Jenkins job and assert that it finished with SUCCESS

        :param dt: object providing ``start_job_on_jenkins()`` (the
            ``drivetrain_actions`` fixture)
        :param job_name: str, name of the Jenkins job
        :param job_parameters: dict, parameters passed to the job
        :param purpose: str, what the job executes; used only in the
            assertion message
        """
        job_result, job_description = dt.start_job_on_jenkins(
            job_name=job_name,
            job_parameters=job_parameters,
            start_timeout=self.JENKINS_START_TIMEOUT,
            build_timeout=self.JENKINS_BUILD_TIMEOUT
        )
        assert job_result == 'SUCCESS', (
            "'{0}' job run status is {1} after executing {2}".format(
                job_name, job_description, purpose)
        )

    def _verify_node_restart(self, tgt, salt, ssh, dt, show_step):
        """Run the common 6-step scenario of the SSH-reboot based tests

        :param tgt: str, Salt target selecting the Ceph nodes of one role
        :param salt: object providing Salt ``local()`` calls
        :param ssh: UnderlaySSHManager, tcp-qa SSH manager instance
        :param dt: object providing ``start_job_on_jenkins()``
        :param show_step: callable taking the scenario step number
        """
        # Find Ceph nodes
        show_step(1)
        hosts = self._find_minions(salt, tgt)
        # Select a node for the test
        host = hosts[0]

        # Check Ceph cluster health before node restart
        show_step(2)
        ceph_health = self.get_ceph_health(ssh, hosts)  # noqa
        # FIXME: uncomment the check once PROD-31374 is fixed
        # status = all(
        #     ["OK" in status for node, status in ceph_health.items()])
        # assert status, "Ceph health is not OK: {0}".format(ceph_health)

        # Restart a Ceph node
        show_step(3)
        self._reboot_node(salt, ssh, host)

        # Check Ceph cluster health after node restart
        show_step(4)
        ceph_health = self.get_ceph_health(ssh, hosts)  # noqa
        # FIXME: uncomment the check once PROD-31374 is fixed
        # status = all(
        #     ["OK" in status for node, status in ceph_health.items()])
        # assert status, "Ceph health is not OK: {0}".format(ceph_health)

        # Run Tempest smoke test suite
        show_step(5)
        self._run_jenkins_job(
            dt,
            self.TEMPEST_JOB_NAME,
            self.TEMPEST_JOB_PARAMETERS,
            "Tempest smoke tests")

        # Run Sanity test
        show_step(6)
        self._run_jenkins_job(
            dt,
            self.SANITY_JOB_NAME,
            self.SANITY_JOB_PARAMETERS,
            "selected sanity tests")

    @pytest.mark.grab_versions
    @pytest.mark.restart_osd_node
    def test_restart_osd_node(
            self,
            salt_actions,
            drivetrain_actions,
            underlay_actions,
            show_step):
        """Verify that Ceph OSD node is not affected by system restart

        Scenario:
            1. Find Ceph OSD nodes
            2. Check Ceph cluster health before node restart (skipped until
               PROD-31374 is fixed)
            3. Restart 1 Ceph OSD node
            4. Check Ceph cluster health after node restart (skipped until
               PROD-31374 is fixed)
            5. Run Tempest smoke test suite
            6. Run test_ceph_status.py::test_ceph_osd and
               test_services.py::test_check_services[osd] sanity tests

        Duration: ~9 min
        """
        self._verify_node_restart(
            "I@ceph:osd",
            salt_actions,
            underlay_actions,
            drivetrain_actions,
            show_step)

    @pytest.mark.grab_versions
    @pytest.mark.restart_cmn_node
    def test_restart_cmn_node(
            self,
            salt_actions,
            drivetrain_actions,
            underlay_actions,
            show_step):
        """Verify that Ceph CMN node is not affected by system restart

        Scenario:
            1. Find Ceph CMN nodes
            2. Check Ceph cluster health before node restart (skipped until
               PROD-31374 is fixed)
            3. Restart 1 Ceph CMN node
            4. Check Ceph cluster health after node restart (skipped until
               PROD-31374 is fixed)
            5. Run Tempest smoke test suite
            6. Run test_ceph_status.py::test_ceph_replicas and
               test_services.py::test_check_services[cmn] sanity tests

        Duration: ~9 min
        """
        self._verify_node_restart(
            "I@ceph:mon",
            salt_actions,
            underlay_actions,
            drivetrain_actions,
            show_step)

    @pytest.mark.grab_versions
    @pytest.mark.restart_rgw_node
    def test_restart_rgw_node(
            self,
            salt_actions,
            drivetrain_actions,
            underlay_actions,
            show_step):
        """Verify that Ceph RGW node is not affected by system restart

        Scenario:
            1. Find Ceph RGW nodes
            2. Check Ceph cluster health before node restart (skipped until
               PROD-31374 is fixed)
            3. Restart 1 Ceph RGW node
            4. Check Ceph cluster health after node restart (skipped until
               PROD-31374 is fixed)
            5. Run Tempest smoke test suite
            6. Run test_services.py::test_check_services[rgw] sanity test

        Duration: ~9 min
        """
        self._verify_node_restart(
            "I@ceph:radosgw",
            salt_actions,
            underlay_actions,
            drivetrain_actions,
            show_step)

    # #######################################################################
    # ############# Tests for fuel-devops deployed environments #############
    # #######################################################################
    def show_failed_msg(self, failed):
        """Format failed Tempest tests into an assertion message

        :param failed: dict, test name -> failure details (both strings)
        :return: str, header line followed by one "name: detail" entry per
            failed test
        """
        return "There are failed tempest tests:\n\n {0}".format(
            '\n\n '.join([(name + ': ' + detail)
                          for name, detail in failed.items()]))

    def _resync_ntp(self, openstack_actions):
        """Run the ntp stop / one-shot sync / start command on all minions

        :param openstack_actions: fixture whose ``_salt`` attribute provides
            Salt ``local()`` calls
        """
        openstack_actions._salt.local(
            tgt='*', fun='cmd.run',
            args=self._NTP_RESYNC_CMD)

    def _assert_ceph_healthy(self, health):
        """Assert that 'ceph -s' output from every node contains "OK"

        :param health: dict, node name -> 'ceph -s' output, as returned by
            get_ceph_health()
        """
        assert all("OK" in output for output in health.values()), (
            "'Ceph health is not ok from node: {0}".format(health))

    def _verify_node_warm_restart(self, target, node_name, func_name,
                                  underlay, openstack_actions, hardware,
                                  rally, show_step, skip_if_absent=False):
        """Run the common 6-step scenario of the fuel-devops based tests

        :param target: str, role passed to underlay.get_target_node_names()
        :param node_name: str, node passed to hardware.warm_restart_nodes()
        :param func_name: str, prefix for the Tempest report
        :param underlay: UnderlaySSHManager, tcp-qa SSH manager instance
        :param openstack_actions: fixture whose ``_salt`` attribute provides
            Salt ``local()`` calls
        :param hardware: fixture providing ``warm_restart_nodes()``
        :param rally: fixture providing ``run_container()`` and
            ``run_tempest()``
        :param show_step: callable taking the scenario step number
        :param skip_if_absent: bool, skip the test instead of continuing
            when no nodes of the role are deployed
        """
        self._resync_ntp(openstack_actions)

        # STEP #1
        show_step(1)
        node_names = underlay.get_target_node_names(target=target)
        if skip_if_absent and not node_names:
            pytest.skip(
                'Skip as there are not {} nodes in deploy'.format(target))

        # STEP #2
        show_step(2)
        # Get the ceph health output before restart
        health_before = self.get_ceph_health(underlay, node_names)
        self._assert_ceph_healthy(health_before)

        # STEP #3
        show_step(3)
        hardware.warm_restart_nodes(underlay, node_name)
        self._resync_ntp(openstack_actions)

        # STEP #4
        show_step(4)
        # Get the ceph health output after restart and verify *that* output
        # (the pre-restart output says nothing about the restarted cluster)
        health_after = self.get_ceph_health(underlay, node_names)
        self._assert_ceph_healthy(health_after)

        rally.run_container()

        # STEP #5
        show_step(5)
        results = rally.run_tempest(pattern='set=smoke',
                                    conf_name='/var/lib/ceph_mcp.conf',
                                    report_prefix=func_name,
                                    designate_plugin=False,
                                    timeout=1800)
        # STEP #6
        show_step(6)
        assert not results['fail'], self.show_failed_msg(results['fail'])

        LOG.info("*************** DONE **************")

    @pytest.mark.grab_versions
    @pytest.mark.fail_snapshot
    def _test_restart_osd_node(self, func_name, underlay, config,
                               openstack_deployed, ceph_deployed,
                               openstack_actions, hardware,
                               rally, show_step):
        """Test restart ceph osd node

        Scenario:
            1. Find ceph osd nodes
            2. Check ceph health before restart
            3. Restart 1 ceph osd node
            4. Check ceph health after restart
            5. Run tempest smoke after failover
            6. Check tempest report for failed tests

        Requirements:
            - Salt cluster
            - OpenStack cluster
            - Ceph cluster
        """
        self._verify_node_warm_restart(
            'osd', 'osd01', func_name, underlay, openstack_actions,
            hardware, rally, show_step)

    @pytest.mark.grab_versions
    @pytest.mark.fail_snapshot
    def _test_restart_cmn_node(self, func_name, underlay, config,
                               openstack_deployed, ceph_deployed,
                               core_actions,
                               salt_actions, openstack_actions,
                               rally, show_step, hardware):
        """Test restart ceph cmn node

        Scenario:
            1. Find ceph cmn nodes
            2. Check ceph health before restart
            3. Restart 1 ceph cmn node
            4. Check ceph health after restart
            5. Run tempest smoke after failover
            6. Check tempest report for failed tests

        Requirements:
            - Salt cluster
            - OpenStack cluster
            - Ceph cluster
        """
        self._verify_node_warm_restart(
            'cmn', 'cmn01', func_name, underlay, openstack_actions,
            hardware, rally, show_step)

    @pytest.mark.grab_versions
    @pytest.mark.fail_snapshot
    def _test_restart_rgw_node(self, func_name, underlay, config,
                               openstack_deployed, ceph_deployed,
                               core_actions, hardware,
                               salt_actions, openstack_actions,
                               rally, show_step):
        """Test restart ceph rgw node

        Scenario:
            1. Find ceph rgw nodes
            2. Check ceph health before restart
            3. Restart 1 ceph rgw node
            4. Check ceph health after restart
            5. Run tempest smoke after failover
            6. Check tempest report for failed tests

        Requirements:
            - Salt cluster
            - OpenStack cluster
            - Ceph cluster
        """
        # RGW nodes are optional in a deployment, hence skip_if_absent
        self._verify_node_warm_restart(
            'rgw', 'rgw01', func_name, underlay, openstack_actions,
            hardware, rally, show_step, skip_if_absent=True)