# Copyright 2017 Mirantis, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import time

import pytest
from devops.helpers import helpers

from tcp_tests import logger

LOG = logger.logger


class TestFailoverCeph(object):
    """Test class for testing MCP Ceph failover"""

    TEMPEST_JOB_NAME = 'cvp-tempest'
    TEMPEST_JOB_PARAMETERS = {
        'TEMPEST_ENDPOINT_TYPE': 'internalURL',
        'TEMPEST_TEST_PATTERN': 'set=smoke'
    }

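    # cvp-sanity is run with the salt_master, test_ceph_health and
    # test_prometheus_alert_count checks excluded via the pytest -k filter
    # below.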
    SANITY_JOB_NAME = 'cvp-sanity'
    SANITY_JOB_PARAMETERS = {
        'EXTRA_PARAMS': {
            'envs': [
                "tests_set=-k "
                "'not salt_master and not test_ceph_health and not "
                "test_prometheus_alert_count'"
            ]
        }
    }

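    # Timeouts (in seconds) for a Jenkins job to be started and for its
    # build to finish.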
    JENKINS_START_TIMEOUT = 60
    JENKINS_BUILD_TIMEOUT = 60 * 25

    def get_ceph_health(self, ssh, node_names):
        """Get Ceph health status on specified nodes

        :param ssh: UnderlaySSHManager, tcp-qa SSH manager instance
        :param node_names: list, full hostnames of Ceph nodes to check
        :return: dict, Ceph health status from each node (output of the
            'ceph health' command executed on that node)
        """
        return {
            node_name: ssh.check_call(
                "ceph health",
                node_name=node_name,
                raise_on_err=False)['stdout_str']
            for node_name in node_names
        }

    def wait_healthy_ceph(self,
                          ssh,
                          node_names=None,
                          time_sec=30):
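        """Poll Ceph health until all nodes report HEALTH_OK or time runs out

        :param ssh: UnderlaySSHManager, tcp-qa SSH manager instance
        :param node_names: list, full hostnames of the nodes to check
        :param time_sec: int, how long (in seconds) to keep polling
        :return: tuple (bool, str) with the resulting health status and an
            error message that is empty when the cluster is healthy
        """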
        ceph_health = ""
        status = False

        start_time = time.time()
        while time.time() - start_time < time_sec and not status:
            ceph_health = self.get_ceph_health(ssh, node_names)
            status = all("HEALTH_OK" in health
                         for health in ceph_health.values())
            if status:
                break
            LOG.info("Retry getting ceph health because Ceph is unhealthy: {}"
                     .format(ceph_health))
            time.sleep(10)

        error = "" if status \
            else "Ceph health is not OK: {0}".format(ceph_health)
        return status, error

    @pytest.mark.grab_versions
    @pytest.mark.restart_osd_node
    def test_restart_osd_node(
            self,
            salt_actions,
            drivetrain_actions,
            underlay_actions,
            show_step):
        """Verify that Ceph OSD node is not affected by system restart

        Scenario:
            1. Find Ceph OSD nodes
            2. Check Ceph cluster health before node restart
            3. Restart 1 Ceph OSD node
            4. Check Ceph cluster health after node restart
            5. Run Tempest smoke test suite
            6. Run test_ceph_status.py::test_ceph_osd and
               test_services.py::test_check_services[osd] sanity tests

        Duration: ~9 min
        """
        salt = salt_actions
        ssh = underlay_actions
        dt = drivetrain_actions

        # Find Ceph OSD nodes
        show_step(1)
        tgt = "I@ceph:osd"
        osd_hosts = list(salt.local(tgt, "test.ping")['return'][0].keys())
        # Select a node for the test
        osd_host = osd_hosts[0]

        # Check Ceph cluster health before node restart
        show_step(2)
        result, error = self.wait_healthy_ceph(ssh=ssh,
                                               node_names=osd_hosts)
        assert result, error

        # Restart a Ceph OSD node
        show_step(3)
        LOG.info("Sending reboot command to '{}' node.".format(osd_host))
        remote = ssh.remote(node_name=osd_host)
        remote.execute_async("/sbin/shutdown -r now")

        # Wait for restarted node to boot and become accessible
        helpers.wait_pass(remote.reconnect, timeout=60 * 3, interval=5)
        echo_request = "echo"
        echo_response = salt.local(
            osd_host, "test.echo", echo_request)['return'][0]
        assert echo_request == echo_response[osd_host], (
            "Minion on node '{}' is not responding after node "
            "reboot.".format(osd_host)
        )
        LOG.info("'{}' node is back after reboot.".format(osd_host))

        # Check Ceph cluster health after node restart
        show_step(4)
        result, error = self.wait_healthy_ceph(ssh=ssh,
                                               node_names=osd_hosts,
                                               time_sec=120)
        assert result, error

        # Run Tempest smoke test suite
        show_step(5)
        job_result, job_description = dt.start_job_on_jenkins(
            job_name=self.TEMPEST_JOB_NAME,
            job_parameters=self.TEMPEST_JOB_PARAMETERS,
            start_timeout=self.JENKINS_START_TIMEOUT,
            build_timeout=self.JENKINS_BUILD_TIMEOUT
        )

        assert job_result == 'SUCCESS', (
            "'{0}' job run status is {1} after executing Tempest smoke "
            "tests".format(
                self.TEMPEST_JOB_NAME, job_description)
        )

        # Run Sanity test
        show_step(6)
        job_result, job_description = dt.start_job_on_jenkins(
            job_name=self.SANITY_JOB_NAME,
            job_parameters=self.SANITY_JOB_PARAMETERS,
            start_timeout=self.JENKINS_START_TIMEOUT,
            build_timeout=self.JENKINS_BUILD_TIMEOUT
        )
        assert job_result == 'SUCCESS', (
            "'{0}' job run status is {1} after executing selected sanity "
            "tests".format(
                self.SANITY_JOB_NAME, job_description)
        )

    @pytest.mark.grab_versions
    @pytest.mark.restart_cmn_node
    def test_restart_cmn_node(
            self,
            salt_actions,
            drivetrain_actions,
            underlay_actions,
            show_step):
        """Verify that Ceph CMN node is not affected by system restart

        Scenario:
            1. Find Ceph CMN nodes
            2. Check Ceph cluster health before node restart
            3. Restart 1 Ceph CMN node
            4. Check Ceph cluster health after node restart
            5. Run Tempest smoke test suite
            6. Run test_ceph_status.py::test_ceph_replicas and
               test_services.py::test_check_services[cmn] sanity tests

        Duration: ~9 min
        """
        salt = salt_actions
        ssh = underlay_actions
        dt = drivetrain_actions

        # Find Ceph CMN nodes
        show_step(1)
        tgt = "I@ceph:mon"
        cmn_hosts = list(salt.local(tgt, "test.ping")['return'][0].keys())
        # Select a node for the test
        cmn_host = cmn_hosts[0]

        # Check Ceph cluster health before node restart
        show_step(2)
        result, error = self.wait_healthy_ceph(ssh=ssh,
                                               node_names=cmn_hosts)
        assert result, error

        # Restart a Ceph CMN node
        show_step(3)
        LOG.info("Sending reboot command to '{}' node.".format(cmn_host))
        remote = ssh.remote(node_name=cmn_host)
        remote.execute_async("/sbin/shutdown -r now")

        # Wait for restarted node to boot and become accessible
        helpers.wait_pass(remote.reconnect, timeout=60 * 3, interval=5)
        echo_request = "echo"
        echo_response = salt.local(
            cmn_host, "test.echo", echo_request)['return'][0]
        assert echo_request == echo_response[cmn_host], (
            "Minion on node '{}' is not responding after node "
            "reboot.".format(cmn_host)
        )
        LOG.info("'{}' node is back after reboot.".format(cmn_host))

        # Check Ceph cluster health after node restart
        show_step(4)
        result, error = self.wait_healthy_ceph(ssh=ssh,
                                               node_names=cmn_hosts,
                                               time_sec=120)
        assert result, error

        # Run Tempest smoke test suite
        show_step(5)
        job_result, job_description = dt.start_job_on_jenkins(
            job_name=self.TEMPEST_JOB_NAME,
            job_parameters=self.TEMPEST_JOB_PARAMETERS,
            start_timeout=self.JENKINS_START_TIMEOUT,
            build_timeout=self.JENKINS_BUILD_TIMEOUT
        )

        assert job_result == 'SUCCESS', (
            "'{0}' job run status is {1} after executing Tempest smoke "
            "tests".format(
                self.TEMPEST_JOB_NAME, job_description)
        )

        # Run Sanity test
        show_step(6)
        job_result, job_description = dt.start_job_on_jenkins(
            job_name=self.SANITY_JOB_NAME,
            job_parameters=self.SANITY_JOB_PARAMETERS,
            start_timeout=self.JENKINS_START_TIMEOUT,
            build_timeout=self.JENKINS_BUILD_TIMEOUT
        )
        assert job_result == 'SUCCESS', (
            "'{0}' job run status is {1} after executing selected sanity "
            "tests".format(
                self.SANITY_JOB_NAME, job_description)
        )

    @pytest.mark.grab_versions
    @pytest.mark.restart_rgw_node
    def test_restart_rgw_node(
            self,
            salt_actions,
            drivetrain_actions,
            underlay_actions,
            show_step):
        """Verify that Ceph RGW node is not affected by system restart

        Scenario:
            1. Find Ceph RGW nodes
            2. Check Ceph cluster health before node restart
            3. Restart 1 Ceph RGW node
            4. Check Ceph cluster health after node restart
            5. Run Tempest smoke test suite
            6. Run test_services.py::test_check_services[rgw] sanity test

        Duration: ~9 min
        """
        salt = salt_actions
        ssh = underlay_actions
        dt = drivetrain_actions

        # Find Ceph RGW nodes
        show_step(1)
        tgt = "I@ceph:radosgw"
        rgw_hosts = list(salt.local(tgt, "test.ping")['return'][0].keys())
        # Select a node for the test
        rgw_host = rgw_hosts[0]

        # Check Ceph cluster health before node restart
        show_step(2)
        result, error = self.wait_healthy_ceph(ssh=ssh,
                                               node_names=rgw_hosts)
        assert result, error

        # Restart a Ceph RGW node
        show_step(3)
        LOG.info("Sending reboot command to '{}' node.".format(rgw_host))
        remote = ssh.remote(node_name=rgw_host)
        remote.execute_async("/sbin/shutdown -r now")

        # Wait for restarted node to boot and become accessible
        helpers.wait_pass(remote.reconnect, timeout=60 * 3, interval=5)
        echo_request = "echo"
        echo_response = salt.local(
            rgw_host, "test.echo", echo_request)['return'][0]
        assert echo_request == echo_response[rgw_host], (
            "Minion on node '{}' is not responding after node "
            "reboot.".format(rgw_host)
        )
        LOG.info("'{}' node is back after reboot.".format(rgw_host))

        # Check Ceph cluster health after node restart
        show_step(4)
        result, error = self.wait_healthy_ceph(ssh=ssh,
                                               node_names=rgw_hosts,
                                               time_sec=120)
        assert result, error

        # Run Tempest smoke test suite
        show_step(5)
        job_result, job_description = dt.start_job_on_jenkins(
            job_name=self.TEMPEST_JOB_NAME,
            job_parameters=self.TEMPEST_JOB_PARAMETERS,
            start_timeout=self.JENKINS_START_TIMEOUT,
            build_timeout=self.JENKINS_BUILD_TIMEOUT
        )

        assert job_result == 'SUCCESS', (
            "'{0}' job run status is {1} after executing Tempest smoke "
            "tests".format(
                self.TEMPEST_JOB_NAME, job_description)
        )

        # Run Sanity test
        show_step(6)
        job_result, job_description = dt.start_job_on_jenkins(
            job_name=self.SANITY_JOB_NAME,
            job_parameters=self.SANITY_JOB_PARAMETERS,
            start_timeout=self.JENKINS_START_TIMEOUT,
            build_timeout=self.JENKINS_BUILD_TIMEOUT
        )
        assert job_result == 'SUCCESS', (
            "'{0}' job run status is {1} after executing selected sanity "
            "tests".format(
                self.SANITY_JOB_NAME, job_description)
        )

    # #######################################################################
    # ############# Tests for fuel-devops deployed environments #############
    # #######################################################################
    def show_failed_msg(self, failed):
        return "There are failed tempest tests:\n\n {0}".format(
            '\n\n '.join([(name + ': ' + detail)
                          for name, detail in failed.items()]))

    @pytest.mark.grab_versions
    @pytest.mark.fail_snapshot
    def _test_restart_osd_node(self, func_name, underlay, config,
                               openstack_deployed, ceph_deployed,
                               openstack_actions, hardware,
                               rally, show_step):
        """Test restart ceph osd node

        Scenario:
            1. Find ceph osd nodes
            2. Check ceph health before restart
            3. Restart 1 ceph osd node
            4. Check ceph health after restart
            5. Run tempest smoke after failover
            6. Check tempest report for failed tests

        Requirements:
            - Salt cluster
            - OpenStack cluster
            - Ceph cluster
        """
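        # Re-sync time on all nodes: stop ntp, run a one-shot sync
        # (ntpd -gq), then start ntp again.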
        openstack_actions._salt.local(
            tgt='*', fun='cmd.run',
            args='service ntp stop; ntpd -gq; service ntp start')
        # STEP #1
        show_step(1)
        osd_node_names = underlay.get_target_node_names(
            target='osd')

        # STEP #2
        show_step(2)
        # Get the ceph health output before restart
        result, error = self.wait_healthy_ceph(ssh=underlay,
                                               node_names=osd_node_names)
        assert result, error

        # STEP #3
        show_step(3)
        hardware.warm_restart_nodes(underlay, 'osd01')

        openstack_actions._salt.local(
            tgt='*', fun='cmd.run',
            args='service ntp stop; ntpd -gq; service ntp start')

        # STEP #4
        show_step(4)
        # Get the ceph health output after restart
        result, error = self.wait_healthy_ceph(ssh=underlay,
                                               node_names=osd_node_names)
        assert result, error

        rally.run_container()

        # STEP #5
        show_step(5)
        results = rally.run_tempest(pattern='set=smoke',
                                    conf_name='/var/lib/ceph_mcp.conf',
                                    report_prefix=func_name,
                                    designate_plugin=False,
                                    timeout=1800)
        # STEP #6
        show_step(6)
        assert not results['fail'], self.show_failed_msg(results['fail'])

        LOG.info("*************** DONE **************")

    @pytest.mark.grab_versions
    @pytest.mark.fail_snapshot
    def _test_restart_cmn_node(self, func_name, underlay, config,
                               openstack_deployed, ceph_deployed,
                               core_actions,
                               salt_actions, openstack_actions,
                               rally, show_step, hardware):
        """Test restart ceph cmn node

        Scenario:
            1. Find ceph cmn nodes
            2. Check ceph health before restart
            3. Restart 1 ceph cmn node
            4. Check ceph health after restart
            5. Run tempest smoke after failover
            6. Check tempest report for failed tests

        Requirements:
            - Salt cluster
            - OpenStack cluster
            - Ceph cluster
        """
        openstack_actions._salt.local(
            tgt='*', fun='cmd.run',
            args='service ntp stop; ntpd -gq; service ntp start')
        # STEP #1
        show_step(1)
        cmn_node_names = underlay.get_target_node_names(
            target='cmn')

        # STEP #2
        show_step(2)
        # Get the ceph health output before restart
        result, error = self.wait_healthy_ceph(ssh=underlay,
                                               node_names=cmn_node_names)
        assert result, error

        # STEP #3
        show_step(3)
        hardware.warm_restart_nodes(underlay, 'cmn01')

        openstack_actions._salt.local(
            tgt='*', fun='cmd.run',
            args='service ntp stop; ntpd -gq; service ntp start')

        # STEP #4
        show_step(4)
        # Get the ceph health output after restart
        result, error = self.wait_healthy_ceph(ssh=underlay,
                                               node_names=cmn_node_names,
                                               time_sec=120)
        assert result, error

        rally.run_container()

        # STEP #5
        show_step(5)
        results = rally.run_tempest(pattern='set=smoke',
                                    conf_name='/var/lib/ceph_mcp.conf',
                                    report_prefix=func_name,
                                    designate_plugin=False,
                                    timeout=1800)
        # STEP #6
        show_step(6)
        assert not results['fail'], self.show_failed_msg(results['fail'])

        LOG.info("*************** DONE **************")

    @pytest.mark.grab_versions
    @pytest.mark.fail_snapshot
    def _test_restart_rgw_node(self, func_name, underlay, config,
                               openstack_deployed, ceph_deployed,
                               core_actions, hardware,
                               salt_actions, openstack_actions,
                               rally, show_step):
        """Test restart ceph rgw node

        Scenario:
            1. Find ceph rgw nodes
            2. Check ceph health before restart
            3. Restart 1 ceph rgw node
            4. Check ceph health after restart
            5. Run tempest smoke after failover
            6. Check tempest report for failed tests

        Requirements:
            - Salt cluster
            - OpenStack cluster
            - Ceph cluster
        """
        openstack_actions._salt.local(
            tgt='*', fun='cmd.run',
            args='service ntp stop; ntpd -gq; service ntp start')

        # STEP #1
        show_step(1)
        rgw_node_names = underlay.get_target_node_names(
            target='rgw')
        if not rgw_node_names:
            pytest.skip('Skip as there are no rgw nodes in the deployment')

        # STEP #2
        show_step(2)
        # Get the ceph health output before restart
        result, error = self.wait_healthy_ceph(ssh=underlay,
                                               node_names=rgw_node_names)
        assert result, error

        # STEP #3
        show_step(3)
        hardware.warm_restart_nodes(underlay, 'rgw01')

        openstack_actions._salt.local(
            tgt='*', fun='cmd.run',
            args='service ntp stop; ntpd -gq; service ntp start')

        # STEP #4
        show_step(4)
        # Get the ceph health output after restart
        result, error = self.wait_healthy_ceph(ssh=underlay,
                                               node_names=rgw_node_names,
                                               time_sec=120)
        assert result, error

        rally.run_container()

        # STEP #5
        show_step(5)
        results = rally.run_tempest(pattern='set=smoke',
                                    conf_name='/var/lib/ceph_mcp.conf',
                                    designate_plugin=False,
                                    report_prefix=func_name,
                                    timeout=1800)
        # STEP #6
        show_step(6)
        assert not results['fail'], self.show_failed_msg(results['fail'])

        LOG.info("*************** DONE **************")