Tatyana Leontovich | c72604d | 2018-01-04 17:58:00 +0200 | [diff] [blame] | 1 | # Copyright 2017 Mirantis, Inc. |
| 2 | # |
| 3 | # Licensed under the Apache License, Version 2.0 (the "License"); you may |
| 4 | # not use this file except in compliance with the License. You may obtain |
| 5 | # a copy of the License at |
| 6 | # |
| 7 | # http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | # |
| 9 | # Unless required by applicable law or agreed to in writing, software |
| 10 | # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT |
| 11 | # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the |
| 12 | # License for the specific language governing permissions and limitations |
| 13 | # under the License. |
| 14 | import pytest |
| 15 | |
Hanna Arhipova | c2cb6a5 | 2021-10-20 14:30:05 +0300 | [diff] [blame] | 16 | import time |
Dmitriy Kruglov | a1aecc2 | 2019-08-25 19:26:44 +0200 | [diff] [blame] | 17 | from devops.helpers import helpers |
Tatyana Leontovich | c72604d | 2018-01-04 17:58:00 +0200 | [diff] [blame] | 18 | from tcp_tests import logger |
| 19 | |
| 20 | LOG = logger.logger |
| 21 | |
| 22 | |
class TestFailoverCeph(object):
    """Failover test suite for MCP Ceph: restart OSD/CMN/RGW nodes.

    Holds the Jenkins CVP job names, their parameters and the polling
    timeouts shared by the restart scenarios below.
    """

    # Jenkins job that runs the Tempest smoke suite
    TEMPEST_JOB_NAME = 'cvp-tempest'
    TEMPEST_JOB_PARAMETERS = {
        'TEMPEST_ENDPOINT_TYPE': 'internalURL',
        'TEMPEST_TEST_PATTERN': 'set=smoke'
    }

    # Jenkins sanity job; the -k filter excludes salt_master,
    # test_ceph_health and test_prometheus_alert_count checks
    SANITY_JOB_NAME = 'cvp-sanity'
    SANITY_JOB_PARAMETERS = {
        'EXTRA_PARAMS': {
            'envs': [
                "tests_set=-k "
                "'not salt_master and not test_ceph_health and not "
                "test_prometheus_alert_count'"
            ]
        }
    }

    # Jenkins polling limits, in seconds
    JENKINS_START_TIMEOUT = 60
    JENKINS_BUILD_TIMEOUT = 60 * 25
Dmitriy Kruglov | a1aecc2 | 2019-08-25 19:26:44 +0200 | [diff] [blame] | 45 | |
| 46 | def get_ceph_health(self, ssh, node_names): |
| 47 | """Get Ceph health status on specified nodes |
| 48 | |
| 49 | :param ssh: UnderlaySSHManager, tcp-qa SSH manager instance |
| 50 | :param node_names: list, full hostnames of Ceph OSD nodes |
| 51 | :return: dict, Ceph health status from each OSD node (output of |
| 52 | 'ceph -s' command executed on each node) |
Tatyana Leontovich | c72604d | 2018-01-04 17:58:00 +0200 | [diff] [blame] | 53 | """ |
Dmitriy Kruglov | a1aecc2 | 2019-08-25 19:26:44 +0200 | [diff] [blame] | 54 | return { |
| 55 | node_name: ssh.check_call( |
Hanna Arhipova | c2cb6a5 | 2021-10-20 14:30:05 +0300 | [diff] [blame] | 56 | "ceph health", |
Dmitriy Kruglov | a1aecc2 | 2019-08-25 19:26:44 +0200 | [diff] [blame] | 57 | node_name=node_name, |
| 58 | raise_on_err=False)['stdout_str'] |
Tatyana Leontovich | c72604d | 2018-01-04 17:58:00 +0200 | [diff] [blame] | 59 | for node_name in node_names |
| 60 | } |
Tatyana Leontovich | c72604d | 2018-01-04 17:58:00 +0200 | [diff] [blame] | 61 | |
Hanna Arhipova | c2cb6a5 | 2021-10-20 14:30:05 +0300 | [diff] [blame] | 62 | def wait_healthy_ceph(self, |
| 63 | ssh, |
| 64 | node_names=None, |
| 65 | time_sec=30): |
| 66 | ceph_health = "" |
| 67 | status = False |
| 68 | |
| 69 | start_time = time.time() |
| 70 | while time.time() - start_time < time_sec and not status: |
| 71 | ceph_health = self.get_ceph_health(ssh, node_names) |
| 72 | status = all(["HEALTH_OK" |
| 73 | in status |
| 74 | for node, status |
| 75 | in ceph_health.items()]) |
| 76 | if status: |
| 77 | break |
| 78 | LOG.info("Retry getting ceph health because Ceph is unhealthy: {}" |
| 79 | .format(ceph_health)) |
| 80 | time.sleep(10) |
| 81 | |
| 82 | error = "" if status \ |
| 83 | else "Ceph health is not OK: {0}".format(ceph_health) |
| 84 | return status, error |
| 85 | |
Dmitriy Kruglov | a1aecc2 | 2019-08-25 19:26:44 +0200 | [diff] [blame] | 86 | @pytest.mark.grab_versions |
| 87 | @pytest.mark.restart_osd_node |
| 88 | def test_restart_osd_node( |
| 89 | self, |
| 90 | salt_actions, |
Hanna Arhipova | 17b2c10 | 2019-09-06 16:44:17 +0300 | [diff] [blame] | 91 | drivetrain_actions, |
Dmitriy Kruglov | a1aecc2 | 2019-08-25 19:26:44 +0200 | [diff] [blame] | 92 | underlay_actions, |
| 93 | show_step): |
| 94 | """Verify that Ceph OSD node is not affected by system restart |
| 95 | |
| 96 | Scenario: |
| 97 | 1. Find Ceph OSD nodes |
Hanna Arhipova | c2cb6a5 | 2021-10-20 14:30:05 +0300 | [diff] [blame] | 98 | 2. Check Ceph cluster health before node restart |
Dmitriy Kruglov | a1aecc2 | 2019-08-25 19:26:44 +0200 | [diff] [blame] | 99 | 3. Restart 1 Ceph OSD node |
Hanna Arhipova | c2cb6a5 | 2021-10-20 14:30:05 +0300 | [diff] [blame] | 100 | 4. Check Ceph cluster health after node restart |
Dmitriy Kruglov | a1aecc2 | 2019-08-25 19:26:44 +0200 | [diff] [blame] | 101 | 5. Run Tempest smoke test suite |
| 102 | 6. Run test_ceph_status.py::test_ceph_osd and |
| 103 | test_services.py::test_check_services[osd] sanity tests |
| 104 | |
| 105 | Duration: ~9 min |
| 106 | """ |
| 107 | salt = salt_actions |
| 108 | ssh = underlay_actions |
Hanna Arhipova | 17b2c10 | 2019-09-06 16:44:17 +0300 | [diff] [blame] | 109 | dt = drivetrain_actions |
Dmitriy Kruglov | a1aecc2 | 2019-08-25 19:26:44 +0200 | [diff] [blame] | 110 | |
| 111 | # Find Ceph OSD nodes |
| 112 | show_step(1) |
| 113 | tgt = "I@ceph:osd" |
| 114 | osd_hosts = salt.local(tgt, "test.ping")['return'][0].keys() |
| 115 | # Select a node for the test |
| 116 | osd_host = osd_hosts[0] |
| 117 | |
| 118 | # Check Ceph cluster health before node restart |
| 119 | show_step(2) |
Hanna Arhipova | c2cb6a5 | 2021-10-20 14:30:05 +0300 | [diff] [blame] | 120 | result, error = self.wait_healthy_ceph(ssh=ssh, |
| 121 | node_names=osd_hosts) |
| 122 | assert result, error |
Dmitriy Kruglov | a1aecc2 | 2019-08-25 19:26:44 +0200 | [diff] [blame] | 123 | |
| 124 | # Restart a Ceph OSD node |
| 125 | show_step(3) |
| 126 | LOG.info("Sending reboot command to '{}' node.".format(osd_host)) |
| 127 | remote = ssh.remote(node_name=osd_host) |
| 128 | remote.execute_async("/sbin/shutdown -r now") |
| 129 | |
| 130 | # Wait for restarted node to boot and become accessible |
| 131 | helpers.wait_pass(remote.reconnect, timeout=60 * 3, interval=5) |
| 132 | echo_request = "echo" |
| 133 | echo_response = salt.local( |
| 134 | osd_host, "test.echo", echo_request)['return'][0] |
| 135 | assert echo_request == echo_response[osd_host], ( |
| 136 | "Minion on node '{}' node is not responding after node " |
| 137 | "reboot.".format(osd_host) |
| 138 | ) |
| 139 | LOG.info("'{}' node is back after reboot.".format(osd_host)) |
| 140 | |
| 141 | # Check Ceph cluster health after node restart |
| 142 | show_step(4) |
Hanna Arhipova | c2cb6a5 | 2021-10-20 14:30:05 +0300 | [diff] [blame] | 143 | result, error = self.wait_healthy_ceph(ssh=ssh, |
| 144 | node_names=osd_hosts, |
| 145 | time_sec=120) |
| 146 | assert result, error |
Dmitriy Kruglov | a1aecc2 | 2019-08-25 19:26:44 +0200 | [diff] [blame] | 147 | |
| 148 | # Run Tempest smoke test suite |
| 149 | show_step(5) |
Hanna Arhipova | 7f2a175 | 2021-04-22 09:59:11 +0300 | [diff] [blame] | 150 | job_result, job_description = dt.start_job_on_jenkins( |
Hanna Arhipova | 17b2c10 | 2019-09-06 16:44:17 +0300 | [diff] [blame] | 151 | job_name=self.TEMPEST_JOB_NAME, |
| 152 | job_parameters=self.TEMPEST_JOB_PARAMETERS, |
| 153 | start_timeout=self.JENKINS_START_TIMEOUT, |
| 154 | build_timeout=self.JENKINS_BUILD_TIMEOUT |
Dmitriy Kruglov | a1aecc2 | 2019-08-25 19:26:44 +0200 | [diff] [blame] | 155 | ) |
Hanna Arhipova | 17b2c10 | 2019-09-06 16:44:17 +0300 | [diff] [blame] | 156 | |
Hanna Arhipova | 7f2a175 | 2021-04-22 09:59:11 +0300 | [diff] [blame] | 157 | assert job_result == 'SUCCESS', ( |
Dmitriy Kruglov | a1aecc2 | 2019-08-25 19:26:44 +0200 | [diff] [blame] | 158 | "'{0}' job run status is {1} after executing Tempest smoke " |
Hanna Arhipova | 17b2c10 | 2019-09-06 16:44:17 +0300 | [diff] [blame] | 159 | "tests".format( |
Hanna Arhipova | 7f2a175 | 2021-04-22 09:59:11 +0300 | [diff] [blame] | 160 | self.TEMPEST_JOB_NAME, job_description) |
Dmitriy Kruglov | a1aecc2 | 2019-08-25 19:26:44 +0200 | [diff] [blame] | 161 | ) |
| 162 | |
| 163 | # Run Sanity test |
| 164 | show_step(6) |
Hanna Arhipova | 7f2a175 | 2021-04-22 09:59:11 +0300 | [diff] [blame] | 165 | job_result, job_description = dt.start_job_on_jenkins( |
Hanna Arhipova | 17b2c10 | 2019-09-06 16:44:17 +0300 | [diff] [blame] | 166 | job_name=self.SANITY_JOB_NAME, |
| 167 | job_parameters=self.SANITY_JOB_PARAMETERS, |
| 168 | start_timeout=self.JENKINS_START_TIMEOUT, |
| 169 | build_timeout=self.JENKINS_BUILD_TIMEOUT |
Dmitriy Kruglov | a1aecc2 | 2019-08-25 19:26:44 +0200 | [diff] [blame] | 170 | ) |
Hanna Arhipova | 7f2a175 | 2021-04-22 09:59:11 +0300 | [diff] [blame] | 171 | assert job_result == 'SUCCESS', ( |
Dmitriy Kruglov | a1aecc2 | 2019-08-25 19:26:44 +0200 | [diff] [blame] | 172 | "'{0}' job run status is {1} after executing selected sanity " |
Hanna Arhipova | 17b2c10 | 2019-09-06 16:44:17 +0300 | [diff] [blame] | 173 | "tests".format( |
Hanna Arhipova | 7f2a175 | 2021-04-22 09:59:11 +0300 | [diff] [blame] | 174 | self.SANITY_JOB_NAME, job_description) |
Dmitriy Kruglov | a1aecc2 | 2019-08-25 19:26:44 +0200 | [diff] [blame] | 175 | ) |
| 176 | |
| 177 | @pytest.mark.grab_versions |
| 178 | @pytest.mark.restart_cmn_node |
| 179 | def test_restart_cmn_node( |
| 180 | self, |
| 181 | salt_actions, |
Hanna Arhipova | 17b2c10 | 2019-09-06 16:44:17 +0300 | [diff] [blame] | 182 | drivetrain_actions, |
Dmitriy Kruglov | a1aecc2 | 2019-08-25 19:26:44 +0200 | [diff] [blame] | 183 | underlay_actions, |
| 184 | show_step): |
| 185 | """Verify that Ceph CMN node is not affected by system restart |
| 186 | |
| 187 | Scenario: |
| 188 | 1. Find Ceph CMN nodes |
Hanna Arhipova | c2cb6a5 | 2021-10-20 14:30:05 +0300 | [diff] [blame] | 189 | 2. Check Ceph cluster health before node restart |
Dmitriy Kruglov | a1aecc2 | 2019-08-25 19:26:44 +0200 | [diff] [blame] | 190 | 3. Restart 1 Ceph CMN node |
Hanna Arhipova | c2cb6a5 | 2021-10-20 14:30:05 +0300 | [diff] [blame] | 191 | 4. Check Ceph cluster health after node restart |
Dmitriy Kruglov | a1aecc2 | 2019-08-25 19:26:44 +0200 | [diff] [blame] | 192 | 5. Run Tempest smoke test suite |
| 193 | 6. Run test_ceph_status.py::test_ceph_replicas and |
| 194 | test_services.py::test_check_services[cmn] sanity tests |
| 195 | |
| 196 | Duration: ~9 min |
| 197 | """ |
| 198 | salt = salt_actions |
| 199 | ssh = underlay_actions |
Hanna Arhipova | 17b2c10 | 2019-09-06 16:44:17 +0300 | [diff] [blame] | 200 | dt = drivetrain_actions |
Dmitriy Kruglov | a1aecc2 | 2019-08-25 19:26:44 +0200 | [diff] [blame] | 201 | |
| 202 | # Find Ceph CMN nodes |
| 203 | show_step(1) |
| 204 | tgt = "I@ceph:mon" |
| 205 | cmn_hosts = salt.local(tgt, "test.ping")['return'][0].keys() |
| 206 | # Select a node for the test |
| 207 | cmn_host = cmn_hosts[0] |
| 208 | |
| 209 | # Check Ceph cluster health before node restart |
| 210 | show_step(2) |
Hanna Arhipova | c2cb6a5 | 2021-10-20 14:30:05 +0300 | [diff] [blame] | 211 | result, error = self.wait_healthy_ceph(ssh=ssh, |
| 212 | node_names=cmn_hosts) |
| 213 | assert result, error |
Dmitriy Kruglov | a1aecc2 | 2019-08-25 19:26:44 +0200 | [diff] [blame] | 214 | |
| 215 | # Restart a Ceph CMN node |
| 216 | show_step(3) |
| 217 | LOG.info("Sending reboot command to '{}' node.".format(cmn_host)) |
| 218 | remote = ssh.remote(node_name=cmn_host) |
| 219 | remote.execute_async("/sbin/shutdown -r now") |
| 220 | |
| 221 | # Wait for restarted node to boot and become accessible |
| 222 | helpers.wait_pass(remote.reconnect, timeout=60 * 3, interval=5) |
| 223 | echo_request = "echo" |
| 224 | echo_response = salt.local( |
| 225 | cmn_host, "test.echo", echo_request)['return'][0] |
| 226 | assert echo_request == echo_response[cmn_host], ( |
| 227 | "Minion on node '{}' node is not responding after node " |
| 228 | "reboot.".format(cmn_host) |
| 229 | ) |
| 230 | LOG.info("'{}' node is back after reboot.".format(cmn_host)) |
| 231 | |
| 232 | # Check Ceph cluster health after node restart |
| 233 | show_step(4) |
Hanna Arhipova | c2cb6a5 | 2021-10-20 14:30:05 +0300 | [diff] [blame] | 234 | result, error = self.wait_healthy_ceph(ssh=ssh, |
| 235 | node_names=cmn_hosts, |
| 236 | time_sec=120) |
| 237 | assert result, error |
Dmitriy Kruglov | a1aecc2 | 2019-08-25 19:26:44 +0200 | [diff] [blame] | 238 | |
| 239 | # Run Tempest smoke test suite |
| 240 | show_step(5) |
Hanna Arhipova | 7f2a175 | 2021-04-22 09:59:11 +0300 | [diff] [blame] | 241 | job_result, job_description = dt.start_job_on_jenkins( |
Hanna Arhipova | 17b2c10 | 2019-09-06 16:44:17 +0300 | [diff] [blame] | 242 | job_name=self.TEMPEST_JOB_NAME, |
| 243 | job_parameters=self.TEMPEST_JOB_PARAMETERS, |
| 244 | start_timeout=self.JENKINS_START_TIMEOUT, |
| 245 | build_timeout=self.JENKINS_BUILD_TIMEOUT |
Dmitriy Kruglov | a1aecc2 | 2019-08-25 19:26:44 +0200 | [diff] [blame] | 246 | ) |
Hanna Arhipova | 17b2c10 | 2019-09-06 16:44:17 +0300 | [diff] [blame] | 247 | |
Hanna Arhipova | 7f2a175 | 2021-04-22 09:59:11 +0300 | [diff] [blame] | 248 | assert job_result == 'SUCCESS', ( |
Dmitriy Kruglov | a1aecc2 | 2019-08-25 19:26:44 +0200 | [diff] [blame] | 249 | "'{0}' job run status is {1} after executing Tempest smoke " |
Hanna Arhipova | 17b2c10 | 2019-09-06 16:44:17 +0300 | [diff] [blame] | 250 | "tests".format( |
Hanna Arhipova | 7f2a175 | 2021-04-22 09:59:11 +0300 | [diff] [blame] | 251 | self.TEMPEST_JOB_NAME, job_description) |
Dmitriy Kruglov | a1aecc2 | 2019-08-25 19:26:44 +0200 | [diff] [blame] | 252 | ) |
| 253 | |
| 254 | # Run Sanity test |
| 255 | show_step(6) |
Hanna Arhipova | 7f2a175 | 2021-04-22 09:59:11 +0300 | [diff] [blame] | 256 | job_result, job_description = dt.start_job_on_jenkins( |
Hanna Arhipova | 17b2c10 | 2019-09-06 16:44:17 +0300 | [diff] [blame] | 257 | job_name=self.SANITY_JOB_NAME, |
| 258 | job_parameters=self.SANITY_JOB_PARAMETERS, |
| 259 | start_timeout=self.JENKINS_START_TIMEOUT, |
| 260 | build_timeout=self.JENKINS_BUILD_TIMEOUT |
Dmitriy Kruglov | a1aecc2 | 2019-08-25 19:26:44 +0200 | [diff] [blame] | 261 | ) |
Hanna Arhipova | 7f2a175 | 2021-04-22 09:59:11 +0300 | [diff] [blame] | 262 | assert job_result == 'SUCCESS', ( |
Dmitriy Kruglov | a1aecc2 | 2019-08-25 19:26:44 +0200 | [diff] [blame] | 263 | "'{0}' job run status is {1} after executing selected sanity " |
Hanna Arhipova | 17b2c10 | 2019-09-06 16:44:17 +0300 | [diff] [blame] | 264 | "tests".format( |
Hanna Arhipova | 7f2a175 | 2021-04-22 09:59:11 +0300 | [diff] [blame] | 265 | self.SANITY_JOB_NAME, job_description) |
Dmitriy Kruglov | a1aecc2 | 2019-08-25 19:26:44 +0200 | [diff] [blame] | 266 | ) |
| 267 | |
| 268 | @pytest.mark.grab_versions |
| 269 | @pytest.mark.restart_rgw_node |
| 270 | def test_restart_rgw_node( |
| 271 | self, |
| 272 | salt_actions, |
Hanna Arhipova | 17b2c10 | 2019-09-06 16:44:17 +0300 | [diff] [blame] | 273 | drivetrain_actions, |
Dmitriy Kruglov | a1aecc2 | 2019-08-25 19:26:44 +0200 | [diff] [blame] | 274 | underlay_actions, |
| 275 | show_step): |
| 276 | """Verify that Ceph RGW node is not affected by system restart |
| 277 | |
| 278 | Scenario: |
| 279 | 1. Find Ceph RGW nodes |
Hanna Arhipova | c2cb6a5 | 2021-10-20 14:30:05 +0300 | [diff] [blame] | 280 | 2. Check Ceph cluster health before node restart |
Dmitriy Kruglov | a1aecc2 | 2019-08-25 19:26:44 +0200 | [diff] [blame] | 281 | 3. Restart 1 Ceph RGW node |
Hanna Arhipova | c2cb6a5 | 2021-10-20 14:30:05 +0300 | [diff] [blame] | 282 | 4. Check Ceph cluster health after node restart |
Dmitriy Kruglov | a1aecc2 | 2019-08-25 19:26:44 +0200 | [diff] [blame] | 283 | 5. Run Tempest smoke test suite |
| 284 | 6. Run test_services.py::test_check_services[rgw] sanity test |
| 285 | |
| 286 | Duration: ~9 min |
| 287 | """ |
| 288 | salt = salt_actions |
| 289 | ssh = underlay_actions |
Hanna Arhipova | 17b2c10 | 2019-09-06 16:44:17 +0300 | [diff] [blame] | 290 | dt = drivetrain_actions |
Dmitriy Kruglov | a1aecc2 | 2019-08-25 19:26:44 +0200 | [diff] [blame] | 291 | |
| 292 | # Find Ceph RGW nodes |
| 293 | show_step(1) |
| 294 | tgt = "I@ceph:radosgw" |
| 295 | rgw_hosts = salt.local(tgt, "test.ping")['return'][0].keys() |
| 296 | # Select a node for the test |
| 297 | rgw_host = rgw_hosts[0] |
| 298 | |
| 299 | # Check Ceph cluster health before node restart |
| 300 | show_step(2) |
Hanna Arhipova | c2cb6a5 | 2021-10-20 14:30:05 +0300 | [diff] [blame] | 301 | result, error = self.wait_healthy_ceph(ssh=ssh, |
| 302 | node_names=rgw_hosts) |
| 303 | assert result, error |
Dmitriy Kruglov | a1aecc2 | 2019-08-25 19:26:44 +0200 | [diff] [blame] | 304 | |
| 305 | # Restart a Ceph RGW node |
| 306 | show_step(3) |
| 307 | LOG.info("Sending reboot command to '{}' node.".format(rgw_host)) |
| 308 | remote = ssh.remote(node_name=rgw_host) |
| 309 | remote.execute_async("/sbin/shutdown -r now") |
| 310 | |
| 311 | # Wait for restarted node to boot and become accessible |
| 312 | helpers.wait_pass(remote.reconnect, timeout=60 * 3, interval=5) |
| 313 | echo_request = "echo" |
| 314 | echo_response = salt.local( |
| 315 | rgw_host, "test.echo", echo_request)['return'][0] |
| 316 | assert echo_request == echo_response[rgw_host], ( |
| 317 | "Minion on node '{}' node is not responding after node " |
| 318 | "reboot.".format(rgw_host) |
| 319 | ) |
| 320 | LOG.info("'{}' node is back after reboot.".format(rgw_host)) |
| 321 | |
| 322 | # Check Ceph cluster health after node restart |
| 323 | show_step(4) |
Hanna Arhipova | c2cb6a5 | 2021-10-20 14:30:05 +0300 | [diff] [blame] | 324 | result, error = self.wait_healthy_ceph(ssh=ssh, |
| 325 | node_names=rgw_hosts, |
| 326 | time_sec=120) |
| 327 | |
| 328 | assert result, error |
Dmitriy Kruglov | a1aecc2 | 2019-08-25 19:26:44 +0200 | [diff] [blame] | 329 | |
| 330 | # Run Tempest smoke test suite |
| 331 | show_step(5) |
Hanna Arhipova | 7f2a175 | 2021-04-22 09:59:11 +0300 | [diff] [blame] | 332 | job_result, job_description = dt.start_job_on_jenkins( |
Hanna Arhipova | 17b2c10 | 2019-09-06 16:44:17 +0300 | [diff] [blame] | 333 | job_name=self.TEMPEST_JOB_NAME, |
| 334 | job_parameters=self.TEMPEST_JOB_PARAMETERS, |
| 335 | start_timeout=self.JENKINS_START_TIMEOUT, |
| 336 | build_timeout=self.JENKINS_BUILD_TIMEOUT |
Dmitriy Kruglov | a1aecc2 | 2019-08-25 19:26:44 +0200 | [diff] [blame] | 337 | ) |
Hanna Arhipova | 17b2c10 | 2019-09-06 16:44:17 +0300 | [diff] [blame] | 338 | |
Hanna Arhipova | 7f2a175 | 2021-04-22 09:59:11 +0300 | [diff] [blame] | 339 | assert job_result == 'SUCCESS', ( |
Dmitriy Kruglov | a1aecc2 | 2019-08-25 19:26:44 +0200 | [diff] [blame] | 340 | "'{0}' job run status is {1} after executing Tempest smoke " |
Hanna Arhipova | 17b2c10 | 2019-09-06 16:44:17 +0300 | [diff] [blame] | 341 | "tests".format( |
Hanna Arhipova | 7f2a175 | 2021-04-22 09:59:11 +0300 | [diff] [blame] | 342 | self.TEMPEST_JOB_NAME, job_description) |
Dmitriy Kruglov | a1aecc2 | 2019-08-25 19:26:44 +0200 | [diff] [blame] | 343 | ) |
| 344 | |
| 345 | # Run Sanity test |
| 346 | show_step(6) |
Hanna Arhipova | 7f2a175 | 2021-04-22 09:59:11 +0300 | [diff] [blame] | 347 | job_result, job_description = dt.start_job_on_jenkins( |
Hanna Arhipova | 17b2c10 | 2019-09-06 16:44:17 +0300 | [diff] [blame] | 348 | job_name=self.SANITY_JOB_NAME, |
| 349 | job_parameters=self.SANITY_JOB_PARAMETERS, |
| 350 | start_timeout=self.JENKINS_START_TIMEOUT, |
| 351 | build_timeout=self.JENKINS_BUILD_TIMEOUT |
Dmitriy Kruglov | a1aecc2 | 2019-08-25 19:26:44 +0200 | [diff] [blame] | 352 | ) |
Hanna Arhipova | 7f2a175 | 2021-04-22 09:59:11 +0300 | [diff] [blame] | 353 | assert job_result == 'SUCCESS', ( |
Dmitriy Kruglov | a1aecc2 | 2019-08-25 19:26:44 +0200 | [diff] [blame] | 354 | "'{0}' job run status is {1} after executing selected sanity " |
Hanna Arhipova | 17b2c10 | 2019-09-06 16:44:17 +0300 | [diff] [blame] | 355 | "tests".format( |
Hanna Arhipova | 7f2a175 | 2021-04-22 09:59:11 +0300 | [diff] [blame] | 356 | self.SANITY_JOB_NAME, job_description) |
Dmitriy Kruglov | a1aecc2 | 2019-08-25 19:26:44 +0200 | [diff] [blame] | 357 | ) |
| 358 | |
| 359 | # ####################################################################### |
| 360 | # ############# Tests for fuel-devops deployed environments ############# |
| 361 | # ####################################################################### |
Tatyana Leontovich | c72604d | 2018-01-04 17:58:00 +0200 | [diff] [blame] | 362 | def show_failed_msg(self, failed): |
| 363 | return "There are failed tempest tests:\n\n {0}".format( |
| 364 | '\n\n '.join([(name + ': ' + detail) |
| 365 | for name, detail in failed.items()])) |
| 366 | |
    @pytest.mark.grab_versions
    @pytest.mark.fail_snapshot
    def _test_restart_osd_node(self, func_name, underlay, config,
                               openstack_deployed, ceph_deployed,
                               openstack_actions, hardware,
                               rally, show_step):
        """Test restart ceph osd node

        Scenario:
            1. Find ceph osd nodes
            2. Check ceph health before restart
            3. Restart 1 ceph osd node
            4. Check ceph health after restart
            5. Run tempest smoke after failover
            6. Check tempest report for failed tests

        Requirements:
            - Salt cluster
            - OpenStack cluster
            - Ceph cluster
        """
        # One-shot NTP resync on every node (stop service, force sync with
        # ntpd -gq, start service) before running the scenario
        openstack_actions._salt.local(
            tgt='*', fun='cmd.run',
            args='service ntp stop; ntpd -gq; service ntp start')
        # STEP #1
        show_step(1)
        osd_node_names = underlay.get_target_node_names(
            target='osd')

        # STEP #2
        show_step(2)
        # Get the ceph health output before restart
        result, error = self.wait_healthy_ceph(ssh=underlay,
                                               node_names=osd_node_names)
        assert result, error

        # STEP #3
        show_step(3)
        # Warm-restart a single OSD node via the hardware manager
        hardware.warm_restart_nodes(underlay, 'osd01')

        # Resync NTP again after the node restart
        openstack_actions._salt.local(
            tgt='*', fun='cmd.run',
            args='service ntp stop; ntpd -gq; service ntp start')

        # STEP #4
        show_step(4)
        # Get the ceph health output after restart (default 30s budget)
        result, error = self.wait_healthy_ceph(ssh=underlay,
                                               node_names=osd_node_names)

        assert result, error

        rally.run_container()

        # STEP #5
        show_step(5)
        # Run the tempest smoke set; report is prefixed with the test
        # function name so parallel runs can be told apart
        results = rally.run_tempest(pattern='set=smoke',
                                    conf_name='/var/lib/ceph_mcp.conf',
                                    report_prefix=func_name,
                                    designate_plugin=False,
                                    timeout=1800)
        # Step #6
        show_step(6)
        assert not results['fail'], self.show_failed_msg(results['fail'])

        LOG.info("*************** DONE **************")
| 433 | |
    @pytest.mark.grab_versions
    @pytest.mark.fail_snapshot
    def _test_restart_cmn_node(self, func_name, underlay, config,
                               openstack_deployed, ceph_deployed,
                               core_actions,
                               salt_actions, openstack_actions,
                               rally, show_step, hardware):
        """Test restart ceph cmn node

        Scenario:
            1. Find ceph cmn nodes
            2. Check ceph health before restart
            3. Restart 1 ceph cmn node
            4. Check ceph health after restart
            5. Run tempest smoke after failover
            6. Check tempest report for failed tests

        Requirements:
            - Salt cluster
            - OpenStack cluster
            - Ceph cluster
        """
        # One-shot NTP resync on every node before running the scenario
        openstack_actions._salt.local(
            tgt='*', fun='cmd.run',
            args='service ntp stop; ntpd -gq; service ntp start')
        # STEP #1
        show_step(1)
        cmn_node_names = underlay.get_target_node_names(
            target='cmn')

        # STEP #2
        show_step(2)
        # Get the ceph health output before restart
        result, error = self.wait_healthy_ceph(ssh=underlay,
                                               node_names=cmn_node_names)

        assert result, error

        # STEP #3
        show_step(3)
        # Warm-restart a single monitor node via the hardware manager
        hardware.warm_restart_nodes(underlay, 'cmn01')

        # Resync NTP again after the node restart
        openstack_actions._salt.local(
            tgt='*', fun='cmd.run',
            args='service ntp stop; ntpd -gq; service ntp start')

        # STEP #4
        show_step(4)
        # Get the ceph health output after restart; a longer budget (120s)
        # is used here than for the pre-restart check
        result, error = self.wait_healthy_ceph(ssh=underlay,
                                               node_names=cmn_node_names,
                                               time_sec=120)

        assert result, error

        rally.run_container()

        # STEP #5
        show_step(5)
        # Run the tempest smoke set; report is prefixed with the test
        # function name so parallel runs can be told apart
        results = rally.run_tempest(pattern='set=smoke',
                                    conf_name='/var/lib/ceph_mcp.conf',
                                    report_prefix=func_name,
                                    designate_plugin=False,
                                    timeout=1800)
        # Step #6
        show_step(6)
        assert not results['fail'], self.show_failed_msg(results['fail'])

        LOG.info("*************** DONE **************")
| 503 | |
    @pytest.mark.grab_versions
    @pytest.mark.fail_snapshot
    def _test_restart_rgw_node(self, func_name, underlay, config,
                               openstack_deployed, ceph_deployed,
                               core_actions, hardware,
                               salt_actions, openstack_actions,
                               rally, show_step):
        """Test restart ceph rgw node

        Scenario:
            1. Find ceph rgw nodes
            2. Check ceph health before restart
            3. Restart 1 ceph rgw node
            4. Check ceph health after restart
            5. Run tempest smoke after failover
            6. Check tempest report for failed tests

        Requirements:
            - Salt cluster
            - OpenStack cluster
            - Ceph cluster (with RADOS Gateway nodes; skipped otherwise)
        """
        # One-shot NTP resync on every node before running the scenario
        openstack_actions._salt.local(
            tgt='*', fun='cmd.run',
            args='service ntp stop; ntpd -gq; service ntp start')

        # STEP #1
        show_step(1)
        rgw_node_names = underlay.get_target_node_names(
            target='rgw')
        # RGW nodes are optional in a deployment; skip instead of failing
        if not rgw_node_names:
            pytest.skip('Skip as there are not rgw nodes in deploy')

        # STEP #2
        show_step(2)
        # Get the ceph health output before restart
        result, error = self.wait_healthy_ceph(ssh=underlay,
                                               node_names=rgw_node_names)
        assert result, error

        # STEP #3
        show_step(3)
        # Warm-restart a single RGW node via the hardware manager
        hardware.warm_restart_nodes(underlay, 'rgw01')

        # Resync NTP again after the node restart
        openstack_actions._salt.local(
            tgt='*', fun='cmd.run',
            args='service ntp stop; ntpd -gq; service ntp start')

        # STEP #4
        show_step(4)
        # Get the ceph health output after restart; a longer budget (120s)
        # is used here than for the pre-restart check
        result, error = self.wait_healthy_ceph(ssh=underlay,
                                               node_names=rgw_node_names,
                                               time_sec=120)
        assert result, error

        rally.run_container()

        # STEP #5
        show_step(5)
        # Run the tempest smoke set; report is prefixed with the test
        # function name so parallel runs can be told apart
        results = rally.run_tempest(pattern='set=smoke',
                                    conf_name='/var/lib/ceph_mcp.conf',
                                    designate_plugin=False,
                                    report_prefix=func_name,
                                    timeout=1800)
        # Step #6
        show_step(6)
        assert not results['fail'], self.show_failed_msg(results['fail'])

        LOG.info("*************** DONE **************")