Tatyana Leontovich | c72604d | 2018-01-04 17:58:00 +0200 | [diff] [blame] | 1 | # Copyright 2017 Mirantis, Inc. |
| 2 | # |
| 3 | # Licensed under the Apache License, Version 2.0 (the "License"); you may |
| 4 | # not use this file except in compliance with the License. You may obtain |
| 5 | # a copy of the License at |
| 6 | # |
| 7 | # http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | # |
| 9 | # Unless required by applicable law or agreed to in writing, software |
| 10 | # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT |
| 11 | # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the |
| 12 | # License for the specific language governing permissions and limitations |
| 13 | # under the License. |
| 14 | import pytest |
| 15 | |
Hanna Arhipova | c2cb6a5 | 2021-10-20 14:30:05 +0300 | [diff] [blame] | 16 | import time |
Dmitriy Kruglov | a1aecc2 | 2019-08-25 19:26:44 +0200 | [diff] [blame] | 17 | from devops.helpers import helpers |
Tatyana Leontovich | c72604d | 2018-01-04 17:58:00 +0200 | [diff] [blame] | 18 | from tcp_tests import logger |
| 19 | |
| 20 | LOG = logger.logger |
| 21 | |
| 22 | |
class TestFailoverCeph(object):
    """Failover test suite for MCP Ceph: restart OSD/CMN/RGW nodes.

    Holds the Jenkins CVP job names, their parameters and the polling
    timeouts shared by the restart scenarios below.
    """

    # Jenkins job that runs the Tempest smoke suite
    TEMPEST_JOB_NAME = 'cvp-tempest'
    TEMPEST_JOB_PARAMETERS = {
        'TEMPEST_ENDPOINT_TYPE': 'internalURL',
        'TEMPEST_TEST_PATTERN': 'set=smoke'
    }

    # Jenkins sanity job; the -k filter excludes salt_master,
    # test_ceph_health and test_prometheus_alert_count checks
    SANITY_JOB_NAME = 'cvp-sanity'
    SANITY_JOB_PARAMETERS = {
        'EXTRA_PARAMS': {
            'envs': [
                "tests_set=-k "
                "'not salt_master and not test_ceph_health and not "
                "test_prometheus_alert_count'"
            ]
        }
    }

    # Jenkins polling limits, in seconds
    JENKINS_START_TIMEOUT = 60
    JENKINS_BUILD_TIMEOUT = 60 * 25
Dmitriy Kruglov | a1aecc2 | 2019-08-25 19:26:44 +0200 | [diff] [blame] | 45 | |
| 46 | def get_ceph_health(self, ssh, node_names): |
| 47 | """Get Ceph health status on specified nodes |
| 48 | |
| 49 | :param ssh: UnderlaySSHManager, tcp-qa SSH manager instance |
| 50 | :param node_names: list, full hostnames of Ceph OSD nodes |
| 51 | :return: dict, Ceph health status from each OSD node (output of |
| 52 | 'ceph -s' command executed on each node) |
Tatyana Leontovich | c72604d | 2018-01-04 17:58:00 +0200 | [diff] [blame] | 53 | """ |
Dmitriy Kruglov | a1aecc2 | 2019-08-25 19:26:44 +0200 | [diff] [blame] | 54 | return { |
| 55 | node_name: ssh.check_call( |
Hanna Arhipova | c2cb6a5 | 2021-10-20 14:30:05 +0300 | [diff] [blame] | 56 | "ceph health", |
Dmitriy Kruglov | a1aecc2 | 2019-08-25 19:26:44 +0200 | [diff] [blame] | 57 | node_name=node_name, |
| 58 | raise_on_err=False)['stdout_str'] |
Tatyana Leontovich | c72604d | 2018-01-04 17:58:00 +0200 | [diff] [blame] | 59 | for node_name in node_names |
| 60 | } |
Tatyana Leontovich | c72604d | 2018-01-04 17:58:00 +0200 | [diff] [blame] | 61 | |
Hanna Arhipova | c2cb6a5 | 2021-10-20 14:30:05 +0300 | [diff] [blame] | 62 | def wait_healthy_ceph(self, |
| 63 | ssh, |
| 64 | node_names=None, |
| 65 | time_sec=30): |
| 66 | ceph_health = "" |
| 67 | status = False |
| 68 | |
| 69 | start_time = time.time() |
| 70 | while time.time() - start_time < time_sec and not status: |
| 71 | ceph_health = self.get_ceph_health(ssh, node_names) |
| 72 | status = all(["HEALTH_OK" |
| 73 | in status |
| 74 | for node, status |
| 75 | in ceph_health.items()]) |
| 76 | if status: |
| 77 | break |
| 78 | LOG.info("Retry getting ceph health because Ceph is unhealthy: {}" |
| 79 | .format(ceph_health)) |
| 80 | time.sleep(10) |
| 81 | |
| 82 | error = "" if status \ |
| 83 | else "Ceph health is not OK: {0}".format(ceph_health) |
| 84 | return status, error |
| 85 | |
Dmitriy Kruglov | a1aecc2 | 2019-08-25 19:26:44 +0200 | [diff] [blame] | 86 | @pytest.mark.grab_versions |
| 87 | @pytest.mark.restart_osd_node |
| 88 | def test_restart_osd_node( |
| 89 | self, |
| 90 | salt_actions, |
Hanna Arhipova | 17b2c10 | 2019-09-06 16:44:17 +0300 | [diff] [blame] | 91 | drivetrain_actions, |
Dmitriy Kruglov | a1aecc2 | 2019-08-25 19:26:44 +0200 | [diff] [blame] | 92 | underlay_actions, |
| 93 | show_step): |
| 94 | """Verify that Ceph OSD node is not affected by system restart |
| 95 | |
| 96 | Scenario: |
| 97 | 1. Find Ceph OSD nodes |
Hanna Arhipova | c2cb6a5 | 2021-10-20 14:30:05 +0300 | [diff] [blame] | 98 | 2. Check Ceph cluster health before node restart |
Dmitriy Kruglov | a1aecc2 | 2019-08-25 19:26:44 +0200 | [diff] [blame] | 99 | 3. Restart 1 Ceph OSD node |
Hanna Arhipova | c2cb6a5 | 2021-10-20 14:30:05 +0300 | [diff] [blame] | 100 | 4. Check Ceph cluster health after node restart |
Dmitriy Kruglov | a1aecc2 | 2019-08-25 19:26:44 +0200 | [diff] [blame] | 101 | 5. Run Tempest smoke test suite |
| 102 | 6. Run test_ceph_status.py::test_ceph_osd and |
| 103 | test_services.py::test_check_services[osd] sanity tests |
| 104 | |
| 105 | Duration: ~9 min |
| 106 | """ |
| 107 | salt = salt_actions |
| 108 | ssh = underlay_actions |
Hanna Arhipova | 17b2c10 | 2019-09-06 16:44:17 +0300 | [diff] [blame] | 109 | dt = drivetrain_actions |
Dmitriy Kruglov | a1aecc2 | 2019-08-25 19:26:44 +0200 | [diff] [blame] | 110 | |
| 111 | # Find Ceph OSD nodes |
| 112 | show_step(1) |
| 113 | tgt = "I@ceph:osd" |
| 114 | osd_hosts = salt.local(tgt, "test.ping")['return'][0].keys() |
| 115 | # Select a node for the test |
| 116 | osd_host = osd_hosts[0] |
| 117 | |
| 118 | # Check Ceph cluster health before node restart |
| 119 | show_step(2) |
Hanna Arhipova | c2cb6a5 | 2021-10-20 14:30:05 +0300 | [diff] [blame] | 120 | result, error = self.wait_healthy_ceph(ssh=ssh, |
| 121 | node_names=osd_hosts) |
| 122 | assert result, error |
Dmitriy Kruglov | a1aecc2 | 2019-08-25 19:26:44 +0200 | [diff] [blame] | 123 | |
| 124 | # Restart a Ceph OSD node |
| 125 | show_step(3) |
| 126 | LOG.info("Sending reboot command to '{}' node.".format(osd_host)) |
| 127 | remote = ssh.remote(node_name=osd_host) |
| 128 | remote.execute_async("/sbin/shutdown -r now") |
| 129 | |
| 130 | # Wait for restarted node to boot and become accessible |
| 131 | helpers.wait_pass(remote.reconnect, timeout=60 * 3, interval=5) |
| 132 | echo_request = "echo" |
| 133 | echo_response = salt.local( |
| 134 | osd_host, "test.echo", echo_request)['return'][0] |
| 135 | assert echo_request == echo_response[osd_host], ( |
| 136 | "Minion on node '{}' node is not responding after node " |
| 137 | "reboot.".format(osd_host) |
| 138 | ) |
| 139 | LOG.info("'{}' node is back after reboot.".format(osd_host)) |
| 140 | |
| 141 | # Check Ceph cluster health after node restart |
| 142 | show_step(4) |
Hanna Arhipova | c2cb6a5 | 2021-10-20 14:30:05 +0300 | [diff] [blame] | 143 | result, error = self.wait_healthy_ceph(ssh=ssh, |
| 144 | node_names=osd_hosts, |
| 145 | time_sec=120) |
| 146 | assert result, error |
Dmitriy Kruglov | a1aecc2 | 2019-08-25 19:26:44 +0200 | [diff] [blame] | 147 | |
| 148 | # Run Tempest smoke test suite |
| 149 | show_step(5) |
Hanna Arhipova | 7f2a175 | 2021-04-22 09:59:11 +0300 | [diff] [blame] | 150 | job_result, job_description = dt.start_job_on_jenkins( |
Hanna Arhipova | 17b2c10 | 2019-09-06 16:44:17 +0300 | [diff] [blame] | 151 | job_name=self.TEMPEST_JOB_NAME, |
| 152 | job_parameters=self.TEMPEST_JOB_PARAMETERS, |
| 153 | start_timeout=self.JENKINS_START_TIMEOUT, |
| 154 | build_timeout=self.JENKINS_BUILD_TIMEOUT |
Dmitriy Kruglov | a1aecc2 | 2019-08-25 19:26:44 +0200 | [diff] [blame] | 155 | ) |
Hanna Arhipova | 17b2c10 | 2019-09-06 16:44:17 +0300 | [diff] [blame] | 156 | |
Hanna Arhipova | 7f2a175 | 2021-04-22 09:59:11 +0300 | [diff] [blame] | 157 | assert job_result == 'SUCCESS', ( |
Dmitriy Kruglov | a1aecc2 | 2019-08-25 19:26:44 +0200 | [diff] [blame] | 158 | "'{0}' job run status is {1} after executing Tempest smoke " |
Hanna Arhipova | 17b2c10 | 2019-09-06 16:44:17 +0300 | [diff] [blame] | 159 | "tests".format( |
Hanna Arhipova | 7f2a175 | 2021-04-22 09:59:11 +0300 | [diff] [blame] | 160 | self.TEMPEST_JOB_NAME, job_description) |
Dmitriy Kruglov | a1aecc2 | 2019-08-25 19:26:44 +0200 | [diff] [blame] | 161 | ) |
| 162 | |
| 163 | # Run Sanity test |
| 164 | show_step(6) |
Hanna Arhipova | 7f2a175 | 2021-04-22 09:59:11 +0300 | [diff] [blame] | 165 | job_result, job_description = dt.start_job_on_jenkins( |
Hanna Arhipova | 17b2c10 | 2019-09-06 16:44:17 +0300 | [diff] [blame] | 166 | job_name=self.SANITY_JOB_NAME, |
| 167 | job_parameters=self.SANITY_JOB_PARAMETERS, |
| 168 | start_timeout=self.JENKINS_START_TIMEOUT, |
| 169 | build_timeout=self.JENKINS_BUILD_TIMEOUT |
Dmitriy Kruglov | a1aecc2 | 2019-08-25 19:26:44 +0200 | [diff] [blame] | 170 | ) |
Hanna Arhipova | 7f2a175 | 2021-04-22 09:59:11 +0300 | [diff] [blame] | 171 | assert job_result == 'SUCCESS', ( |
Dmitriy Kruglov | a1aecc2 | 2019-08-25 19:26:44 +0200 | [diff] [blame] | 172 | "'{0}' job run status is {1} after executing selected sanity " |
Hanna Arhipova | 17b2c10 | 2019-09-06 16:44:17 +0300 | [diff] [blame] | 173 | "tests".format( |
Hanna Arhipova | 7f2a175 | 2021-04-22 09:59:11 +0300 | [diff] [blame] | 174 | self.SANITY_JOB_NAME, job_description) |
Dmitriy Kruglov | a1aecc2 | 2019-08-25 19:26:44 +0200 | [diff] [blame] | 175 | ) |
| 176 | |
| 177 | @pytest.mark.grab_versions |
| 178 | @pytest.mark.restart_cmn_node |
| 179 | def test_restart_cmn_node( |
| 180 | self, |
| 181 | salt_actions, |
Hanna Arhipova | 17b2c10 | 2019-09-06 16:44:17 +0300 | [diff] [blame] | 182 | drivetrain_actions, |
Dmitriy Kruglov | a1aecc2 | 2019-08-25 19:26:44 +0200 | [diff] [blame] | 183 | underlay_actions, |
| 184 | show_step): |
| 185 | """Verify that Ceph CMN node is not affected by system restart |
| 186 | |
| 187 | Scenario: |
| 188 | 1. Find Ceph CMN nodes |
Hanna Arhipova | c2cb6a5 | 2021-10-20 14:30:05 +0300 | [diff] [blame] | 189 | 2. Check Ceph cluster health before node restart |
Dmitriy Kruglov | a1aecc2 | 2019-08-25 19:26:44 +0200 | [diff] [blame] | 190 | 3. Restart 1 Ceph CMN node |
Hanna Arhipova | c2cb6a5 | 2021-10-20 14:30:05 +0300 | [diff] [blame] | 191 | 4. Check Ceph cluster health after node restart |
Dmitriy Kruglov | a1aecc2 | 2019-08-25 19:26:44 +0200 | [diff] [blame] | 192 | 5. Run Tempest smoke test suite |
| 193 | 6. Run test_ceph_status.py::test_ceph_replicas and |
| 194 | test_services.py::test_check_services[cmn] sanity tests |
| 195 | |
| 196 | Duration: ~9 min |
| 197 | """ |
| 198 | salt = salt_actions |
| 199 | ssh = underlay_actions |
Hanna Arhipova | 17b2c10 | 2019-09-06 16:44:17 +0300 | [diff] [blame] | 200 | dt = drivetrain_actions |
Dmitriy Kruglov | a1aecc2 | 2019-08-25 19:26:44 +0200 | [diff] [blame] | 201 | |
| 202 | # Find Ceph CMN nodes |
| 203 | show_step(1) |
| 204 | tgt = "I@ceph:mon" |
| 205 | cmn_hosts = salt.local(tgt, "test.ping")['return'][0].keys() |
| 206 | # Select a node for the test |
| 207 | cmn_host = cmn_hosts[0] |
| 208 | |
| 209 | # Check Ceph cluster health before node restart |
| 210 | show_step(2) |
Hanna Arhipova | c2cb6a5 | 2021-10-20 14:30:05 +0300 | [diff] [blame] | 211 | result, error = self.wait_healthy_ceph(ssh=ssh, |
| 212 | node_names=cmn_hosts) |
| 213 | assert result, error |
Dmitriy Kruglov | a1aecc2 | 2019-08-25 19:26:44 +0200 | [diff] [blame] | 214 | |
| 215 | # Restart a Ceph CMN node |
| 216 | show_step(3) |
| 217 | LOG.info("Sending reboot command to '{}' node.".format(cmn_host)) |
| 218 | remote = ssh.remote(node_name=cmn_host) |
| 219 | remote.execute_async("/sbin/shutdown -r now") |
| 220 | |
| 221 | # Wait for restarted node to boot and become accessible |
| 222 | helpers.wait_pass(remote.reconnect, timeout=60 * 3, interval=5) |
| 223 | echo_request = "echo" |
| 224 | echo_response = salt.local( |
| 225 | cmn_host, "test.echo", echo_request)['return'][0] |
| 226 | assert echo_request == echo_response[cmn_host], ( |
| 227 | "Minion on node '{}' node is not responding after node " |
| 228 | "reboot.".format(cmn_host) |
| 229 | ) |
| 230 | LOG.info("'{}' node is back after reboot.".format(cmn_host)) |
| 231 | |
| 232 | # Check Ceph cluster health after node restart |
| 233 | show_step(4) |
Hanna Arhipova | c2cb6a5 | 2021-10-20 14:30:05 +0300 | [diff] [blame] | 234 | result, error = self.wait_healthy_ceph(ssh=ssh, |
| 235 | node_names=cmn_hosts, |
| 236 | time_sec=120) |
| 237 | assert result, error |
Dmitriy Kruglov | a1aecc2 | 2019-08-25 19:26:44 +0200 | [diff] [blame] | 238 | |
| 239 | # Run Tempest smoke test suite |
| 240 | show_step(5) |
Hanna Arhipova | 7f2a175 | 2021-04-22 09:59:11 +0300 | [diff] [blame] | 241 | job_result, job_description = dt.start_job_on_jenkins( |
Hanna Arhipova | 17b2c10 | 2019-09-06 16:44:17 +0300 | [diff] [blame] | 242 | job_name=self.TEMPEST_JOB_NAME, |
| 243 | job_parameters=self.TEMPEST_JOB_PARAMETERS, |
| 244 | start_timeout=self.JENKINS_START_TIMEOUT, |
| 245 | build_timeout=self.JENKINS_BUILD_TIMEOUT |
Dmitriy Kruglov | a1aecc2 | 2019-08-25 19:26:44 +0200 | [diff] [blame] | 246 | ) |
Hanna Arhipova | 17b2c10 | 2019-09-06 16:44:17 +0300 | [diff] [blame] | 247 | |
Hanna Arhipova | 7f2a175 | 2021-04-22 09:59:11 +0300 | [diff] [blame] | 248 | assert job_result == 'SUCCESS', ( |
Dmitriy Kruglov | a1aecc2 | 2019-08-25 19:26:44 +0200 | [diff] [blame] | 249 | "'{0}' job run status is {1} after executing Tempest smoke " |
Hanna Arhipova | 17b2c10 | 2019-09-06 16:44:17 +0300 | [diff] [blame] | 250 | "tests".format( |
Hanna Arhipova | 7f2a175 | 2021-04-22 09:59:11 +0300 | [diff] [blame] | 251 | self.TEMPEST_JOB_NAME, job_description) |
Dmitriy Kruglov | a1aecc2 | 2019-08-25 19:26:44 +0200 | [diff] [blame] | 252 | ) |
| 253 | |
| 254 | # Run Sanity test |
| 255 | show_step(6) |
Hanna Arhipova | 7f2a175 | 2021-04-22 09:59:11 +0300 | [diff] [blame] | 256 | job_result, job_description = dt.start_job_on_jenkins( |
Hanna Arhipova | 17b2c10 | 2019-09-06 16:44:17 +0300 | [diff] [blame] | 257 | job_name=self.SANITY_JOB_NAME, |
| 258 | job_parameters=self.SANITY_JOB_PARAMETERS, |
| 259 | start_timeout=self.JENKINS_START_TIMEOUT, |
| 260 | build_timeout=self.JENKINS_BUILD_TIMEOUT |
Dmitriy Kruglov | a1aecc2 | 2019-08-25 19:26:44 +0200 | [diff] [blame] | 261 | ) |
Hanna Arhipova | 7f2a175 | 2021-04-22 09:59:11 +0300 | [diff] [blame] | 262 | assert job_result == 'SUCCESS', ( |
Dmitriy Kruglov | a1aecc2 | 2019-08-25 19:26:44 +0200 | [diff] [blame] | 263 | "'{0}' job run status is {1} after executing selected sanity " |
Hanna Arhipova | 17b2c10 | 2019-09-06 16:44:17 +0300 | [diff] [blame] | 264 | "tests".format( |
Hanna Arhipova | 7f2a175 | 2021-04-22 09:59:11 +0300 | [diff] [blame] | 265 | self.SANITY_JOB_NAME, job_description) |
Dmitriy Kruglov | a1aecc2 | 2019-08-25 19:26:44 +0200 | [diff] [blame] | 266 | ) |
| 267 | |
| 268 | @pytest.mark.grab_versions |
| 269 | @pytest.mark.restart_rgw_node |
| 270 | def test_restart_rgw_node( |
| 271 | self, |
| 272 | salt_actions, |
Hanna Arhipova | 17b2c10 | 2019-09-06 16:44:17 +0300 | [diff] [blame] | 273 | drivetrain_actions, |
Dmitriy Kruglov | a1aecc2 | 2019-08-25 19:26:44 +0200 | [diff] [blame] | 274 | underlay_actions, |
| 275 | show_step): |
| 276 | """Verify that Ceph RGW node is not affected by system restart |
| 277 | |
| 278 | Scenario: |
| 279 | 1. Find Ceph RGW nodes |
Hanna Arhipova | c2cb6a5 | 2021-10-20 14:30:05 +0300 | [diff] [blame] | 280 | 2. Check Ceph cluster health before node restart |
Dmitriy Kruglov | a1aecc2 | 2019-08-25 19:26:44 +0200 | [diff] [blame] | 281 | 3. Restart 1 Ceph RGW node |
Hanna Arhipova | c2cb6a5 | 2021-10-20 14:30:05 +0300 | [diff] [blame] | 282 | 4. Check Ceph cluster health after node restart |
Dmitriy Kruglov | a1aecc2 | 2019-08-25 19:26:44 +0200 | [diff] [blame] | 283 | 5. Run Tempest smoke test suite |
| 284 | 6. Run test_services.py::test_check_services[rgw] sanity test |
| 285 | |
| 286 | Duration: ~9 min |
| 287 | """ |
| 288 | salt = salt_actions |
| 289 | ssh = underlay_actions |
Hanna Arhipova | 17b2c10 | 2019-09-06 16:44:17 +0300 | [diff] [blame] | 290 | dt = drivetrain_actions |
Dmitriy Kruglov | a1aecc2 | 2019-08-25 19:26:44 +0200 | [diff] [blame] | 291 | |
| 292 | # Find Ceph RGW nodes |
| 293 | show_step(1) |
| 294 | tgt = "I@ceph:radosgw" |
| 295 | rgw_hosts = salt.local(tgt, "test.ping")['return'][0].keys() |
| 296 | # Select a node for the test |
| 297 | rgw_host = rgw_hosts[0] |
| 298 | |
| 299 | # Check Ceph cluster health before node restart |
| 300 | show_step(2) |
Hanna Arhipova | c2cb6a5 | 2021-10-20 14:30:05 +0300 | [diff] [blame] | 301 | result, error = self.wait_healthy_ceph(ssh=ssh, |
| 302 | node_names=rgw_hosts) |
| 303 | assert result, error |
Dmitriy Kruglov | a1aecc2 | 2019-08-25 19:26:44 +0200 | [diff] [blame] | 304 | |
| 305 | # Restart a Ceph RGW node |
| 306 | show_step(3) |
| 307 | LOG.info("Sending reboot command to '{}' node.".format(rgw_host)) |
| 308 | remote = ssh.remote(node_name=rgw_host) |
| 309 | remote.execute_async("/sbin/shutdown -r now") |
| 310 | |
| 311 | # Wait for restarted node to boot and become accessible |
| 312 | helpers.wait_pass(remote.reconnect, timeout=60 * 3, interval=5) |
| 313 | echo_request = "echo" |
| 314 | echo_response = salt.local( |
| 315 | rgw_host, "test.echo", echo_request)['return'][0] |
| 316 | assert echo_request == echo_response[rgw_host], ( |
| 317 | "Minion on node '{}' node is not responding after node " |
| 318 | "reboot.".format(rgw_host) |
| 319 | ) |
| 320 | LOG.info("'{}' node is back after reboot.".format(rgw_host)) |
| 321 | |
| 322 | # Check Ceph cluster health after node restart |
| 323 | show_step(4) |
Hanna Arhipova | c2cb6a5 | 2021-10-20 14:30:05 +0300 | [diff] [blame] | 324 | result, error = self.wait_healthy_ceph(ssh=ssh, |
| 325 | node_names=rgw_hosts, |
| 326 | time_sec=120) |
| 327 | |
| 328 | assert result, error |
Dmitriy Kruglov | a1aecc2 | 2019-08-25 19:26:44 +0200 | [diff] [blame] | 329 | |
| 330 | # Run Tempest smoke test suite |
| 331 | show_step(5) |
Hanna Arhipova | 7f2a175 | 2021-04-22 09:59:11 +0300 | [diff] [blame] | 332 | job_result, job_description = dt.start_job_on_jenkins( |
Hanna Arhipova | 17b2c10 | 2019-09-06 16:44:17 +0300 | [diff] [blame] | 333 | job_name=self.TEMPEST_JOB_NAME, |
| 334 | job_parameters=self.TEMPEST_JOB_PARAMETERS, |
| 335 | start_timeout=self.JENKINS_START_TIMEOUT, |
| 336 | build_timeout=self.JENKINS_BUILD_TIMEOUT |
Dmitriy Kruglov | a1aecc2 | 2019-08-25 19:26:44 +0200 | [diff] [blame] | 337 | ) |
Hanna Arhipova | 17b2c10 | 2019-09-06 16:44:17 +0300 | [diff] [blame] | 338 | |
Hanna Arhipova | 7f2a175 | 2021-04-22 09:59:11 +0300 | [diff] [blame] | 339 | assert job_result == 'SUCCESS', ( |
Dmitriy Kruglov | a1aecc2 | 2019-08-25 19:26:44 +0200 | [diff] [blame] | 340 | "'{0}' job run status is {1} after executing Tempest smoke " |
Hanna Arhipova | 17b2c10 | 2019-09-06 16:44:17 +0300 | [diff] [blame] | 341 | "tests".format( |
Hanna Arhipova | 7f2a175 | 2021-04-22 09:59:11 +0300 | [diff] [blame] | 342 | self.TEMPEST_JOB_NAME, job_description) |
Dmitriy Kruglov | a1aecc2 | 2019-08-25 19:26:44 +0200 | [diff] [blame] | 343 | ) |
| 344 | |
| 345 | # Run Sanity test |
| 346 | show_step(6) |
Hanna Arhipova | 7f2a175 | 2021-04-22 09:59:11 +0300 | [diff] [blame] | 347 | job_result, job_description = dt.start_job_on_jenkins( |
Hanna Arhipova | 17b2c10 | 2019-09-06 16:44:17 +0300 | [diff] [blame] | 348 | job_name=self.SANITY_JOB_NAME, |
| 349 | job_parameters=self.SANITY_JOB_PARAMETERS, |
| 350 | start_timeout=self.JENKINS_START_TIMEOUT, |
| 351 | build_timeout=self.JENKINS_BUILD_TIMEOUT |
Dmitriy Kruglov | a1aecc2 | 2019-08-25 19:26:44 +0200 | [diff] [blame] | 352 | ) |
Hanna Arhipova | 7f2a175 | 2021-04-22 09:59:11 +0300 | [diff] [blame] | 353 | assert job_result == 'SUCCESS', ( |
Dmitriy Kruglov | a1aecc2 | 2019-08-25 19:26:44 +0200 | [diff] [blame] | 354 | "'{0}' job run status is {1} after executing selected sanity " |
Hanna Arhipova | 17b2c10 | 2019-09-06 16:44:17 +0300 | [diff] [blame] | 355 | "tests".format( |
Hanna Arhipova | 7f2a175 | 2021-04-22 09:59:11 +0300 | [diff] [blame] | 356 | self.SANITY_JOB_NAME, job_description) |
Dmitriy Kruglov | a1aecc2 | 2019-08-25 19:26:44 +0200 | [diff] [blame] | 357 | ) |
| 358 | |
| 359 | # ####################################################################### |
| 360 | # ############# Tests for fuel-devops deployed environments ############# |
| 361 | # ####################################################################### |
Tatyana Leontovich | c72604d | 2018-01-04 17:58:00 +0200 | [diff] [blame] | 362 | def show_failed_msg(self, failed): |
| 363 | return "There are failed tempest tests:\n\n {0}".format( |
| 364 | '\n\n '.join([(name + ': ' + detail) |
| 365 | for name, detail in failed.items()])) |
| 366 | |
    @pytest.mark.grab_versions
    @pytest.mark.fail_snapshot
    def _test_restart_osd_node(self, func_name, underlay, config,
                               openstack_deployed, ceph_deployed,
                               openstack_actions, hardware,
                               rally, show_step):
        """Test restart ceph osd node

        Scenario:
            1. Find ceph osd nodes
            2. Check ceph health before restart
            3. Restart 1 ceph osd node
            4. Check ceph health after restart
            5. Run tempest smoke after failover
            6. Check tempest report for failed tests

        Requirements:
            - Salt cluster
            - OpenStack cluster
            - Ceph cluster
        """
        # One-shot NTP resync on every node (stop service, force sync with
        # ntpd -gq, start service) before running the scenario
        openstack_actions._salt.local(
            tgt='*', fun='cmd.run',
            args='service ntp stop; ntpd -gq; service ntp start')
        # STEP #1
        show_step(1)
        osd_node_names = underlay.get_target_node_names(
            target='osd')

        # STEP #2
        show_step(2)
        # Get the ceph health output before restart
        result, error = self.wait_healthy_ceph(ssh=underlay,
                                               node_names=osd_node_names)
        assert result, error

        # STEP #3
        show_step(3)
        # Warm-restart a single OSD node via the hardware manager
        hardware.warm_restart_nodes(underlay, 'osd01')

        # Resync NTP again after the node restart
        openstack_actions._salt.local(
            tgt='*', fun='cmd.run',
            args='service ntp stop; ntpd -gq; service ntp start')

        # STEP #4
        show_step(4)
        # Get the ceph health output after restart (default 30s budget)
        result, error = self.wait_healthy_ceph(ssh=underlay,
                                               node_names=osd_node_names)

        assert result, error

        rally.run_container()

        # STEP #5
        show_step(5)
        # Run the tempest smoke set; report is prefixed with the test
        # function name so parallel runs can be told apart
        results = rally.run_tempest(pattern='set=smoke',
                                    conf_name='/var/lib/ceph_mcp.conf',
                                    report_prefix=func_name,
                                    designate_plugin=False,
                                    timeout=1800)
        # Step #6
        show_step(6)
        assert not results['fail'], self.show_failed_msg(results['fail'])

        LOG.info("*************** DONE **************")
| 433 | |
    @pytest.mark.grab_versions
    @pytest.mark.fail_snapshot
    def _test_restart_cmn_node(self, func_name, underlay, config,
                               openstack_deployed, ceph_deployed,
                               core_actions,
                               salt_actions, openstack_actions,
                               rally, show_step, hardware):
        """Test restart ceph cmn node

        Scenario:
            1. Find ceph cmn nodes
            2. Check ceph health before restart
            3. Restart 1 ceph cmn node
            4. Check ceph health after restart
            5. Run tempest smoke after failover
            6. Check tempest report for failed tests

        Requirements:
            - Salt cluster
            - OpenStack cluster
            - Ceph cluster
        """
        # One-shot NTP resync on every node before running the scenario
        openstack_actions._salt.local(
            tgt='*', fun='cmd.run',
            args='service ntp stop; ntpd -gq; service ntp start')
        # STEP #1
        show_step(1)
        cmn_node_names = underlay.get_target_node_names(
            target='cmn')

        # STEP #2
        show_step(2)
        # Get the ceph health output before restart
        result, error = self.wait_healthy_ceph(ssh=underlay,
                                               node_names=cmn_node_names)

        assert result, error

        # STEP #3
        show_step(3)
        # Warm-restart a single monitor node via the hardware manager
        hardware.warm_restart_nodes(underlay, 'cmn01')

        # Resync NTP again after the node restart
        openstack_actions._salt.local(
            tgt='*', fun='cmd.run',
            args='service ntp stop; ntpd -gq; service ntp start')

        # STEP #4
        show_step(4)
        # Get the ceph health output after restart; a longer budget (120s)
        # is used here than for the pre-restart check
        result, error = self.wait_healthy_ceph(ssh=underlay,
                                               node_names=cmn_node_names,
                                               time_sec=120)

        assert result, error

        rally.run_container()

        # STEP #5
        show_step(5)
        # Run the tempest smoke set; report is prefixed with the test
        # function name so parallel runs can be told apart
        results = rally.run_tempest(pattern='set=smoke',
                                    conf_name='/var/lib/ceph_mcp.conf',
                                    report_prefix=func_name,
                                    designate_plugin=False,
                                    timeout=1800)
        # Step #6
        show_step(6)
        assert not results['fail'], self.show_failed_msg(results['fail'])

        LOG.info("*************** DONE **************")
| 503 | |
    @pytest.mark.grab_versions
    @pytest.mark.fail_snapshot
    def _test_restart_rgw_node(self, func_name, underlay, config,
                               openstack_deployed, ceph_deployed,
                               core_actions, hardware,
                               salt_actions, openstack_actions,
                               rally, show_step):
        """Test restart ceph rgw node

        Scenario:
            1. Find ceph rgw nodes
            2. Check ceph health before restart
            3. Restart 1 ceph rgw node
            4. Check ceph health after restart
            5. Run tempest smoke after failover
            6. Check tempest report for failed tests

        Requirements:
            - Salt cluster
            - OpenStack cluster
            - Ceph cluster (with RADOS Gateway nodes; skipped otherwise)
        """
        # One-shot NTP resync on every node before running the scenario
        openstack_actions._salt.local(
            tgt='*', fun='cmd.run',
            args='service ntp stop; ntpd -gq; service ntp start')

        # STEP #1
        show_step(1)
        rgw_node_names = underlay.get_target_node_names(
            target='rgw')
        # RGW nodes are optional in a deployment; skip instead of failing
        if not rgw_node_names:
            pytest.skip('Skip as there are not rgw nodes in deploy')

        # STEP #2
        show_step(2)
        # Get the ceph health output before restart
        result, error = self.wait_healthy_ceph(ssh=underlay,
                                               node_names=rgw_node_names)
        assert result, error

        # STEP #3
        show_step(3)
        # Warm-restart a single RGW node via the hardware manager
        hardware.warm_restart_nodes(underlay, 'rgw01')

        # Resync NTP again after the node restart
        openstack_actions._salt.local(
            tgt='*', fun='cmd.run',
            args='service ntp stop; ntpd -gq; service ntp start')

        # STEP #4
        show_step(4)
        # Get the ceph health output after restart; a longer budget (120s)
        # is used here than for the pre-restart check
        result, error = self.wait_healthy_ceph(ssh=underlay,
                                               node_names=rgw_node_names,
                                               time_sec=120)
        assert result, error

        rally.run_container()

        # STEP #5
        show_step(5)
        # Run the tempest smoke set; report is prefixed with the test
        # function name so parallel runs can be told apart
        results = rally.run_tempest(pattern='set=smoke',
                                    conf_name='/var/lib/ceph_mcp.conf',
                                    designate_plugin=False,
                                    report_prefix=func_name,
                                    timeout=1800)
        # Step #6
        show_step(6)
        assert not results['fail'], self.show_failed_msg(results['fail'])

        LOG.info("*************** DONE **************")