Workaround for keepalived losing VIPs
- increase keepalived garp_master_repeat count to 100000
- fix staticmethod in failover test
- use the fixture 'sl_os_deployed' to avoid redeploying
stacklight in each testcase
Change-Id: I7c473c328520dfe06ec3d481475d3071c43a0706
Related-Bug: #PROD-15791
diff --git a/tcp_tests/fixtures/stacklight_fixtures.py b/tcp_tests/fixtures/stacklight_fixtures.py
index c04f48f..e985078 100644
--- a/tcp_tests/fixtures/stacklight_fixtures.py
+++ b/tcp_tests/fixtures/stacklight_fixtures.py
@@ -18,7 +18,6 @@
from tcp_tests import logger
from tcp_tests.helpers import ext
from tcp_tests.managers import sl_manager
-from tcp_tests.helpers import utils
LOG = logger.logger
@@ -52,7 +51,7 @@
:param tcp_actions: fixture provides SLManager instance
:rtype: SLManager
"""
- # Create Salt cluster
+ # Deploy SL services
if not config.stack_light.sl_installed:
steps_path = config.sl_deploy.sl_steps_path
commands = underlay.read_template(steps_path)
@@ -80,42 +79,14 @@
@pytest.mark.revert_snapshot(ext.SNAPSHOT.sl_deployed)
@pytest.fixture(scope='function')
-def deploy_sl(revert_snapshot, request, config,
- hardware, underlay, common_services_deployed,
- sl_actions):
- """Fixture to get or install OpenStack services on environment
+def sl_os_deployed(revert_snapshot,
+ openstack_deployed,
+ sl_deployed):
+ """Fixture to get or install SL and OpenStack services on environment
- :param revert_snapshot: fixture that reverts snapshot that is specified
- in test with @pytest.mark.revert_snapshot(<name>)
- :param request: fixture provides pytest data
- :param config: fixture provides oslo.config
- :param hardware: fixture provides enviromnet manager
- :param underlay: fixture provides underlay manager
- :param tcp_actions: fixture provides OpenstackManager instance
- :rtype: SLManager
+ Uses fixtures openstack_deployed and sl_deployed, with 'sl_deployed'
+ top-level snapshot.
- If you want to revert 'sl_deployed' snapshot, please use mark:
- @pytest.mark.revert_snapshot("sl_deployed")
+ Returns SLManager instance object
"""
- # Create Salt cluster
- if not config.stack_light.sl_installed:
- steps_path = config.sl_deploy.sl_steps_path
- commands = utils.read_template(steps_path)
- sl_actions.install(commands)
- hardware.create_snapshot(ext.SNAPSHOT.sl_deployed)
-
- else:
- # 1. hardware environment created and powered on
- # 2. config.underlay.ssh contains SSH access to provisioned nodes
- # (can be passed from external config with TESTS_CONFIGS variable)
- # 3. config.tcp.* options contain access credentials to the already
- # installed TCP API endpoint
- pass
- # Workaround for keepalived hang issue after env revert from snapshot
- # see https://mirantis.jira.com/browse/PROD-12038
- LOG.warning('Restarting keepalived service on controllers...')
- sl_actions._salt.local(tgt='ctl*', fun='cmd.run',
- args='systemctl restart keepalived.service')
- sl_actions._salt.local(tgt='mon*', fun='cmd.run',
- args='systemctl restart keepalived.service')
- return sl_actions
+ return sl_deployed
diff --git a/tcp_tests/templates/shared-salt.yaml b/tcp_tests/templates/shared-salt.yaml
index 955972c..607f908 100644
--- a/tcp_tests/templates/shared-salt.yaml
+++ b/tcp_tests/templates/shared-salt.yaml
@@ -504,6 +504,13 @@
skip_fail: false
{%- endif %}
+- description: "*Workaround* [PROD-15791] increase keepalived garp repeat count"
+ cmd: |
+ sed -i 's/^ garp_master_repeat .*$/ garp_master_repeat 100000/' /usr/share/salt-formulas/env/keepalived/files/keepalived.conf
+ node_name: {{ HOSTNAME_CFG01 }}
+ retry: {count: 1, delay: 5}
+ skip_fail: true
+
- description: Refresh pillars on salt-master minion
cmd: salt --hard-crash --state-output=mixed --state-verbose=False -C 'I@salt:master' saltutil.refresh_pillar
node_name: {{ HOSTNAME_CFG01 }}
diff --git a/tcp_tests/tests/system/test_failover.py b/tcp_tests/tests/system/test_failover.py
index 15dc2a1..b7f1f44 100644
--- a/tcp_tests/tests/system/test_failover.py
+++ b/tcp_tests/tests/system/test_failover.py
@@ -15,6 +15,7 @@
import pytest
from tcp_tests import logger
+from tcp_tests.helpers import ext
LOG = logger.logger
@@ -22,6 +23,7 @@
class TestFailover(object):
"""Test class for testing OpenStack nodes failover"""
+ @staticmethod
def check_influxdb_xfail(sl_deployed, node_name, value):
def check_influxdb_data():
@@ -156,9 +158,9 @@
@pytest.mark.grab_versions
@pytest.mark.fail_snapshot
- def test_restart_mon01_node(self, underlay, openstack_deployed,
- openstack_actions, sl_deployed,
- show_step):
+ @pytest.mark.revert_snapshot(ext.SNAPSHOT.sl_deployed)
+ def test_restart_mon01_node(self, sl_os_deployed,
+ openstack_actions, show_step):
"""Test restart mon01
Scenario:
@@ -179,10 +181,10 @@
# STEP #4
show_step(4)
- mon_nodes = sl_deployed.get_monitoring_nodes()
+ mon_nodes = sl_os_deployed.get_monitoring_nodes()
LOG.debug('Mon nodes list {0}'.format(mon_nodes))
- sl_deployed.check_prometheus_targets(mon_nodes)
- before_result = sl_deployed.run_sl_tests_json(
+ sl_os_deployed.check_prometheus_targets(mon_nodes)
+ before_result = sl_os_deployed.run_sl_tests_json(
'cfg01', '/root/stacklight-pytest/stacklight_tests/',
'tests/prometheus/', 'test_alerts.py')
failed_tests = [test['name'] for test in
@@ -192,11 +194,11 @@
openstack_actions.warm_restart_nodes('mon01')
# STEP #6
show_step(6)
- sl_deployed.check_prometheus_targets(mon_nodes)
+ sl_os_deployed.check_prometheus_targets(mon_nodes)
# STEP #7
show_step(7)
# Run SL component tetsts
- after_result = sl_deployed.run_sl_tests_json(
+ after_result = sl_os_deployed.run_sl_tests_json(
'cfg01', '/root/stacklight-pytest/stacklight_tests/',
'tests/prometheus/', 'test_alerts.py')
for test in after_result:
@@ -207,8 +209,9 @@
@pytest.mark.grab_versions
@pytest.mark.fail_snapshot
- def test_warm_shutdown_mon01_node(self, underlay, openstack_deployed,
- openstack_actions, sl_deployed,
+ @pytest.mark.revert_snapshot(ext.SNAPSHOT.sl_deployed)
+ def test_warm_shutdown_mon01_node(self, openstack_actions,
+ sl_os_deployed,
show_step):
"""Test warm shutdown mon01
@@ -229,10 +232,10 @@
# STEP #4
show_step(4)
- mon_nodes = sl_deployed.get_monitoring_nodes()
+ mon_nodes = sl_os_deployed.get_monitoring_nodes()
LOG.debug('Mon nodes list {0}'.format(mon_nodes))
- sl_deployed.check_prometheus_targets(mon_nodes)
- before_result = sl_deployed.run_sl_tests_json(
+ sl_os_deployed.check_prometheus_targets(mon_nodes)
+ before_result = sl_os_deployed.run_sl_tests_json(
'cfg01', '/root/stacklight-pytest/stacklight_tests/',
'tests/prometheus/', 'test_alerts.py')
failed_tests = [test['name'] for test in
@@ -243,7 +246,7 @@
# STEP #6
show_step(6)
# Run SL component tetsts
- after_result = sl_deployed.run_sl_tests_json(
+ after_result = sl_os_deployed.run_sl_tests_json(
'cfg01', '/root/stacklight-pytest/stacklight_tests/',
'tests/prometheus/', 'test_alerts.py')
for test in after_result:
@@ -254,10 +257,9 @@
@pytest.mark.grab_versions
@pytest.mark.fail_snapshot
- def test_kill_influxdb_relay_mon01_node(self, underlay,
- openstack_deployed,
- openstack_actions,
- sl_deployed):
+ @pytest.mark.revert_snapshot(ext.SNAPSHOT.sl_deployed)
+ def test_kill_influxdb_relay_mon01_node(self, sl_os_deployed,
+ show_step):
"""Test kill influxdb relay on mon01 node
Scenario:
@@ -275,33 +277,44 @@
"""
# STEP #1,2,3
+ show_step(1)
+ show_step(2)
+ show_step(3)
+
# STEP #4
- mon_nodes = sl_deployed.get_monitoring_nodes()
+ show_step(4)
+ mon_nodes = sl_os_deployed.get_monitoring_nodes()
LOG.debug('Mon nodes list {0}'.format(mon_nodes))
- before_result = sl_deployed.run_sl_tests_json(
+ before_result = sl_os_deployed.run_sl_tests_json(
'cfg01', '/root/stacklight-pytest/stacklight_tests/',
'tests/prometheus/', 'test_alerts.py')
failed_tests = [test['name'] for test in
before_result if 'passed' not in test['outcome']]
# STEP #5
- sl_deployed.kill_sl_service_on_node('mon01', 'influxdb-relay')
+ show_step(5)
+ sl_os_deployed.kill_sl_service_on_node('mon01', 'influxdb-relay')
# STEP #6
- sl_deployed.post_data_into_influx('mon02')
+ show_step(6)
+ sl_os_deployed.post_data_into_influx('mon02')
# STEP #7
- assert 'mymeas' in sl_deployed.check_data_in_influxdb('mon02')
- assert 'mymeas' in sl_deployed.check_data_in_influxdb('mon03')
+ show_step(7)
+ assert 'mymeas' in sl_os_deployed.check_data_in_influxdb('mon02')
+ assert 'mymeas' in sl_os_deployed.check_data_in_influxdb('mon03')
# STEP #8
- sl_deployed.start_service('mon01', 'influxdb-relay')
+ show_step(8)
+ sl_os_deployed.start_service('mon01', 'influxdb-relay')
# STEP #9
- assert 'mymeas' in sl_deployed.check_data_in_influxdb('mon01')
+ show_step(9)
+ assert 'mymeas' in sl_os_deployed.check_data_in_influxdb('mon01')
# STEP #10
- after_result = sl_deployed.run_sl_tests_json(
+ show_step(10)
+ after_result = sl_os_deployed.run_sl_tests_json(
'cfg01', '/root/stacklight-pytest/stacklight_tests/',
'tests/prometheus/', 'test_alerts.py')
for test in after_result:
@@ -312,10 +325,8 @@
@pytest.mark.grab_versions
@pytest.mark.fail_snapshot
- def test_kill_influxdb_mon01_node(self, underlay,
- openstack_deployed,
- openstack_actions,
- sl_deployed):
+ @pytest.mark.revert_snapshot(ext.SNAPSHOT.sl_deployed)
+ def test_kill_influxdb_mon01_node(self, sl_os_deployed, show_step):
"""Test kill influxdb on mon01 node
Scenario:
@@ -333,34 +344,43 @@
"""
# STEP #1,2,3
+ show_step(1)
+ show_step(2)
+ show_step(3)
# STEP #4
-
- mon_nodes = sl_deployed.get_monitoring_nodes()
+ show_step(4)
+ mon_nodes = sl_os_deployed.get_monitoring_nodes()
LOG.debug('Mon nodes list {0}'.format(mon_nodes))
- before_result = sl_deployed.run_sl_tests_json(
+ before_result = sl_os_deployed.run_sl_tests_json(
'cfg01', '/root/stacklight-pytest/stacklight_tests/',
'tests/prometheus/', 'test_alerts.py')
failed_tests = [test['name'] for test in
before_result if 'passed' not in test['outcome']]
# STEP #5
+ show_step(5)
+ sl_os_deployed.kill_sl_service_on_node('mon01', 'influxd')
- sl_deployed.kill_sl_service_on_node('mon01', 'influxd')
+ # STEP #6
+ show_step(6)
+ sl_os_deployed.post_data_into_influx('mon02')
- sl_deployed.post_data_into_influx('mon02')
# STEP #7
+ show_step(7)
+ assert 'mymeas' in sl_os_deployed.check_data_in_influxdb('mon02')
+ assert 'mymeas' in sl_os_deployed.check_data_in_influxdb('mon03')
- assert 'mymeas' in sl_deployed.check_data_in_influxdb('mon02')
- assert 'mymeas' in sl_deployed.check_data_in_influxdb('mon03')
# STEP #8
-
- sl_deployed.start_service('mon01', 'influxd')
+ show_step(8)
+ sl_os_deployed.start_service('mon01', 'influxd')
# STEP #9
- self.check_influxdb_xfail(sl_deployed, 'mon01', 'mymeas')
+ show_step(9)
+ self.check_influxdb_xfail(sl_os_deployed, 'mon01', 'mymeas')
# STEP #10
- after_result = sl_deployed.run_sl_tests_json(
+ show_step(10)
+ after_result = sl_os_deployed.run_sl_tests_json(
'cfg01', '/root/stacklight-pytest/stacklight_tests/',
'tests/prometheus/', 'test_alerts.py')
for test in after_result:
@@ -371,10 +391,9 @@
@pytest.mark.grab_versions
@pytest.mark.fail_snapshot
- def test_stop_influxdb_relay_mon_nodes(self, underlay,
- openstack_deployed,
- openstack_actions,
- sl_deployed):
+ @pytest.mark.revert_snapshot(ext.SNAPSHOT.sl_deployed)
+ def test_stop_influxdb_relay_mon_nodes(self, sl_os_deployed,
+ show_step):
"""Test stop influxdb relay on mon01 node
Scenario:
@@ -392,34 +411,45 @@
"""
# STEP #1,2,3
- mon_nodes = sl_deployed.get_monitoring_nodes()
+ show_step(1)
+ show_step(2)
+ show_step(3)
+
+ # STEP #4
+ show_step(4)
+ mon_nodes = sl_os_deployed.get_monitoring_nodes()
LOG.debug('Mon nodes list {0}'.format(mon_nodes))
- before_result = sl_deployed.run_sl_tests_json(
+ before_result = sl_os_deployed.run_sl_tests_json(
'cfg01', '/root/stacklight-pytest/stacklight_tests/',
'tests/prometheus/', 'test_alerts.py')
failed_tests = [test['name'] for test in
before_result if 'passed' not in test['outcome']]
# STEP #5
+ show_step(5)
+ sl_os_deployed.stop_sl_service_on_node('mon01', 'influxdb-relay')
+ sl_os_deployed.stop_sl_service_on_node('mon02', 'influxdb-relay')
- sl_deployed.stop_sl_service_on_node('mon01', 'influxdb-relay')
- sl_deployed.stop_sl_service_on_node('mon02', 'influxdb-relay')
# STEP #6
+ show_step(6)
+ sl_os_deployed.post_data_into_influx('mon03')
- sl_deployed.post_data_into_influx('mon03')
# STEP #7
+ show_step(7)
+ assert 'mymeas' in sl_os_deployed.check_data_in_influxdb('mon03')
- assert 'mymeas' in sl_deployed.check_data_in_influxdb('mon03')
# STEP #8
-
- sl_deployed.start_service('mon01', 'influxdb-relay')
- sl_deployed.start_service('mon02', 'influxdb-relay')
+ show_step(8)
+ sl_os_deployed.start_service('mon01', 'influxdb-relay')
+ sl_os_deployed.start_service('mon02', 'influxdb-relay')
# STEP #9
- assert 'mymeas' in sl_deployed.check_data_in_influxdb('mon01')
- assert 'mymeas' in sl_deployed.check_data_in_influxdb('mon02')
+ show_step(9)
+ assert 'mymeas' in sl_os_deployed.check_data_in_influxdb('mon01')
+ assert 'mymeas' in sl_os_deployed.check_data_in_influxdb('mon02')
# STEP #10
- after_result = sl_deployed.run_sl_tests_json(
+ show_step(10)
+ after_result = sl_os_deployed.run_sl_tests_json(
'cfg01', '/root/stacklight-pytest/stacklight_tests/',
'tests/prometheus/', 'test_alerts.py')
for test in after_result:
@@ -430,10 +460,8 @@
@pytest.mark.grab_versions
@pytest.mark.fail_snapshot
- def test_stop_influxdb_mon_nodes(self, underlay,
- openstack_deployed,
- openstack_actions,
- sl_deployed):
+ @pytest.mark.revert_snapshot(ext.SNAPSHOT.sl_deployed)
+ def test_stop_influxdb_mon_nodes(self, sl_os_deployed, show_step):
"""Test stop influxdb on mon01 node
Scenario:
@@ -451,34 +479,46 @@
"""
# STEP #1,2,3
- mon_nodes = sl_deployed.get_monitoring_nodes()
+ show_step(1)
+ show_step(2)
+ show_step(3)
+
+ # STEP #4
+ show_step(4)
+ mon_nodes = sl_os_deployed.get_monitoring_nodes()
LOG.debug('Mon nodes list {0}'.format(mon_nodes))
- before_result = sl_deployed.run_sl_tests_json(
+ before_result = sl_os_deployed.run_sl_tests_json(
'cfg01', '/root/stacklight-pytest/stacklight_tests/',
'tests/prometheus/', 'test_alerts.py')
failed_tests = [test['name'] for test in
before_result if 'passed' not in test['outcome']]
+
# STEP #5
+ show_step(5)
+ sl_os_deployed.stop_sl_service_on_node('mon01', 'influxdb')
+ sl_os_deployed.stop_sl_service_on_node('mon02', 'influxdb')
- sl_deployed.stop_sl_service_on_node('mon01', 'influxdb')
- sl_deployed.stop_sl_service_on_node('mon02', 'influxdb')
# STEP #6
+ show_step(6)
+ sl_os_deployed.post_data_into_influx('mon03')
- sl_deployed.post_data_into_influx('mon03')
# STEP #7
+ show_step(7)
+ assert 'mymeas' in sl_os_deployed.check_data_in_influxdb('mon03')
- assert 'mymeas' in sl_deployed.check_data_in_influxdb('mon03')
# STEP #8
-
- sl_deployed.start_service('mon01', 'influxdb')
- sl_deployed.start_service('mon02', 'influxdb')
+ show_step(8)
+ sl_os_deployed.start_service('mon01', 'influxdb')
+ sl_os_deployed.start_service('mon02', 'influxdb')
# STEP #9
- self.check_influxdb_xfail(sl_deployed, 'mon01', 'mymeas')
- self.check_influxdb_xfail(sl_deployed, 'mon02', 'mymeas')
+ show_step(9)
+ self.check_influxdb_xfail(sl_os_deployed, 'mon01', 'mymeas')
+ self.check_influxdb_xfail(sl_os_deployed, 'mon02', 'mymeas')
# STEP #10
- after_result = sl_deployed.run_sl_tests_json(
+ show_step(10)
+ after_result = sl_os_deployed.run_sl_tests_json(
'cfg01', '/root/stacklight-pytest/stacklight_tests/',
'tests/prometheus/', 'test_alerts.py')
for test in after_result: