Add InfluxDB HA tests
* Reboot mon node and check functionality
* Shutdown mon node and check functionality
* Kill influxdb on 1 node, and check functionality
* Kill influxdb-relay on 1 node and check functionality
* Gracefully stop influxdb on 2 nodes and check functionality
* Gracefully stop influxdb-relay on 2 nodes and check functionality
Change-Id: I21a123fa6ebb0f71250106f6df68cd5f412ad9fc
diff --git a/tcp_tests/managers/sl_manager.py b/tcp_tests/managers/sl_manager.py
index 3d8de25..2f76306 100644
--- a/tcp_tests/managers/sl_manager.py
+++ b/tcp_tests/managers/sl_manager.py
@@ -11,7 +11,7 @@
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
-
+import json
import os
from devops.helpers import decorators
@@ -87,10 +87,14 @@
in self.__underlay.node_names()
if node_to_run in node_name]
if skip_tests:
- cmd = "cd {0}; pytest -k 'not {1}' {2}".format(
- tests_path, skip_tests, test_to_run)
+ cmd = ("cd {0}; "
+ "export VOLUME_STATUS='available'; "
+ "pytest -k 'not {1}' {2}".format(
+ tests_path, skip_tests, test_to_run))
else:
- cmd = "cd {0}; pytest -k {1}".format(tests_path, test_to_run)
+ cmd = ("cd {0}; "
+ "export VOLUME_STATUS='available'; "
+ "pytest -k {1}".format(tests_path, test_to_run))
with self.__underlay.remote(node_name=target_node_name[0]) \
as node_remote:
LOG.debug("Run {0} on the node {1}".format(
@@ -99,6 +103,33 @@
LOG.debug("Test execution result is {}".format(result))
return result
+ def run_sl_tests_json(self, node_to_run, tests_path,
+ test_to_run, skip_tests):
+ target_node_name = [node_name for node_name
+ in self.__underlay.node_names()
+ if node_to_run in node_name]
+ if skip_tests:
+ cmd = ("cd {0}; "
+ "export VOLUME_STATUS='available'; "
+ "pytest --json=report.json -k 'not {1}' {2}".format(
+ tests_path, skip_tests, test_to_run))
+ else:
+ cmd = ("cd {0}; "
+ "export VOLUME_STATUS='available'; "
+ "pytest --json=report.json -k {1}".format(
+ tests_path, test_to_run))
+ with self.__underlay.remote(node_name=target_node_name[0]) \
+ as node_remote:
+ LOG.debug("Run {0} on the node {1}".format(
+ cmd, target_node_name[0]))
+ node_remote.execute('pip install pytest-json')
+ node_remote.execute(cmd)
+ res = node_remote.execute('cd {0}; cat report.json'.format(
+ tests_path))
+ LOG.debug("Test execution result is {}".format(res['stdout']))
+ result = json.loads(res['stdout'][0])
+ return result['report']['tests']
+
def download_sl_test_report(self, stored_node, file_path):
target_node_name = [node_name for node_name
in self.__underlay.node_names()
@@ -143,6 +174,8 @@
'Ip states after force restart {0}'.format(
self._salt.local(tgt='mon*',
fun='cmd.run', args='ip a')))
+ self._salt.local(tgt="mon*", fun='cmd.run',
+ args='systemctl restart keepalived')
current_targets = prometheus_client.get_targets()
LOG.debug('Current targets after install {0}'
@@ -151,3 +184,81 @@
for entry in current_targets:
assert 'up' in entry['health'], \
'Next target is down {}'.format(entry)
+
+ def kill_sl_service_on_node(self, node_sub_name, service_name):
+ target_node_name = [node_name for node_name
+ in self.__underlay.node_names()
+ if node_sub_name in node_name]
+ cmd = 'kill -9 $(pidof {0})'.format(service_name)
+ with self.__underlay.remote(node_name=target_node_name[0]) \
+ as node_remote:
+ LOG.debug("Run {0} on the node {1}".format(
+ cmd, target_node_name[0]))
+ res = node_remote.execute(cmd)
+ LOG.debug("Test execution result is {}".format(res))
+ assert res['exit_code'] == 0, (
+ 'Unexpected exit code for command {0}, '
+ 'current result {1}'.format(cmd, res))
+
+ def stop_sl_service_on_node(self, node_sub_name, service_name):
+ target_node_name = [node_name for node_name
+ in self.__underlay.node_names()
+ if node_sub_name in node_name]
+ cmd = 'systemctl stop {}'.format(service_name)
+ with self.__underlay.remote(node_name=target_node_name[0]) \
+ as node_remote:
+ LOG.debug("Run {0} on the node {1}".format(
+ cmd, target_node_name[0]))
+ res = node_remote.execute(cmd)
+ LOG.debug("Test execution result is {}".format(res))
+ assert res['exit_code'] == 0, (
+ 'Unexpected exit code for command {0}, '
+ 'current result {1}'.format(cmd, res))
+
+ def post_data_into_influx(self, node_sub_name):
+ target_node_name = [node_name for node_name
+ in self.__underlay.node_names()
+ if node_sub_name in node_name]
+ vip = self.get_sl_vip()
+ cmd = ("curl -POST 'http://{0}:8086/write?db=lma' -u "
+ "lma:lmapass --data-binary 'mymeas value=777'".format(vip))
+ with self.__underlay.remote(node_name=target_node_name[0]) \
+ as node_remote:
+ LOG.debug("Run {0} on the node {1}".format(
+ cmd, target_node_name[0]))
+ res = node_remote.execute(cmd)
+ assert res['exit_code'] == 0, (
+ 'Unexpected exit code for command {0}, '
+ 'current result {1}'.format(cmd, res))
+
+ def check_data_in_influxdb(self, node_sub_name):
+ target_node_name = [node_name for node_name
+ in self.__underlay.node_names()
+ if node_sub_name in node_name]
+ vip = self.get_sl_vip()
+ cmd = ("influx -host {0} -port 8086 -database lma "
+ "-username lma -password lmapass -execute "
+ "'select * from mymeas' -precision rfc3339;".format(vip))
+ with self.__underlay.remote(node_name=target_node_name[0]) \
+ as node_remote:
+ LOG.debug("Run {0} on the node {1}".format(
+ cmd, target_node_name[0]))
+ res = node_remote.execute(cmd)
+ assert res['exit_code'] == 0, (
+ 'Unexpected exit code for command {0}, '
+ 'current result {1}'.format(cmd, res))
+ return res['stdout'][0].rstrip()
+
+ def start_service(self, node_sub_name, service_name):
+ target_node_name = [node_name for node_name
+ in self.__underlay.node_names()
+ if node_sub_name in node_name]
+ cmd = 'systemctl start {0}'.format(service_name)
+ with self.__underlay.remote(node_name=target_node_name[0]) \
+ as node_remote:
+ LOG.debug("Run {0} on the node {1}".format(
+ cmd, target_node_name[0]))
+ res = node_remote.execute(cmd)
+ assert res['exit_code'] == 0, (
+ 'Unexpected exit code for command {0}, '
+ 'current result {1}'.format(cmd, res))
diff --git a/tcp_tests/templates/virtual-mcp-ocata-dvr/salt.yaml b/tcp_tests/templates/virtual-mcp-ocata-dvr/salt.yaml
index 2a8414a..7b39455 100644
--- a/tcp_tests/templates/virtual-mcp-ocata-dvr/salt.yaml
+++ b/tcp_tests/templates/virtual-mcp-ocata-dvr/salt.yaml
@@ -17,7 +17,7 @@
{{ SHARED.MACRO_RUN_SALT_MASTER_UNDERLAY_STATES() }}
-- description: "Workaround for PROD-14831 , add 'dns' role to ctl01 and ctl02 nodes"
+- description: "Workaround for PROD-14831, add 'dns' role to cmp01 and cmp02 nodes"
cmd: |
set -e;
apt-get -y install python-virtualenv python-pip build-essential python-dev libssl-dev;
diff --git a/tcp_tests/tests/system/test_failover.py b/tcp_tests/tests/system/test_failover.py
index a8bb6b8..11173ce 100644
--- a/tcp_tests/tests/system/test_failover.py
+++ b/tcp_tests/tests/system/test_failover.py
@@ -11,7 +11,6 @@
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
-
import pytest
from tcp_tests import logger
@@ -148,8 +147,10 @@
1. Prepare salt on hosts
2. Setup controller nodes
3. Setup compute, monitoring nodes
- 4. Restart mon01
- 5. Run LMA smoke after failover
+ 4. Check targets before restart
+ 5. Restart mon01
+ 6. Check targets after restart
+ 7. Run LMA smoke after failover
"""
@@ -160,15 +161,30 @@
# STEP #4
show_step(4)
- openstack_actions.warm_restart_nodes('mon01')
+ mon_nodes = sl_deployed.get_monitoring_nodes()
+ LOG.debug('Mon nodes list {0}'.format(mon_nodes))
+ sl_deployed.check_prometheus_targets(mon_nodes)
+ before_result = sl_deployed.run_sl_tests_json(
+ 'cfg01', '/root/stacklight-pytest/stacklight_tests/',
+ 'tests/prometheus/', 'test_alerts.py')
+ failed_tests = [test['name'] for test in
+ before_result if 'passed' not in test['outcome']]
# STEP #5
show_step(5)
+ openstack_actions.warm_restart_nodes('mon01')
+ # STEP #6
+ show_step(6)
+ sl_deployed.check_prometheus_targets(mon_nodes)
+ # STEP #7
+ show_step(7)
# Run SL component tetsts
- sl_deployed.run_sl_functional_tests(
- 'cfg01',
- '/root/stacklight-pytest/stacklight_tests/',
- 'tests/prometheus/test_smoke.py',
- 'test_alerts.py')
+ after_result = sl_deployed.run_sl_tests_json(
+ 'cfg01', '/root/stacklight-pytest/stacklight_tests/',
+ 'tests/prometheus/', 'test_alerts.py')
+ for test in after_result:
+ if test['name'] not in failed_tests:
+ assert 'passed' in test['outcome'], \
+ 'Failed test {}'.format(test)
LOG.info("*************** DONE **************")
@pytest.mark.grab_versions
@@ -182,8 +198,9 @@
1. Prepare salt on hosts
2. Setup controller nodes
3. Setup compute, monitoring nodes
- 4. Shutdown mon01
- 5. Run LMA smoke after failover
+ 4. Check LMA before mon node shutdown
+ 5. Shutdown mon01 node
+ 6. Run LMA tests after failover
"""
@@ -194,12 +211,259 @@
# STEP #4
show_step(4)
- openstack_actions.warm_shutdown_openstack_nodes('mon01')
+ mon_nodes = sl_deployed.get_monitoring_nodes()
+ LOG.debug('Mon nodes list {0}'.format(mon_nodes))
+ sl_deployed.check_prometheus_targets(mon_nodes)
+ before_result = sl_deployed.run_sl_tests_json(
+ 'cfg01', '/root/stacklight-pytest/stacklight_tests/',
+ 'tests/prometheus/', 'test_alerts.py')
+ failed_tests = [test['name'] for test in
+ before_result if 'passed' not in test['outcome']]
# STEP #5
show_step(5)
- sl_deployed.run_sl_functional_tests(
- 'cfg01',
- '/root/stacklight-pytest/stacklight_tests/',
- 'tests/prometheus/test_smoke.py',
- 'test_alerts.py')
+ openstack_actions.warm_shutdown_openstack_nodes('mon01')
+ # STEP #6
+ show_step(6)
+ # Run SL component tests
+ after_result = sl_deployed.run_sl_tests_json(
+ 'cfg01', '/root/stacklight-pytest/stacklight_tests/',
+ 'tests/prometheus/', 'test_alerts.py')
+ for test in after_result:
+ if test['name'] not in failed_tests:
+ assert 'passed' in test['outcome'], \
+ 'Failed test {}'.format(test)
+ LOG.info("*************** DONE **************")
+
+ @pytest.mark.grab_versions
+ @pytest.mark.fail_snapshot
+ def test_kill_influxdb_relay_mon01_node(self, underlay,
+ openstack_deployed,
+ openstack_actions,
+ sl_deployed):
+ """Test kill influxdb relay on mon01 node
+
+ Scenario:
+ 1. Prepare salt on hosts
+ 2. Setup controller nodes
+ 3. Setup compute, monitoring nodes
+ 4. Check LMA before influxdb-relay kill
+ 5. Kill influxdb relay on mon01 node
+ 6. Post data into influx
+ 7. Get data from all healthy nodes
+ 8. Start influxdb-relay
+ 9. Request data on mon01
+ 10. Run LMA tests after fail and compare with result before fail
+
+
+ """
+ # STEP #1,2,3
+ # STEP #4
+ mon_nodes = sl_deployed.get_monitoring_nodes()
+ LOG.debug('Mon nodes list {0}'.format(mon_nodes))
+ before_result = sl_deployed.run_sl_tests_json(
+ 'cfg01', '/root/stacklight-pytest/stacklight_tests/',
+ 'tests/prometheus/', 'test_alerts.py')
+ failed_tests = [test['name'] for test in
+ before_result if 'passed' not in test['outcome']]
+ # STEP #5
+
+ sl_deployed.kill_sl_service_on_node('mon01', 'influxdb-relay')
+ # STEP #6
+
+ sl_deployed.post_data_into_influx('mon02')
+ # STEP #7
+
+ assert 'mymeas' in sl_deployed.check_data_in_influxdb('mon02')
+ assert 'mymeas' in sl_deployed.check_data_in_influxdb('mon03')
+ # STEP #8
+
+ sl_deployed.start_service('mon01', 'influxdb-relay')
+ # STEP #9
+
+ assert 'mymeas' in sl_deployed.check_data_in_influxdb('mon01')
+ # STEP #10
+
+ after_result = sl_deployed.run_sl_tests_json(
+ 'cfg01', '/root/stacklight-pytest/stacklight_tests/',
+ 'tests/prometheus/', 'test_alerts.py')
+ for test in after_result:
+ if test['name'] not in failed_tests:
+ assert 'passed' in test['outcome'], \
+ 'Failed test {}'.format(test)
+ LOG.info("*************** DONE **************")
+
+ @pytest.mark.grab_versions
+ @pytest.mark.fail_snapshot
+ def test_kill_influxdb_mon01_node(self, underlay,
+ openstack_deployed,
+ openstack_actions,
+ sl_deployed):
+ """Test kill influxdb on mon01 node
+
+ Scenario:
+ 1. Prepare salt on hosts
+ 2. Setup controller nodes
+ 3. Setup compute, monitoring nodes
+ 4. Check LMA before influxdb kill
+ 5. Kill influxdb on mon01 node
+ 6. Post data into influx
+ 7. Get data from all healthy nodes
+ 8. Start influx db
+ 9. Request data on mon01
+ 10. Run LMA tests after fail and compare with result before fail
+
+
+ """
+ # STEP #1,2,3
+
+ # STEP #4
+
+ mon_nodes = sl_deployed.get_monitoring_nodes()
+ LOG.debug('Mon nodes list {0}'.format(mon_nodes))
+ before_result = sl_deployed.run_sl_tests_json(
+ 'cfg01', '/root/stacklight-pytest/stacklight_tests/',
+ 'tests/prometheus/', 'test_alerts.py')
+ failed_tests = [test['name'] for test in
+ before_result if 'passed' not in test['outcome']]
+ # STEP #5
+
+ sl_deployed.kill_sl_service_on_node('mon01', 'influxd')
+
+ sl_deployed.post_data_into_influx('mon02')
+ # STEP #7
+
+ assert 'mymeas' in sl_deployed.check_data_in_influxdb('mon02')
+ assert 'mymeas' in sl_deployed.check_data_in_influxdb('mon03')
+ # STEP #8
+
+ sl_deployed.start_service('mon01', 'influxd')
+ # STEP #9
+ assert 'mymeas' in sl_deployed.check_data_in_influxdb('mon01')
+ # STEP #10
+
+ after_result = sl_deployed.run_sl_tests_json(
+ 'cfg01', '/root/stacklight-pytest/stacklight_tests/',
+ 'tests/prometheus/', 'test_alerts.py')
+ for test in after_result:
+ if test['name'] not in failed_tests:
+ assert 'passed' in test['outcome'], \
+ 'Failed test {}'.format(test)
+ LOG.info("*************** DONE **************")
+
+ @pytest.mark.grab_versions
+ @pytest.mark.fail_snapshot
+ def test_stop_influxdb_relay_mon_nodes(self, underlay,
+ openstack_deployed,
+ openstack_actions,
+ sl_deployed):
+ """Test stop influxdb relay on mon01 and mon02 nodes
+
+ Scenario:
+ 1. Prepare salt on hosts
+ 2. Setup controller nodes
+ 3. Setup compute, monitoring nodes
+ 4. Check LMA before influxdb-relay stop
+ 5. Stop influxdb relay on mon01 and mon02 nodes
+ 6. Post data into influx
+ 7. Get data from all healthy nodes
+ 8. Start influxdb-relay
+ 9. Request data on mon01, 02
+ 10. Run LMA tests after fail and compare with result before fail
+
+
+ """
+ # STEP #1,2,3
+ mon_nodes = sl_deployed.get_monitoring_nodes()
+ LOG.debug('Mon nodes list {0}'.format(mon_nodes))
+ before_result = sl_deployed.run_sl_tests_json(
+ 'cfg01', '/root/stacklight-pytest/stacklight_tests/',
+ 'tests/prometheus/', 'test_alerts.py')
+ failed_tests = [test['name'] for test in
+ before_result if 'passed' not in test['outcome']]
+ # STEP #5
+
+ sl_deployed.stop_sl_service_on_node('mon01', 'influxdb-relay')
+ sl_deployed.stop_sl_service_on_node('mon02', 'influxdb-relay')
+ # STEP #6
+
+ sl_deployed.post_data_into_influx('mon03')
+ # STEP #7
+
+ assert 'mymeas' in sl_deployed.check_data_in_influxdb('mon03')
+ # STEP #8
+
+ sl_deployed.start_service('mon01', 'influxdb-relay')
+ sl_deployed.start_service('mon02', 'influxdb-relay')
+ # STEP #9
+
+ assert 'mymeas' in sl_deployed.check_data_in_influxdb('mon01')
+ assert 'mymeas' in sl_deployed.check_data_in_influxdb('mon02')
+ # STEP #10
+
+ after_result = sl_deployed.run_sl_tests_json(
+ 'cfg01', '/root/stacklight-pytest/stacklight_tests/',
+ 'tests/prometheus/', 'test_alerts.py')
+ for test in after_result:
+ if test['name'] not in failed_tests:
+ assert 'passed' in test['outcome'], \
+ 'Failed test {}'.format(test)
+ LOG.info("*************** DONE **************")
+
+ @pytest.mark.grab_versions
+ @pytest.mark.fail_snapshot
+ def test_stop_influxdb_mon_nodes(self, underlay,
+ openstack_deployed,
+ openstack_actions,
+ sl_deployed):
+ """Test stop influxdb on mon01 and mon02 nodes
+
+ Scenario:
+ 1. Prepare salt on hosts
+ 2. Setup controller nodes
+ 3. Setup compute, monitoring nodes
+ 4. Check LMA before influxdb stop
+ 5. Stop influxdb on mon01 and mon02 nodes
+ 6. Post data into influx
+ 7. Get data from all healthy nodes
+ 8. Start influxdb
+ 9. Request data on mon01 and mon02
+ 10. Run LMA tests after fail and compare with result before fail
+
+
+ """
+ # STEP #1,2,3
+ mon_nodes = sl_deployed.get_monitoring_nodes()
+ LOG.debug('Mon nodes list {0}'.format(mon_nodes))
+ before_result = sl_deployed.run_sl_tests_json(
+ 'cfg01', '/root/stacklight-pytest/stacklight_tests/',
+ 'tests/prometheus/', 'test_alerts.py')
+ failed_tests = [test['name'] for test in
+ before_result if 'passed' not in test['outcome']]
+ # STEP #5
+
+ sl_deployed.stop_sl_service_on_node('mon01', 'influxdb')
+ sl_deployed.stop_sl_service_on_node('mon02', 'influxdb')
+ # STEP #6
+
+ sl_deployed.post_data_into_influx('mon03')
+ # STEP #7
+
+ assert 'mymeas' in sl_deployed.check_data_in_influxdb('mon03')
+ # STEP #8
+
+ sl_deployed.start_service('mon01', 'influxdb')
+ sl_deployed.start_service('mon02', 'influxdb')
+ # STEP #9
+
+ assert 'mymeas' in sl_deployed.check_data_in_influxdb('mon01')
+ assert 'mymeas' in sl_deployed.check_data_in_influxdb('mon02')
+ # STEP #10
+
+ after_result = sl_deployed.run_sl_tests_json(
+ 'cfg01', '/root/stacklight-pytest/stacklight_tests/',
+ 'tests/prometheus/', 'test_alerts.py')
+ for test in after_result:
+ if test['name'] not in failed_tests:
+ assert 'passed' in test['outcome'], \
+ 'Failed test {}'.format(test)
LOG.info("*************** DONE **************")