Add InfluxDB HA tests

* Reboot mon node and check functionality
* Shutdown mon node and check functionality
* Kill influxdb on 1 node and check functionality
* Kill influxdb-relay on 1 node and check functionality
* Gracefully stop influxdb on 2 nodes and check functionality
* Gracefully stop influxdb-relay on 2 nodes and check functionality

Change-Id: I21a123fa6ebb0f71250106f6df68cd5f412ad9fc
diff --git a/tcp_tests/managers/sl_manager.py b/tcp_tests/managers/sl_manager.py
index 3d8de25..2f76306 100644
--- a/tcp_tests/managers/sl_manager.py
+++ b/tcp_tests/managers/sl_manager.py
@@ -11,7 +11,7 @@
 #    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 #    License for the specific language governing permissions and limitations
 #    under the License.
-
+import json
 import os
 
 from devops.helpers import decorators
@@ -87,10 +87,14 @@
                             in self.__underlay.node_names()
                             if node_to_run in node_name]
         if skip_tests:
-            cmd = "cd {0}; pytest -k 'not {1}' {2}".format(
-                tests_path, skip_tests, test_to_run)
+            cmd = ("cd {0}; "
+                   "export VOLUME_STATUS='available'; "
+                   "pytest -k 'not {1}' {2}".format(
+                tests_path, skip_tests, test_to_run))
         else:
-            cmd = "cd {0}; pytest -k {1}".format(tests_path, test_to_run)
+            cmd = ("cd {0}; "
+                   "export VOLUME_STATUS='available'; "
+                   "pytest -k {1}".format(tests_path, test_to_run))
         with self.__underlay.remote(node_name=target_node_name[0]) \
                 as node_remote:
             LOG.debug("Run {0} on the node {1}".format(
@@ -99,6 +103,33 @@
             LOG.debug("Test execution result is {}".format(result))
         return result
 
+    def run_sl_tests_json(self, node_to_run, tests_path,
+                          test_to_run, skip_tests):
+        target_node_name = [node_name for node_name
+                            in self.__underlay.node_names()
+                            if node_to_run in node_name]
+        if skip_tests:
+            cmd = ("cd {0}; "
+                   "export VOLUME_STATUS='available'; "
+                   "pytest  --json=report.json -k 'not {1}' {2}".format(
+                tests_path, skip_tests, test_to_run))
+        else:
+            cmd = ("cd {0}; "
+                   "export VOLUME_STATUS='available'; "
+                   "pytest --json=report.json -k {1}".format(
+                tests_path, test_to_run))
+        with self.__underlay.remote(node_name=target_node_name[0]) \
+                as node_remote:
+            LOG.debug("Run {0} on the node {1}".format(
+                cmd, target_node_name[0]))
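+            # pytest-json provides the --json report consumed below;
+            # install it on the target node in case it is missing.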
+            node_remote.execute('pip install pytest-json')
+            node_remote.execute(cmd)
+            res = node_remote.execute('cd {0}; cat report.json'.format(
+                tests_path))
+            LOG.debug("Test execution result is {}".format(res['stdout']))
+            result = json.loads(''.join(res['stdout']))
+        return result['report']['tests']
+
     def download_sl_test_report(self, stored_node, file_path):
         target_node_name = [node_name for node_name
                             in self.__underlay.node_names()
@@ -143,6 +174,8 @@
                 'Ip states after force restart {0}'.format(
                     self._salt.local(tgt='mon*',
                                      fun='cmd.run', args='ip a')))
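+            # Restarting keepalived on the mon nodes is assumed to make the
+            # monitoring VIP come back up after the forced restart.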
+            self._salt.local(tgt="mon*", fun='cmd.run',
+                             args='systemctl restart keepalived')
             current_targets = prometheus_client.get_targets()
 
         LOG.debug('Current targets after install {0}'
@@ -151,3 +184,81 @@
         for entry in current_targets:
             assert 'up' in entry['health'], \
                 'Next target is down {}'.format(entry)
+
+    def kill_sl_service_on_node(self, node_sub_name, service_name):
+        target_node_name = [node_name for node_name
+                            in self.__underlay.node_names()
+                            if node_sub_name in node_name]
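+        # Emulate a service crash by sending SIGKILL to its process.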
+        cmd = 'kill -9 $(pidof {0})'.format(service_name)
+        with self.__underlay.remote(node_name=target_node_name[0]) \
+                as node_remote:
+            LOG.debug("Run {0} on the node {1}".format(
+                cmd, target_node_name[0]))
+            res = node_remote.execute(cmd)
+            LOG.debug("Test execution result is {}".format(res))
+            assert res['exit_code'] == 0, (
+                'Unexpected exit code for command {0}, '
+                'current result {1}'.format(cmd, res))
+
+    def stop_sl_service_on_node(self, node_sub_name, service_name):
+        target_node_name = [node_name for node_name
+                            in self.__underlay.node_names()
+                            if node_sub_name in node_name]
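+        # Gracefully stop the service via systemd.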
+        cmd = 'systemctl stop {}'.format(service_name)
+        with self.__underlay.remote(node_name=target_node_name[0]) \
+                as node_remote:
+            LOG.debug("Run {0} on the node {1}".format(
+                cmd, target_node_name[0]))
+            res = node_remote.execute(cmd)
+            LOG.debug("Test execution result is {}".format(res))
+            assert res['exit_code'] == 0, (
+                'Unexpected exit code for command {0}, '
+                'current result {1}'.format(cmd, res))
+
+    def post_data_into_influx(self, node_sub_name):
+        target_node_name = [node_name for node_name
+                            in self.__underlay.node_names()
+                            if node_sub_name in node_name]
+        vip = self.get_sl_vip()
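+        # Write a test point into the 'lma' database through the cluster VIP.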
+        cmd = ("curl -POST 'http://{0}:8086/write?db=lma' -u "
+               "lma:lmapass --data-binary 'mymeas value=777'".format(vip))
+        with self.__underlay.remote(node_name=target_node_name[0]) \
+                as node_remote:
+            LOG.debug("Run {0} on the node {1}".format(
+                cmd, target_node_name[0]))
+            res = node_remote.execute(cmd)
+            assert res['exit_code'] == 0, (
+                'Unexpected exit code for command {0}, '
+                'current result {1}'.format(cmd, res))
+
+    def check_data_in_influxdb(self, node_sub_name):
+        target_node_name = [node_name for node_name
+                            in self.__underlay.node_names()
+                            if node_sub_name in node_name]
+        vip = self.get_sl_vip()
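+        # Read the test measurement back through the VIP with the influx CLI.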
+        cmd = ("influx -host {0} -port 8086 -database lma  "
+               "-username lma -password lmapass -execute "
+               "'select * from mymeas' -precision rfc3339;".format(vip))
+        with self.__underlay.remote(node_name=target_node_name[0]) \
+                as node_remote:
+            LOG.debug("Run {0} on the node {1}".format(
+                cmd, target_node_name[0]))
+            res = node_remote.execute(cmd)
+            assert res['exit_code'] == 0, (
+                'Unexpected exit code for command {0}, '
+                'current result {1}'.format(cmd, res))
+            return res['stdout'][0].rstrip()
+
+    def start_service(self, node_sub_name, service_name):
+        target_node_name = [node_name for node_name
+                            in self.__underlay.node_names()
+                            if node_sub_name in node_name]
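+        # Bring the service back up via systemd.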
+        cmd = 'systemctl start {0}'.format(service_name)
+        with self.__underlay.remote(node_name=target_node_name[0]) \
+                as node_remote:
+            LOG.debug("Run {0} on the node {1}".format(
+                cmd, target_node_name[0]))
+            res = node_remote.execute(cmd)
+            assert res['exit_code'] == 0, (
+                'Unexpected exit code for command {0}, '
+                'current result {1}'.format(cmd, res))
diff --git a/tcp_tests/templates/virtual-mcp-ocata-dvr/salt.yaml b/tcp_tests/templates/virtual-mcp-ocata-dvr/salt.yaml
index 2a8414a..7b39455 100644
--- a/tcp_tests/templates/virtual-mcp-ocata-dvr/salt.yaml
+++ b/tcp_tests/templates/virtual-mcp-ocata-dvr/salt.yaml
@@ -17,7 +17,7 @@
 
 {{ SHARED.MACRO_RUN_SALT_MASTER_UNDERLAY_STATES() }}
 
-- description: "Workaround for PROD-14831 , add 'dns' role to ctl01 and ctl02 nodes"
+- description: "Workaround for PROD-14831 , add 'dns' role to cmp01 and cmp02 nodes"
   cmd: |
     set -e;
     apt-get -y install python-virtualenv python-pip build-essential python-dev libssl-dev;
diff --git a/tcp_tests/tests/system/test_failover.py b/tcp_tests/tests/system/test_failover.py
index a8bb6b8..11173ce 100644
--- a/tcp_tests/tests/system/test_failover.py
+++ b/tcp_tests/tests/system/test_failover.py
@@ -11,7 +11,6 @@
 #    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 #    License for the specific language governing permissions and limitations
 #    under the License.
-
 import pytest
 
 from tcp_tests import logger
@@ -148,8 +147,10 @@
             1. Prepare salt on hosts
             2. Setup controller nodes
             3. Setup compute, monitoring nodes
-            4. Restart mon01
-            5. Run LMA smoke after failover
+            4. Check targets before restart
+            5. Restart mon01
+            6. Check targets after restart
+            7. Run LMA smoke after failover
 
 
         """
@@ -160,15 +161,30 @@
 
         # STEP #4
         show_step(4)
-        openstack_actions.warm_restart_nodes('mon01')
+        mon_nodes = sl_deployed.get_monitoring_nodes()
+        LOG.debug('Mon nodes list {0}'.format(mon_nodes))
+        sl_deployed.check_prometheus_targets(mon_nodes)
+        before_result = sl_deployed.run_sl_tests_json(
+            'cfg01', '/root/stacklight-pytest/stacklight_tests/',
+            'tests/prometheus/', 'test_alerts.py')
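+        # Remember the tests that already fail before the failover so they
+        # are not counted as regressions afterwards.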
+        failed_tests = [test['name'] for test in
+                        before_result if 'passed' not in test['outcome']]
         # STEP #5
         show_step(5)
+        openstack_actions.warm_restart_nodes('mon01')
+        # STEP #6
+        show_step(6)
+        sl_deployed.check_prometheus_targets(mon_nodes)
+        # STEP #7
+        show_step(7)
         # Run SL component tests
-        sl_deployed.run_sl_functional_tests(
-            'cfg01',
-            '/root/stacklight-pytest/stacklight_tests/',
-            'tests/prometheus/test_smoke.py',
-            'test_alerts.py')
+        after_result = sl_deployed.run_sl_tests_json(
+            'cfg01', '/root/stacklight-pytest/stacklight_tests/',
+            'tests/prometheus/', 'test_alerts.py')
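+        # Only tests that passed before the restart are expected to pass now.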
+        for test in after_result:
+            if test['name'] not in failed_tests:
+                assert 'passed' in test['outcome'], \
+                    'Failed test {}'.format(test)
         LOG.info("*************** DONE **************")
 
     @pytest.mark.grab_versions
@@ -182,8 +198,9 @@
             1. Prepare salt on hosts
             2. Setup controller nodes
             3. Setup compute, monitoring nodes
-            4. Shutdown mon01
-            5. Run LMA smoke after failover
+            4. Check LMA before mon node shutdown
+            5. Shutdown mon01 node
+            6. Run LMA tests after failover
 
 
         """
@@ -194,12 +211,259 @@
 
         # STEP #4
         show_step(4)
-        openstack_actions.warm_shutdown_openstack_nodes('mon01')
+        mon_nodes = sl_deployed.get_monitoring_nodes()
+        LOG.debug('Mon nodes list {0}'.format(mon_nodes))
+        sl_deployed.check_prometheus_targets(mon_nodes)
+        before_result = sl_deployed.run_sl_tests_json(
+            'cfg01', '/root/stacklight-pytest/stacklight_tests/',
+            'tests/prometheus/', 'test_alerts.py')
+        failed_tests = [test['name'] for test in
+                        before_result if 'passed' not in test['outcome']]
         # STEP #5
         show_step(5)
-        sl_deployed.run_sl_functional_tests(
-            'cfg01',
-            '/root/stacklight-pytest/stacklight_tests/',
-            'tests/prometheus/test_smoke.py',
-            'test_alerts.py')
+        openstack_actions.warm_shutdown_openstack_nodes('mon01')
+        # STEP #6
+        show_step(6)
+        # Run SL component tests
+        after_result = sl_deployed.run_sl_tests_json(
+            'cfg01', '/root/stacklight-pytest/stacklight_tests/',
+            'tests/prometheus/', 'test_alerts.py')
+        for test in after_result:
+            if test['name'] not in failed_tests:
+                assert 'passed' in test['outcome'], \
+                    'Failed test {}'.format(test)
+        LOG.info("*************** DONE **************")
+
+    @pytest.mark.grab_versions
+    @pytest.mark.fail_snapshot
+    def test_kill_influxdb_relay_mon01_node(self, underlay,
+                                            openstack_deployed,
+                                            openstack_actions,
+                                            sl_deployed):
+        """Test kill influxdb relay on mon01 node
+
+        Scenario:
+            1. Prepare salt on hosts
+            2. Setup controller nodes
+            3. Setup compute, monitoring nodes
+            4. Check LMA before the influxdb-relay kill
+            5. Kill influxdb-relay on mon01 node
+            6. Post data into InfluxDB
+            7. Get data from all healthy nodes
+            8. Start influxdb-relay on mon01
+            9. Request data on mon01
+            10. Run LMA tests after fail and compare with result before fail
+
+
+        """
+        # STEP #1,2,3
+        # STEP #4
+        mon_nodes = sl_deployed.get_monitoring_nodes()
+        LOG.debug('Mon nodes list {0}'.format(mon_nodes))
+        before_result = sl_deployed.run_sl_tests_json(
+            'cfg01', '/root/stacklight-pytest/stacklight_tests/',
+            'tests/prometheus/', 'test_alerts.py')
+        failed_tests = [test['name'] for test in
+                        before_result if 'passed' not in test['outcome']]
+        # STEP #5
+
+        sl_deployed.kill_sl_service_on_node('mon01', 'influxdb-relay')
+        # STEP #6
+
+        sl_deployed.post_data_into_influx('mon02')
+        # STEP #7
+
+        assert 'mymeas' in sl_deployed.check_data_in_influxdb('mon02')
+        assert 'mymeas' in sl_deployed.check_data_in_influxdb('mon03')
+        # STEP #8
+
+        sl_deployed.start_service('mon01', 'influxdb-relay')
+        # STEP #9
+
+        assert 'mymeas' in sl_deployed.check_data_in_influxdb('mon01')
+        # STEP #10
+
+        after_result = sl_deployed.run_sl_tests_json(
+            'cfg01', '/root/stacklight-pytest/stacklight_tests/',
+            'tests/prometheus/', 'test_alerts.py')
+        for test in after_result:
+            if test['name'] not in failed_tests:
+                assert 'passed' in test['outcome'], \
+                    'Failed test {}'.format(test)
+        LOG.info("*************** DONE **************")
+
+    @pytest.mark.grab_versions
+    @pytest.mark.fail_snapshot
+    def test_kill_influxdb_mon01_node(self, underlay,
+                                      openstack_deployed,
+                                      openstack_actions,
+                                      sl_deployed):
+        """Test kill influxdb on mon01 node
+
+        Scenario:
+            1. Prepare salt on hosts
+            2. Setup controller nodes
+            3. Setup compute, monitoring nodes
+            4. Check LMA before the influxdb kill
+            5. Kill influxdb on mon01 node
+            6. Post data into InfluxDB
+            7. Get data from all healthy nodes
+            8. Start influxdb on mon01
+            9. Request data on mon01
+            10. Run LMA tests after fail and compare with result before fail
+
+
+        """
+        # STEP #1,2,3
+
+        # STEP #4
+
+        mon_nodes = sl_deployed.get_monitoring_nodes()
+        LOG.debug('Mon nodes list {0}'.format(mon_nodes))
+        before_result = sl_deployed.run_sl_tests_json(
+            'cfg01', '/root/stacklight-pytest/stacklight_tests/',
+            'tests/prometheus/', 'test_alerts.py')
+        failed_tests = [test['name'] for test in
+                        before_result if 'passed' not in test['outcome']]
+        # STEP #5
+
+        sl_deployed.kill_sl_service_on_node('mon01', 'influxd')
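+        # STEP #6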
+
+        sl_deployed.post_data_into_influx('mon02')
+        # STEP #7
+
+        assert 'mymeas' in sl_deployed.check_data_in_influxdb('mon02')
+        assert 'mymeas' in sl_deployed.check_data_in_influxdb('mon03')
+        # STEP #8
+
+        sl_deployed.start_service('mon01', 'influxd')
+        # STEP #9
+        assert 'mymeas' in sl_deployed.check_data_in_influxdb('mon01')
+        # STEP #10
+
+        after_result = sl_deployed.run_sl_tests_json(
+            'cfg01', '/root/stacklight-pytest/stacklight_tests/',
+            'tests/prometheus/', 'test_alerts.py')
+        for test in after_result:
+            if test['name'] not in failed_tests:
+                assert 'passed' in test['outcome'], \
+                    'Failed test {}'.format(test)
+        LOG.info("*************** DONE **************")
+
+    @pytest.mark.grab_versions
+    @pytest.mark.fail_snapshot
+    def test_stop_influxdb_relay_mon_nodes(self, underlay,
+                                            openstack_deployed,
+                                            openstack_actions,
+                                            sl_deployed):
+        """Test stop influxdb relay on mon01 node
+
+        Scenario:
+            1. Prepare salt on hosts
+            2. Setup controller nodes
+            3. Setup compute, monitoring nodes
+            4. Check LMA before stopping influxdb-relay
+            5. Stop influxdb-relay on mon01 and mon02 nodes
+            6. Post data into InfluxDB
+            7. Get data from all healthy nodes
+            8. Start influxdb-relay on mon01 and mon02
+            9. Request data on mon01 and mon02
+            10. Run LMA tests after fail and compare with result before fail
+
+
+        """
+        # STEP #1,2,3
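+        # STEP #4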
+        mon_nodes = sl_deployed.get_monitoring_nodes()
+        LOG.debug('Mon nodes list {0}'.format(mon_nodes))
+        before_result = sl_deployed.run_sl_tests_json(
+            'cfg01', '/root/stacklight-pytest/stacklight_tests/',
+            'tests/prometheus/', 'test_alerts.py')
+        failed_tests = [test['name'] for test in
+                        before_result if 'passed' not in test['outcome']]
+        # STEP #5
+
+        sl_deployed.stop_sl_service_on_node('mon01', 'influxdb-relay')
+        sl_deployed.stop_sl_service_on_node('mon02', 'influxdb-relay')
+        # STEP #6
+
+        sl_deployed.post_data_into_influx('mon03')
+        # STEP #7
+
+        assert 'mymeas' in sl_deployed.check_data_in_influxdb('mon03')
+        # STEP #8
+
+        sl_deployed.start_service('mon01', 'influxdb-relay')
+        sl_deployed.start_service('mon02', 'influxdb-relay')
+        # STEP #9
+
+        assert 'mymeas' in sl_deployed.check_data_in_influxdb('mon01')
+        assert 'mymeas' in sl_deployed.check_data_in_influxdb('mon02')
+        # STEP #10
+
+        after_result = sl_deployed.run_sl_tests_json(
+            'cfg01', '/root/stacklight-pytest/stacklight_tests/',
+            'tests/prometheus/', 'test_alerts.py')
+        for test in after_result:
+            if test['name'] not in failed_tests:
+                assert 'passed' in test['outcome'], \
+                    'Failed test {}'.format(test)
+        LOG.info("*************** DONE **************")
+
+    @pytest.mark.grab_versions
+    @pytest.mark.fail_snapshot
+    def test_stop_influxdb_mon_nodes(self, underlay,
+                                      openstack_deployed,
+                                      openstack_actions,
+                                      sl_deployed):
+        """Test stop influxdb on mon01 node
+
+        Scenario:
+            1. Prepare salt on hosts
+            2. Setup controller nodes
+            3. Setup compute, monitoring nodes
+            4. Check LMA before stopping influxdb
+            5. Stop influxdb on mon01 and mon02 nodes
+            6. Post data into InfluxDB
+            7. Get data from all healthy nodes
+            8. Start influxdb on mon01 and mon02
+            9. Request data on mon01 and mon02
+            10. Run LMA tests after fail and compare with result before fail
+
+
+        """
+        # STEP #1,2,3
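+        # STEP #4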
+        mon_nodes = sl_deployed.get_monitoring_nodes()
+        LOG.debug('Mon nodes list {0}'.format(mon_nodes))
+        before_result = sl_deployed.run_sl_tests_json(
+            'cfg01', '/root/stacklight-pytest/stacklight_tests/',
+            'tests/prometheus/', 'test_alerts.py')
+        failed_tests = [test['name'] for test in
+                        before_result if 'passed' not in test['outcome']]
+        # STEP #5
+
+        sl_deployed.stop_sl_service_on_node('mon01', 'influxdb')
+        sl_deployed.stop_sl_service_on_node('mon02', 'influxdb')
+        # STEP #6
+
+        sl_deployed.post_data_into_influx('mon03')
+        # STEP #7
+
+        assert 'mymeas' in sl_deployed.check_data_in_influxdb('mon03')
+        # STEP #8
+
+        sl_deployed.start_service('mon01', 'influxdb')
+        sl_deployed.start_service('mon02', 'influxdb')
+        # STEP #9
+
+        assert 'mymeas' in sl_deployed.check_data_in_influxdb('mon01')
+        assert 'mymeas' in sl_deployed.check_data_in_influxdb('mon02')
+        # STEP #10
+
+        after_result = sl_deployed.run_sl_tests_json(
+            'cfg01', '/root/stacklight-pytest/stacklight_tests/',
+            'tests/prometheus/', 'test_alerts.py')
+        for test in after_result:
+            if test['name'] not in failed_tests:
+                assert 'passed' in test['outcome'], \
+                    'Failed test {}'.format(test)
         LOG.info("*************** DONE **************")