Add retry to check of the prometheus targets

* move common methods to sl_manager.py
* add retry to check prometheus targets
* fix address replacement in shared-salt.yaml

Change-Id: I33c6536c515ed0e967d11bf9cbf81899f399615e
Reviewed-on: https://review.gerrithub.io/378331
Reviewed-by: Dennis Dmitriev <dis.xcom@gmail.com>
Tested-by: Dennis Dmitriev <dis.xcom@gmail.com>
diff --git a/tcp_tests/managers/sl_manager.py b/tcp_tests/managers/sl_manager.py
index af75ff4..9ed13db 100644
--- a/tcp_tests/managers/sl_manager.py
+++ b/tcp_tests/managers/sl_manager.py
@@ -14,6 +14,8 @@
 
 import os
 
+from devops.helpers import decorators
+
 from tcp_tests.managers.execute_commands import ExecuteCommandsMixin
 from tcp_tests.managers.clients.prometheus import prometheus_client
 from tcp_tests import logger
@@ -97,3 +99,48 @@
             r.download(
                 destination=file_path,
                 target=os.getcwd())
+
+    def check_docker_services(self, nodes, expected_services):
+        """Check presence of the specified docker services on all the nodes
+        :param nodes: list of strings, names of nodes to check
+        :param expected_services: list of strings, names of services to find
+        """
+        for node in nodes:
+            services_status = self.get_service_info_from_node(node)
+            assert len(services_status) == len(expected_services), \
+                'Some services are missed on node {0}. ' \
+                'Current service list: {1}\nExpected service list: {2}' \
+                .format(node, services_status, expected_services)
+            for service in expected_services:
+                assert service in services_status,\
+                    'Missing service {0} in {1}'.format(service, services_status)
+                assert '0' not in services_status.get(service),\
+                    'Service {0} failed to start'.format(service)
+
+    @decorators.retry(AssertionError, count=10, delay=5)
+    def check_prometheus_targets(self, nodes):
+        """Assert all Prometheus targets are 'up'; retried on AssertionError
+        :param nodes: list of strings, names of nodes with keepalived VIP
+        """
+        client = self.api  # don't shadow the prometheus_client import
+        try:
+            current_targets = client.get_targets()
+        except Exception:  # let KeyboardInterrupt/SystemExit propagate
+            LOG.info('Restarting keepalived service on mon nodes...')
+            for node in nodes:
+                self._salt.local(tgt=node, fun='cmd.run',
+                                 args='systemctl restart keepalived')
+            LOG.warning(
+                'Ip states after force restart {0}'.format(
+                    self._salt.local(tgt='mon*',
+                                     fun='cmd.run', args='ip a')))
+            current_targets = client.get_targets()
+
+        LOG.debug('Current targets after install {0}'
+                  .format(current_targets))
+        # Assert that targets are up
+        for entry in current_targets:
+            assert 'up' in entry['health'], \
+                'Next target is down {}'.format(entry)
+
+
diff --git a/tcp_tests/templates/shared-salt.yaml b/tcp_tests/templates/shared-salt.yaml
index bc9a14e..8ce1570 100644
--- a/tcp_tests/templates/shared-salt.yaml
+++ b/tcp_tests/templates/shared-salt.yaml
@@ -117,21 +117,22 @@
     # 192.168.10 -> 10.16.0 (generated network for admin)
     # 10.16.0 -> <external network>
     # So let's replace constant networks to the keywords, and then keywords to the desired networks.
-    find /srv/salt/reclass/ -type f -exec sed -i 's/192\.168\.10\./==IPV4_NET_ADMIN_PREFIX==/g' {} +
-    find /srv/salt/reclass/ -type f -exec sed -i 's/172\.16\.10\./==IPV4_NET_CONTROL_PREFIX==/g' {} +
-    find /srv/salt/reclass/ -type f -exec sed -i 's/10\.1\.0\./==IPV4_NET_TENANT_PREFIX==/g' {} +
-    find /srv/salt/reclass/ -type f -exec sed -i 's/10\.16\.0\./==IPV4_NET_EXTERNAL_PREFIX==/g' {} +
+    export REPLACE_DIRS="/srv/salt/reclass/classes/ /srv/salt/reclass/nodes/"
+    find ${REPLACE_DIRS} -type f -exec sed -i 's/192\.168\.10\./==IPV4_NET_ADMIN_PREFIX==/g' {} +
+    find ${REPLACE_DIRS} -type f -exec sed -i 's/172\.16\.10\./==IPV4_NET_CONTROL_PREFIX==/g' {} +
+    find ${REPLACE_DIRS} -type f -exec sed -i 's/10\.1\.0\./==IPV4_NET_TENANT_PREFIX==/g' {} +
+    find ${REPLACE_DIRS} -type f -exec sed -i 's/10\.16\.0\./==IPV4_NET_EXTERNAL_PREFIX==/g' {} +
 
-    find /srv/salt/reclass/ -type f -exec sed -i 's/==IPV4_NET_ADMIN_PREFIX==/{{ IPV4_NET_ADMIN_PREFIX }}./g' {} +
-    find /srv/salt/reclass/ -type f -exec sed -i 's/==IPV4_NET_CONTROL_PREFIX==/{{ IPV4_NET_CONTROL_PREFIX }}./g' {} +
-    find /srv/salt/reclass/ -type f -exec sed -i 's/==IPV4_NET_TENANT_PREFIX==/{{ IPV4_NET_TENANT_PREFIX }}./g' {} +
-    find /srv/salt/reclass/ -type f -exec sed -i 's/==IPV4_NET_EXTERNAL_PREFIX==/{{ IPV4_NET_EXTERNAL_PREFIX }}./g' {} +
+    find ${REPLACE_DIRS} -type f -exec sed -i 's/==IPV4_NET_ADMIN_PREFIX==/{{ IPV4_NET_ADMIN_PREFIX }}./g' {} +
+    find ${REPLACE_DIRS} -type f -exec sed -i 's/==IPV4_NET_CONTROL_PREFIX==/{{ IPV4_NET_CONTROL_PREFIX }}./g' {} +
+    find ${REPLACE_DIRS} -type f -exec sed -i 's/==IPV4_NET_TENANT_PREFIX==/{{ IPV4_NET_TENANT_PREFIX }}./g' {} +
+    find ${REPLACE_DIRS} -type f -exec sed -i 's/==IPV4_NET_EXTERNAL_PREFIX==/{{ IPV4_NET_EXTERNAL_PREFIX }}./g' {} +
 
-    find /srv/salt/reclass/ -type f -exec sed -i 's/apt_mk_version:.*/apt_mk_version: {{ REPOSITORY_SUITE }}/g' {} +
+    find ${REPLACE_DIRS} -type f -exec sed -i 's/apt_mk_version:.*/apt_mk_version: {{ REPOSITORY_SUITE }}/g' {} +
 
     {%- if IS_CONTRAIL_LAB %}
     # vSRX IPs for tcp-qa images have 172.16.10.90 hardcoded
-    find /srv/salt/reclass/ -type f -exec sed -i 's/opencontrail_router01_address:.*/opencontrail_router01_address: 172.16.10.90/g' {} +
+    find ${REPLACE_DIRS} -type f -exec sed -i 's/opencontrail_router01_address:.*/opencontrail_router01_address: 172.16.10.90/g' {} +
     {%- endif %}
 
     # Disable checkouting the model from remote repository
diff --git a/tcp_tests/templates/virtual-mcp11-k8s-contrail/k8s.yaml b/tcp_tests/templates/virtual-mcp11-k8s-contrail/k8s.yaml
index 84766d9..8fc7977 100644
--- a/tcp_tests/templates/virtual-mcp11-k8s-contrail/k8s.yaml
+++ b/tcp_tests/templates/virtual-mcp11-k8s-contrail/k8s.yaml
@@ -21,8 +21,8 @@
 # TODO Remove workaround when linklocal on kube-api VIP on ens3 works fine
 - description: Replace kube-api VIP with IP of one controller
   cmd: |
-    find /srv/salt/reclass/ -type f -exec sed -i 's/ipf_addresses:\ \${_param:kubernetes_control_address}/ipf_addresses:\ \${_param:kubernetes_control_node01_address}/g' {} +
-    find /srv/salt/reclass/ -type f -exec sed -i 's/ipf_port:\ 443/ipf_port:\ 6443/g' {} +
+    find /srv/salt/reclass/classes/ -type f -exec sed -i 's/ipf_addresses:\ \${_param:kubernetes_control_address}/ipf_addresses:\ \${_param:kubernetes_control_node01_address}/g' {} +
+    find /srv/salt/reclass/classes/ -type f -exec sed -i 's/ipf_port:\ 443/ipf_port:\ 6443/g' {} +
   node_name: {{ HOSTNAME_CFG01 }}
   retry: {count: 1, delay: 1}
   skip_fail: false
diff --git a/tcp_tests/tests/system/test_install_mcp11_ovs_ocata.py b/tcp_tests/tests/system/test_install_mcp11_ovs_ocata.py
index 7c2f788..0ee71f4 100644
--- a/tcp_tests/tests/system/test_install_mcp11_ovs_ocata.py
+++ b/tcp_tests/tests/system/test_install_mcp11_ovs_ocata.py
@@ -46,7 +46,7 @@
     @pytest.mark.cz8119
     def test_mcp11_ocata_ovs_sl_install(self, underlay, config,
                                         openstack_deployed,
-                                        sl_deployed, sl_actions, show_step):
+                                        sl_deployed, show_step):
         """Test for deploying an mcp environment and check it
         Scenario:
         1. Prepare salt on hosts
@@ -54,9 +54,9 @@
         3. Setup compute nodes
         4. Get monitoring nodes
         5. Check that docker services are running
-        6. Check current targets are UP
-        7. Check grafana dashboards
-
+        6. Check current prometheus targets are UP
+        7. Run SL component tests
+        8. Download SL component tests report
         """
         expected_service_list = ['monitoring_remote_storage_adapter',
                                  'monitoring_server',
@@ -65,43 +65,25 @@
                                  'monitoring_alertmanager',
                                  'monitoring_remote_collector',
                                  'monitoring_pushgateway']
-        # STEP #4
-        mon_nodes = sl_actions.get_monitoring_nodes()
+        show_step(4)
+        mon_nodes = sl_deployed.get_monitoring_nodes()
         LOG.debug('Mon nodes list {0}'.format(mon_nodes))
-        for node in mon_nodes:
-            services_status = sl_actions.get_service_info_from_node(node)
-            assert len(services_status) == len(expected_service_list), \
-                'Some services are missed on node {0}. ' \
-                'Current service list {1}'.format(node, services_status)
-            for service in expected_service_list:
-                assert service in services_status, \
-                    'Missing service {0} in {1}'.format(service, services_status)
-                assert '0' not in services_status.get(service), \
-                    'Service {0} failed to start'.format(service)
-        prometheus_client = sl_deployed.api
-        try:
-            current_targets = prometheus_client.get_targets()
-            LOG.debug('Current targets after install {0}'.format(current_targets))
-        except:
-            LOG.info('Restarting keepalived service on mon nodes...')
-            sl_actions._salt.local(tgt='mon*', fun='cmd.run',
-                                   args='systemctl restart keepalived')
-            LOG.warning(
-                'Ip states after force restart {0}'.format(
-                    sl_actions._salt.local(tgt='mon*',
-                                           fun='cmd.run', args='ip a')))
-            current_targets = prometheus_client.get_targets()
-            LOG.debug('Current targets after install {0}'.format(current_targets))
-        # Assert that targets are up
-        for entry in current_targets:
-            assert 'up' in entry['health'], \
-                'Next target is down {}'.format(entry)
+
+        show_step(5)
+        sl_deployed.check_docker_services(mon_nodes, expected_service_list)
+
+        show_step(6)
+        sl_deployed.check_prometheus_targets(mon_nodes)
+
+        show_step(7)
         # Run SL component tetsts
-        sl_actions.run_sl_functional_tests(
+        sl_deployed.run_sl_functional_tests(
             'cfg01',
             '/root/stacklight-pytest/stacklight_tests/tests/prometheus')
+
+        show_step(8)
         # Download report
-        sl_actions.download_sl_test_report(
+        sl_deployed.download_sl_test_report(
             'cfg01',
             '/root/stacklight-pytest/stacklight_tests')
         LOG.info("*************** DONE **************")
@@ -128,7 +110,7 @@
     @pytest.mark.cz8120
     def test_mcp11_ocata_dvr_sl_install(self, underlay, config,
                                         openstack_deployed,
-                                        sl_deployed, sl_actions, show_step):
+                                        sl_deployed, show_step):
         """Test for deploying an mcp environment and check it
         Scenario:
         1. Prepare salt on hosts
@@ -136,9 +118,9 @@
         3. Setup compute nodes
         4. Get monitoring nodes
         5. Check that docker services are running
-        6. Check current targets are UP
-        7. Check grafana dashboards
-
+        6. Check current prometheus targets are UP
+        7. Run SL component tests
+        8. Download SL component tests report
         """
         expected_service_list = ['monitoring_remote_storage_adapter',
                                  'monitoring_server',
@@ -147,48 +129,25 @@
                                  'monitoring_alertmanager',
                                  'monitoring_remote_collector',
                                  'monitoring_pushgateway']
-        # STEP #4
-        mon_nodes = sl_actions.get_monitoring_nodes()
+        show_step(4)
+        mon_nodes = sl_deployed.get_monitoring_nodes()
         LOG.debug('Mon nodes list {0}'.format(mon_nodes))
-        for node in mon_nodes:
-            services_status = sl_actions.get_service_info_from_node(node)
-            assert len(services_status) == len(expected_service_list), \
-                'Some services are missed on node {0}. ' \
-                'Current service list {1}'.format(node, services_status)
-            for service in expected_service_list:
-                assert service in services_status,\
-                    'Missing service {0} in {1}'.format(service, services_status)
-                assert '0' not in services_status.get(service),\
-                    'Service {0} failed to start'.format(service)
-        prometheus_client = sl_deployed.api
-        try:
-            current_targets = prometheus_client.get_targets()
-            LOG.debug('Current targets after install {0}'.format(current_targets))
-        except:
-            LOG.info('Restarting keepalived service on mon nodes...')
-            sl_actions._salt.local(tgt='mon*', fun='cmd.run',
-                                   args='systemctl restart keepalived')
-            LOG.warning(
-                'Ip states after force restart {0}'.format(
-                    sl_actions._salt.local(tgt='mon*',
-                                           fun='cmd.run', args='ip a')))
-            current_targets = prometheus_client.get_targets()
-            LOG.debug('Current targets after install {0}'.format(current_targets))
-        # Assert that targets are up
-        for entry in current_targets:
-            assert 'up' in entry['health'], \
-                'Next target is down {}'.format(entry)
 
-            # Assert that targets are up
-            for entry in current_targets:
-                assert 'up' in entry['health'], \
-                    'Next target is down {}'.format(entry)
-        # Run SL component tetsts
-        sl_actions.run_sl_functional_tests(
+        show_step(5)
+        sl_deployed.check_docker_services(mon_nodes, expected_service_list)
+
+        show_step(6)
+        sl_deployed.check_prometheus_targets(mon_nodes)
+
+        show_step(7)
+        # Run SL component tests
+        sl_deployed.run_sl_functional_tests(
             'cfg01',
             '/root/stacklight-pytest/stacklight_tests/tests/prometheus')
+
+        show_step(8)
         # Download report
-        sl_actions.download_sl_test_report(
+        sl_deployed.download_sl_test_report(
             'cfg01',
             '/root/stacklight-pytest/stacklight_tests')
         LOG.info("*************** DONE **************")