Add smoke checks for SL deploy

* check that needed services are running in docker
* check that all targets in prometheus are up
* add initial SL deploy for ocata ovs

Change-Id: Id12c631aca99ecff82c4d36bfc4f278cd52bcbe4
Reviewed-on: https://review.gerrithub.io/376416
Reviewed-by: Tatyanka Leontovich <tleontovich@mirantis.com>
Tested-by: Tatyanka Leontovich <tleontovich@mirantis.com>
diff --git a/tcp_tests/managers/sl_manager.py b/tcp_tests/managers/sl_manager.py
index 23d362e..ccb6030 100644
--- a/tcp_tests/managers/sl_manager.py
+++ b/tcp_tests/managers/sl_manager.py
@@ -14,6 +14,9 @@
 
 from tcp_tests.managers.execute_commands import ExecuteCommandsMixin
 from tcp_tests.managers.clients.prometheus import prometheus_client
+from tcp_tests import logger
+
+LOG = logger.logger
 
 
 class SLManager(ExecuteCommandsMixin):
@@ -57,3 +60,22 @@
                 port=self.__config.stack_light.sl_prometheus_port,
                 proto=self.__config.stack_light.sl_prometheus_proto)
         return self._p_client
+
+    def get_monitoring_nodes(self):
+        """Return the names of the monitoring ('mon') nodes."""
+        return [node_name for node_name
+                in self.__underlay.node_names() if 'mon' in node_name]
+
+    def get_service_info_from_node(self, node_name):
+        """Map docker service names to replica counts on the given node."""
+        service_stat_dict = {}
+        with self.__underlay.remote(node_name=node_name) as node_remote:
+            result = node_remote.execute(
+                "docker service ls --format '{{.Name}}:{{.Replicas}}'")
+            LOG.debug("Service ls result {0} from node {1}".format(
+                result['stdout'], node_name))
+            # Each stdout line looks like 'monitoring_server:3/3'.
+            for line in result['stdout']:
+                name, _, replicas = line.partition(':')
+                service_stat_dict[name] = replicas.strip()
+        return service_stat_dict
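
For orientation, a sketch of what the new helper yields and how the smoke tests below consume it; the service names and replica ratios here are illustrative, not taken from a real run:

```python
# Hypothetical output of SLManager.get_service_info_from_node('mon01'):
# 'docker service ls --format "{{.Name}}:{{.Replicas}}"' prints lines
# such as 'monitoring_server:3/3', so each node yields a mapping of
# service name to replica ratio.
services_status = {
    'monitoring_server': '3/3',        # all replicas running
    'monitoring_alertmanager': '0/3',  # no replicas started yet
}
failed = [name for name, replicas in services_status.items()
          if replicas.startswith('0')]
assert not failed, 'Services with no running replicas: {0}'.format(failed)
```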
diff --git a/tcp_tests/templates/virtual-mcp-ocata-ovs/sl.yaml b/tcp_tests/templates/virtual-mcp-ocata-ovs/sl.yaml
new file mode 100644
index 0000000..13237ee
--- /dev/null
+++ b/tcp_tests/templates/virtual-mcp-ocata-ovs/sl.yaml
@@ -0,0 +1,184 @@
+{% from 'virtual-mcp-ocata-ovs/underlay.yaml' import HOSTNAME_CFG01 with context %}
+
+# Install docker swarm
+- description: Install keepalived on mon nodes
+  cmd: salt --hard-crash --state-output=mixed --state-verbose=False
+    -C 'mon*' state.sls keepalived
+  node_name: {{ HOSTNAME_CFG01 }}
+  retry: {count: 1, delay: 10}
+  skip_fail: false
+
+- description: Check the VIP on mon nodes
+  cmd: |
+    SL_VIP=`salt-call --out=newline_values_only pillar.get _param:stacklight_monitor_address`;
+    echo "_param:stacklight_monitor_address (vip): ${SL_VIP}";
+    salt --hard-crash --state-output=mixed --state-verbose=False -C 'mon*' cmd.run "ip a | grep ${SL_VIP}" | grep -B1 ${SL_VIP}
+  node_name: {{ HOSTNAME_CFG01 }}
+  retry: {count: 1, delay: 5}
+  skip_fail: false
+
+- description: Configure docker service
+  cmd: salt -C 'I@docker:swarm' state.sls docker.host
+  node_name: {{ HOSTNAME_CFG01 }}
+  retry: {count: 1, delay: 10}
+  skip_fail: false
+
+- description: Install docker swarm on master node
+  cmd: salt -C 'I@docker:swarm:role:master' state.sls docker.swarm
+  node_name: {{ HOSTNAME_CFG01 }}
+  retry: {count: 1, delay: 10}
+  skip_fail: false
+
+- description: Send grains to the swarm slave nodes
+  cmd: salt -C 'I@docker:swarm' state.sls salt.minion.grains
+  node_name: {{ HOSTNAME_CFG01 }}
+  retry: {count: 1, delay: 10}
+  skip_fail: false
+
+- description: Update mine
+  cmd: salt -C 'I@docker:swarm' mine.update
+  node_name: {{ HOSTNAME_CFG01 }}
+  retry: {count: 1, delay: 10}
+  skip_fail: false
+
+- description: Refresh modules
+  cmd: salt -C 'I@docker:swarm' saltutil.refresh_modules; sleep 5;
+  node_name: {{ HOSTNAME_CFG01 }}
+  retry: {count: 1, delay: 10}
+  skip_fail: false
+
+- description: Rerun swarm on slaves for proper token population
+  cmd: salt -C 'I@docker:swarm:role:master' state.sls docker.swarm
+  node_name: {{ HOSTNAME_CFG01 }}
+  retry: {count: 1, delay: 10}
+  skip_fail: false
+
+- description: Configure slave nodes
+  cmd: salt -C 'I@docker:swarm:role:manager' state.sls docker.swarm -b 1
+  node_name: {{ HOSTNAME_CFG01 }}
+  retry: {count: 1, delay: 10}
+  skip_fail: false
+
+- description: List registered Docker swarm nodes
+  cmd: salt -C 'I@docker:swarm:role:master' cmd.run 'docker node ls'
+  node_name: {{ HOSTNAME_CFG01 }}
+  retry: {count: 1, delay: 10}
+  skip_fail: false
+
+# Install slv2 infra
+- description: Install telegraf
+  cmd: salt -C 'I@telegraf:agent or I@telegraf:remote_agent' state.sls telegraf
+  node_name: {{ HOSTNAME_CFG01 }}
+  retry: {count: 2, delay: 10}
+  skip_fail: false
+
+- description: Configure Prometheus exporters
+  cmd: salt -C 'I@prometheus:exporters' state.sls prometheus
+  node_name: {{ HOSTNAME_CFG01 }}
+  retry: {count: 1, delay: 10}
+  skip_fail: false
+
+- description: Configure collector
+  cmd: salt -C 'I@heka:log_collector' state.sls heka.log_collector
+  node_name: {{ HOSTNAME_CFG01 }}
+  retry: {count: 1, delay: 10}
+  skip_fail: false
+
+- description: Install elasticsearch server
+  cmd: salt -C 'I@elasticsearch:server' state.sls elasticsearch.server -b 1
+  node_name: {{ HOSTNAME_CFG01 }}
+  retry: {count: 1, delay: 10}
+  skip_fail: false
+
+- description: Install kibana server
+  cmd: salt -C 'I@kibana:server' state.sls kibana.server -b 1
+  node_name: {{ HOSTNAME_CFG01 }}
+  retry: {count: 1, delay: 10}
+  skip_fail: false
+
+- description: Install elasticsearch client
+  cmd: salt -C 'I@elasticsearch:client' state.sls elasticsearch.client
+  node_name: {{ HOSTNAME_CFG01 }}
+  retry: {count: 1, delay: 10}
+  skip_fail: false
+
+- description: Install kibana client
+  cmd: salt -C 'I@kibana:client' state.sls kibana.client
+  node_name: {{ HOSTNAME_CFG01 }}
+  retry: {count: 1, delay: 10}
+  skip_fail: false
+
+- description: Check influxdb
+  cmd: |
+    INFLUXDB_SERVICE=`salt -C 'I@influxdb:server' test.ping 1>/dev/null 2>&1 && echo true`;
+    echo "Influxdb service presence: ${INFLUXDB_SERVICE}";
+    if [[ "$INFLUXDB_SERVICE" == "true" ]]; then
+        salt -C 'I@influxdb:server' state.sls influxdb
+    fi
+  node_name: {{ HOSTNAME_CFG01 }}
+  retry: {count: 1, delay: 5}
+  skip_fail: true
+
+# Collect grains needed to configure the services
+
+- description: Get grains
+  cmd: salt -C 'I@salt:minion' state.sls salt.minion.grains
+  node_name: {{ HOSTNAME_CFG01 }}
+  retry: {count: 1, delay: 10}
+  skip_fail: false
+
+- description: Sync modules
+  cmd: salt -C 'I@salt:minion' saltutil.refresh_modules
+  node_name: {{ HOSTNAME_CFG01 }}
+  retry: {count: 1, delay: 10}
+  skip_fail: false
+
+- description: Update mine
+  cmd: salt -C 'I@salt:minion' mine.update; sleep 5;
+  node_name: {{ HOSTNAME_CFG01 }}
+  retry: {count: 1, delay: 10}
+  skip_fail: false
+
+# Change environment configuration before deploy
+- description: Set SL docker images deploy parameters
+  cmd: |
+  {% for sl_opt, value in config.sl_deploy.items() %}
+    {% if value|string() %}
+    salt-call reclass.cluster_meta_set {{ sl_opt }} {{ value }};
+    {% endif %}
+  {% endfor %}
+  node_name: {{ HOSTNAME_CFG01 }}
+  retry: {count: 1, delay: 1}
+  skip_fail: false
+
+# Configure the services running in Docker Swarm
+- description: Install prometheus alertmanager
+  cmd: salt -C 'I@docker:swarm' state.sls prometheus,heka.remote_collector -b 1
+  node_name: {{ HOSTNAME_CFG01 }}
+  retry: {count: 1, delay: 10}
+  skip_fail: false
+
+- description: Run docker state
+  cmd: salt -C 'I@docker:swarm:role:master' state.sls docker
+  node_name: {{ HOSTNAME_CFG01 }}
+  retry: {count: 1, delay: 10}
+  skip_fail: false
+
+- description: docker ps
+  cmd: salt -C 'I@docker:swarm' dockerng.ps
+  node_name: {{ HOSTNAME_CFG01 }}
+  retry: {count: 1, delay: 10}
+  skip_fail: false
+
+- description: Configure Grafana dashboards and datasources
+  cmd: sleep 30; salt -C 'I@grafana:client' state.sls grafana.client
+  node_name: {{ HOSTNAME_CFG01 }}
+  retry: {count: 2, delay: 10}
+  skip_fail: false
+
+- description: Run salt minion to create cert files
+  cmd: salt --hard-crash --state-output=mixed --state-verbose=False "*" state.sls salt.minion
+  node_name: {{ HOSTNAME_CFG01 }}
+  retry: {count: 1, delay: 10}
+  skip_fail: false
+
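
Each step in this template follows one schema consumed by ExecuteCommandsMixin: description, cmd, node_name, retry {count, delay} and skip_fail. A simplified sketch of that retry contract, assuming a failing command is retried with a delay and only raises when skip_fail is false (the function and result fields are illustrative, not the mixin's actual internals):

```python
import time


def run_step(remote, step):
    """Illustrative retry loop for a single deploy step."""
    result = None
    for _ in range(step['retry']['count']):
        result = remote.execute(step['cmd'])
        if result['exit_code'] == 0:
            return result
        time.sleep(step['retry']['delay'])
    if not step['skip_fail']:
        raise RuntimeError('Step failed: {0}'.format(step['description']))
    return result
```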
diff --git a/tcp_tests/templates/virtual-mcp-ocata-ovs/underlay.yaml b/tcp_tests/templates/virtual-mcp-ocata-ovs/underlay.yaml
index fc0b675..222ba69 100644
--- a/tcp_tests/templates/virtual-mcp-ocata-ovs/underlay.yaml
+++ b/tcp_tests/templates/virtual-mcp-ocata-ovs/underlay.yaml
@@ -19,6 +19,9 @@
 {% set HOSTNAME_CTL03 = os_env('HOSTNAME_CTL03', 'ctl03.' + DOMAIN_NAME) %}
 {% set HOSTNAME_CMP01 = os_env('HOSTNAME_CMP01', 'cmp01.' + DOMAIN_NAME) %}
 {% set HOSTNAME_CMP02 = os_env('HOSTNAME_CMP02', 'cmp02.' + DOMAIN_NAME) %}
+{% set HOSTNAME_MON01 = os_env('HOSTNAME_MON01', 'mon01.' + DOMAIN_NAME) %}
+{% set HOSTNAME_MON02 = os_env('HOSTNAME_MON02', 'mon02.' + DOMAIN_NAME) %}
+{% set HOSTNAME_MON03 = os_env('HOSTNAME_MON03', 'mon03.' + DOMAIN_NAME) %}
 {% set HOSTNAME_GTW01 = os_env('HOSTNAME_GTW01', 'gtw01.' + DOMAIN_NAME) %}
 {% set HOSTNAME_PRX01 = os_env('HOSTNAME_PRX01', 'prx01.' + DOMAIN_NAME) %}
 
@@ -39,6 +42,9 @@
             default_{{ HOSTNAME_CTL03 }}: +103
             default_{{ HOSTNAME_CMP01 }}: +105
             default_{{ HOSTNAME_CMP02 }}: +106
+            default_{{ HOSTNAME_MON01 }}: +107
+            default_{{ HOSTNAME_MON02 }}: +108
+            default_{{ HOSTNAME_MON03 }}: +109
             default_{{ HOSTNAME_GTW01 }}: +110
             default_{{ HOSTNAME_PRX01 }}: +121
           ip_ranges:
@@ -56,6 +62,9 @@
             default_{{ HOSTNAME_CTL03 }}: +103
             default_{{ HOSTNAME_CMP01 }}: +105
             default_{{ HOSTNAME_CMP02 }}: +106
+            default_{{ HOSTNAME_MON01 }}: +107
+            default_{{ HOSTNAME_MON02 }}: +108
+            default_{{ HOSTNAME_MON03 }}: +109
             default_{{ HOSTNAME_GTW01 }}: +110
             default_{{ HOSTNAME_PRX01 }}: +121
           ip_ranges:
@@ -73,6 +82,9 @@
             default_{{ HOSTNAME_CTL03 }}: +103
             default_{{ HOSTNAME_CMP01 }}: +105
             default_{{ HOSTNAME_CMP02 }}: +106
+            default_{{ HOSTNAME_MON01 }}: +107
+            default_{{ HOSTNAME_MON02 }}: +108
+            default_{{ HOSTNAME_MON03 }}: +109
             default_{{ HOSTNAME_GTW01 }}: +110
             default_{{ HOSTNAME_PRX01 }}: +121
           ip_ranges:
@@ -90,6 +102,9 @@
             default_{{ HOSTNAME_CTL03 }}: +103
             default_{{ HOSTNAME_CMP01 }}: +105
             default_{{ HOSTNAME_CMP02 }}: +106
+            default_{{ HOSTNAME_MON01 }}: +107
+            default_{{ HOSTNAME_MON02 }}: +108
+            default_{{ HOSTNAME_MON03 }}: +109
             default_{{ HOSTNAME_GTW01 }}: +110
             default_{{ HOSTNAME_PRX01 }}: +121
           ip_ranges:
@@ -280,12 +295,90 @@
 
               interfaces: *interfaces
               network_config: *network_config
+
+          - name: {{ HOSTNAME_MON01 }}
+            role: salt_minion
+            params:
+              vcpu: !os_env SLAVE_NODE_CPU, 3
+              memory: !os_env SLAVE_NODE_MEMORY, 2048
+              boot:
+                - hd
+              cloud_init_volume_name: iso
+              cloud_init_iface_up: ens3
+              volumes:
+                - name: system
+                  capacity: !os_env NODE_VOLUME_SIZE, 150
+                  backing_store: cloudimage1604
+                  format: qcow2
+                - name: iso  # Volume with name 'iso' will be used
+                             # for store image with cloud-init metadata.
+                  capacity: 1
+                  format: raw
+                  device: cdrom
+                  bus: ide
+                  cloudinit_meta_data: !include underlay--meta-data.yaml
+                  cloudinit_user_data: !include underlay--user-data1604.yaml
+
+              interfaces: *interfaces
+              network_config: *network_config
+
+          - name: {{ HOSTNAME_MON02 }}
+            role: salt_minion
+            params:
+              vcpu: !os_env SLAVE_NODE_CPU, 3
+              memory: !os_env SLAVE_NODE_MEMORY, 2048
+              boot:
+                - hd
+              cloud_init_volume_name: iso
+              cloud_init_iface_up: ens3
+              volumes:
+                - name: system
+                  capacity: !os_env NODE_VOLUME_SIZE, 150
+                  backing_store: cloudimage1604
+                  format: qcow2
+                - name: iso  # Volume with name 'iso' will be used
+                             # for store image with cloud-init metadata.
+                  capacity: 1
+                  format: raw
+                  device: cdrom
+                  bus: ide
+                  cloudinit_meta_data: !include underlay--meta-data.yaml
+                  cloudinit_user_data: !include underlay--user-data1604.yaml
+
+              interfaces: *interfaces
+              network_config: *network_config
+
+          - name: {{ HOSTNAME_MON03 }}
+            role: salt_minion
+            params:
+              vcpu: !os_env SLAVE_NODE_CPU, 3
+              memory: !os_env SLAVE_NODE_MEMORY, 2048
+              boot:
+                - hd
+              cloud_init_volume_name: iso
+              cloud_init_iface_up: ens3
+              volumes:
+                - name: system
+                  capacity: !os_env NODE_VOLUME_SIZE, 150
+                  backing_store: cloudimage1604
+                  format: qcow2
+                - name: iso  # Volume with name 'iso' will be used
+                             # for store image with cloud-init metadata.
+                  capacity: 1
+                  format: raw
+                  device: cdrom
+                  bus: ide
+                  cloudinit_meta_data: !include underlay--meta-data.yaml
+                  cloudinit_user_data: !include underlay--user-data1604.yaml
+
+              interfaces: *interfaces
+              network_config: *network_config
 
           - name: {{ HOSTNAME_PRX01 }}
             role: salt_minion
             params:
               vcpu: !os_env SLAVE_NODE_CPU, 1
-              memory: !os_env SLAVE_NODE_MEMORY, 8192
+              memory: !os_env SLAVE_NODE_MEMORY, 2048
               boot:
                 - hd
               cloud_init_volume_name: iso
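
The !os_env tag used throughout the underlay template reads an environment variable and falls back to the inline default, which is why the new mon nodes can be resized (vcpu, memory, volume size) without editing the template. A minimal sketch of the equivalent lookup, mirroring the os_env() helper already used in the Jinja header:

```python
import os


def os_env(var_name, default=None):
    """Return the value of an environment variable, or the default."""
    return os.environ.get(var_name, default)


# 'memory: !os_env SLAVE_NODE_MEMORY, 2048' resolves to:
memory = os_env('SLAVE_NODE_MEMORY', 2048)
```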
diff --git a/tcp_tests/tests/system/test_install_mcp11_ovs_ocata.py b/tcp_tests/tests/system/test_install_mcp11_ovs_ocata.py
index ba641c6..b06b991 100644
--- a/tcp_tests/tests/system/test_install_mcp11_ovs_ocata.py
+++ b/tcp_tests/tests/system/test_install_mcp11_ovs_ocata.py
@@ -36,6 +36,61 @@
         LOG.info("*************** DONE **************")
 
     @pytest.mark.fail_snapshot
+    def test_mcp11_ocata_ovs_sl_install(self, underlay, config,
+                                        openstack_deployed,
+                                        sl_deployed, sl_actions, show_step):
+        """Test for deploying an mcp environment and check it
+        Scenario:
+        1. Prepare salt on hosts
+        2. Setup controller nodes
+        3. Setup compute nodes
+        4. Get monitoring nodes
+        5. Check that docker services are running
+        6. Check current targets are UP
+        7. Check grafana dashboards
+
+        """
+        expected_service_list = ['monitoring_remote_storage_adapter',
+                                 'monitoring_server',
+                                 'monitoring_remote_agent',
+                                 'dashboard_grafana',
+                                 'monitoring_alertmanager',
+                                 'monitoring_remote_collector',
+                                 'monitoring_pushgateway']
+        # STEP #4
+        mon_nodes = sl_actions.get_monitoring_nodes()
+        LOG.debug('Mon nodes list {0}'.format(mon_nodes))
+        for node in mon_nodes:
+            services_status = sl_actions.get_service_info_from_node(node)
+            assert len(services_status) == len(expected_service_list), \
+                'Some services are missing on node {0}. ' \
+                'Current service list: {1}'.format(node, services_status)
+            for service in expected_service_list:
+                assert service in services_status, \
+                    'Missing service {0} in {1}'.format(service, services_status)
+                assert not services_status[service].startswith('0'), \
+                    'Service {0} failed to start'.format(service)
+        prometheus_client = sl_deployed.api
+        try:
+            current_targets = prometheus_client.get_targets()
+            LOG.debug('Current targets after install {0}'.format(current_targets))
+        except Exception:
+            LOG.info('Restarting keepalived service on mon nodes...')
+            sl_actions._salt.local(tgt='mon*', fun='cmd.run',
+                                   args='systemctl restart keepalived')
+            LOG.warning(
+                'IP state after forced restart {0}'.format(
+                    sl_actions._salt.local(tgt='mon*',
+                                           fun='cmd.run', args='ip a')))
+            current_targets = prometheus_client.get_targets()
+            LOG.debug('Current targets after install {0}'.format(current_targets))
+        # Assert that targets are up
+        for entry in current_targets:
+            assert 'up' in entry['health'], \
+                'Target is down: {0}'.format(entry)
+        LOG.info("*************** DONE **************")
+
+    @pytest.mark.fail_snapshot
     def test_mcp11_ocata_dvr_install(self, underlay, openstack_deployed,
                                           show_step):
         """Test for deploying an mcp environment and check it
@@ -48,15 +103,58 @@
         LOG.info("*************** DONE **************")
 
     @pytest.mark.fail_snapshot
-    def test_mcp11_ocata_dvr_sl_install(self, underlay, openstack_deployed,
+    def test_mcp11_ocata_dvr_sl_install(self, underlay, config,
+                                        openstack_deployed,
                                         sl_deployed, sl_actions, show_step):
         """Test for deploying an mcp environment and check it
         Scenario:
         1. Prepare salt on hosts
         2. Setup controller nodes
         3. Setup compute nodes
+        4. Get monitoring nodes
+        5. Check that docker services are running
+        6. Check current targets are UP
+        7. Check grafana dashboards
 
         """
+        expected_service_list = ['monitoring_remote_storage_adapter',
+                                 'monitoring_server',
+                                 'monitoring_remote_agent',
+                                 'dashboard_grafana',
+                                 'monitoring_alertmanager',
+                                 'monitoring_remote_collector',
+                                 'monitoring_pushgateway']
+        # STEP #4
+        mon_nodes = sl_actions.get_monitoring_nodes()
+        LOG.debug('Mon nodes list {0}'.format(mon_nodes))
+        for node in mon_nodes:
+            services_status = sl_actions.get_service_info_from_node(node)
+            assert len(services_status) == len(expected_service_list), \
+                'Some services are missing on node {0}. ' \
+                'Current service list: {1}'.format(node, services_status)
+            for service in expected_service_list:
+                assert service in services_status, \
+                    'Missing service {0} in {1}'.format(service, services_status)
+                assert not services_status[service].startswith('0'), \
+                    'Service {0} failed to start'.format(service)
+        prometheus_client = sl_deployed.api
+        try:
+            current_targets = prometheus_client.get_targets()
+            LOG.debug('Current targets after install {0}'.format(current_targets))
+        except Exception:
+            LOG.info('Restarting keepalived service on mon nodes...')
+            sl_actions._salt.local(tgt='mon*', fun='cmd.run',
+                                   args='systemctl restart keepalived')
+            LOG.warning(
+                'IP state after forced restart {0}'.format(
+                    sl_actions._salt.local(tgt='mon*',
+                                           fun='cmd.run', args='ip a')))
+            current_targets = prometheus_client.get_targets()
+            LOG.debug('Current targets after install {0}'.format(current_targets))
+        # Assert that targets are up
+        for entry in current_targets:
+            assert 'up' in entry['health'], \
+                'Target is down: {0}'.format(entry)
         LOG.info("*************** DONE **************")
 
     @pytest.mark.fail_snapshot
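
Both SL tests assume prometheus_client.get_targets() returns entries shaped like the Prometheus targets API output, with a health field per target. An illustrative (assumed) entry that the assertions above would accept:

```python
# Assumed shape of one entry from sl_deployed.api.get_targets();
# all field values are illustrative.
entry = {
    'labels': {'job': 'telegraf', 'instance': '172.16.10.107:9126'},
    'scrapeUrl': 'http://172.16.10.107:9126/metrics',
    'health': 'up',
    'lastError': '',
}
assert 'up' in entry['health'], 'Target is down: {0}'.format(entry)
```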