Add smoke checks for SL deploy

* check that the required services are running in Docker
* check that all Prometheus targets are up
* add initial SL deploy for ocata ovs
Change-Id: Id12c631aca99ecff82c4d36bfc4f278cd52bcbe4
Reviewed-on: https://review.gerrithub.io/376416
Reviewed-by: Tatyanka Leontovich <tleontovich@mirantis.com>
Tested-by: Tatyanka Leontovich <tleontovich@mirantis.com>
diff --git a/tcp_tests/managers/sl_manager.py b/tcp_tests/managers/sl_manager.py
index 23d362e..ccb6030 100644
--- a/tcp_tests/managers/sl_manager.py
+++ b/tcp_tests/managers/sl_manager.py
@@ -14,6 +14,9 @@
from tcp_tests.managers.execute_commands import ExecuteCommandsMixin
from tcp_tests.managers.clients.prometheus import prometheus_client
+from tcp_tests import logger
+
+LOG = logger.logger
class SLManager(ExecuteCommandsMixin):
@@ -57,3 +60,19 @@
port=self.__config.stack_light.sl_prometheus_port,
proto=self.__config.stack_light.sl_prometheus_proto)
return self._p_client
+
+ def get_monitoring_nodes(self):
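+        """Return names of the 'mon' (monitoring) nodes in the underlay."""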
+        return [node_name for node_name in self.__underlay.node_names()
+                if 'mon' in node_name]
+
+ def get_service_info_from_node(self, node_name):
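+        """Get per-service replica counts from 'docker service ls' on a node.
+
+        Returns a dict that maps a service name to its replicas string,
+        e.g. {'monitoring_server': '1/1'}.
+        """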
+ service_stat_dict = {}
+ with self.__underlay.remote(node_name=node_name) as node_remote:
+ result = node_remote.execute(
+ "docker service ls --format '{{.Name}}:{{.Replicas}}'")
+ LOG.debug("Service ls result {0} from node {1}".format(
+ result['stdout'], node_name))
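+            # Each line looks like '<service_name>:<running>/<desired>'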
+            for line in result['stdout']:
+                tmp = line.strip().split(':')
+                service_stat_dict[tmp[0]] = tmp[1]
+ return service_stat_dict
diff --git a/tcp_tests/templates/virtual-mcp-ocata-ovs/sl.yaml b/tcp_tests/templates/virtual-mcp-ocata-ovs/sl.yaml
new file mode 100644
index 0000000..13237ee
--- /dev/null
+++ b/tcp_tests/templates/virtual-mcp-ocata-ovs/sl.yaml
@@ -0,0 +1,184 @@
+{% from 'virtual-mcp-ocata-ovs/underlay.yaml' import HOSTNAME_CFG01 with context %}
+
+# Install docker swarm
+- description: Install keepalived on mon nodes
+ cmd: salt --hard-crash --state-output=mixed --state-verbose=False
+ -C 'mon*' state.sls keepalived
+ node_name: {{ HOSTNAME_CFG01 }}
+ retry: {count: 1, delay: 10}
+ skip_fail: false
+
+- description: Check the VIP on mon nodes
+ cmd: |
+ SL_VIP=`salt-call --out=newline_values_only pillar.get _param:stacklight_monitor_address`;
+ echo "_param:stacklight_monitor_address (vip): ${SL_VIP}";
+ salt --hard-crash --state-output=mixed --state-verbose=False -C 'mon*' cmd.run "ip a | grep ${SL_VIP}" | grep -B1 ${SL_VIP}
+ node_name: {{ HOSTNAME_CFG01 }}
+ retry: {count: 1, delay: 5}
+ skip_fail: false
+
+- description: Configure docker service
+ cmd: salt -C 'I@docker:swarm' state.sls docker.host
+ node_name: {{ HOSTNAME_CFG01 }}
+ retry: {count: 1, delay: 10}
+ skip_fail: false
+
+- description: Install docker swarm on master node
+ cmd: salt -C 'I@docker:swarm:role:master' state.sls docker.swarm
+ node_name: {{ HOSTNAME_CFG01 }}
+ retry: {count: 1, delay: 10}
+ skip_fail: false
+
+- description: Send grains to the swarm slave nodes
+ cmd: salt -C 'I@docker:swarm' state.sls salt.minion.grains
+ node_name: {{ HOSTNAME_CFG01 }}
+ retry: {count: 1, delay: 10}
+ skip_fail: false
+
+- description: Update mine
+ cmd: salt -C 'I@docker:swarm' mine.update
+ node_name: {{ HOSTNAME_CFG01 }}
+ retry: {count: 1, delay: 10}
+ skip_fail: false
+
+- description: Refresh modules
+ cmd: salt -C 'I@docker:swarm' saltutil.refresh_modules; sleep 5;
+ node_name: {{ HOSTNAME_CFG01 }}
+ retry: {count: 1, delay: 10}
+ skip_fail: false
+
+- description: Rerun the swarm state on the master node for proper token population
+ cmd: salt -C 'I@docker:swarm:role:master' state.sls docker.swarm
+ node_name: {{ HOSTNAME_CFG01 }}
+ retry: {count: 1, delay: 10}
+ skip_fail: false
+
+- description: Configure slave nodes
+ cmd: salt -C 'I@docker:swarm:role:manager' state.sls docker.swarm -b 1
+ node_name: {{ HOSTNAME_CFG01 }}
+ retry: {count: 1, delay: 10}
+ skip_fail: false
+
+- description: List registered Docker swarm nodes
+ cmd: salt -C 'I@docker:swarm:role:master' cmd.run 'docker node ls'
+ node_name: {{ HOSTNAME_CFG01 }}
+ retry: {count: 1, delay: 10}
+ skip_fail: false
+
+# Install StackLight v2 (slv2) infra
+- description: Install telegraf
+ cmd: salt -C 'I@telegraf:agent or I@telegraf:remote_agent' state.sls telegraf
+ node_name: {{ HOSTNAME_CFG01 }}
+ retry: {count: 2, delay: 10}
+ skip_fail: false
+
+- description: Configure Prometheus exporters
+ cmd: salt -C 'I@prometheus:exporters' state.sls prometheus
+ node_name: {{ HOSTNAME_CFG01 }}
+ retry: {count: 1, delay: 10}
+ skip_fail: false
+
+- description: Configure the Heka log collector
+ cmd: salt -C 'I@heka:log_collector' state.sls heka.log_collector
+ node_name: {{ HOSTNAME_CFG01 }}
+ retry: {count: 1, delay: 10}
+ skip_fail: false
+
+- description: Install elasticsearch server
+ cmd: salt -C 'I@elasticsearch:server' state.sls elasticsearch.server -b 1
+ node_name: {{ HOSTNAME_CFG01 }}
+ retry: {count: 1, delay: 10}
+ skip_fail: false
+
+- description: Install kibana server
+ cmd: salt -C 'I@kibana:server' state.sls kibana.server -b 1
+ node_name: {{ HOSTNAME_CFG01 }}
+ retry: {count: 1, delay: 10}
+ skip_fail: false
+
+- description: Install elasticsearch client
+ cmd: salt -C 'I@elasticsearch:client' state.sls elasticsearch.client
+ node_name: {{ HOSTNAME_CFG01 }}
+ retry: {count: 1, delay: 10}
+ skip_fail: false
+
+- description: Install kibana client
+ cmd: salt -C 'I@kibana:client' state.sls kibana.client
+ node_name: {{ HOSTNAME_CFG01 }}
+ retry: {count: 1, delay: 10}
+ skip_fail: false
+
+- description: Check InfluxDB
+ cmd: |
+ INFLUXDB_SERVICE=`salt -C 'I@influxdb:server' test.ping 1>/dev/null 2>&1 && echo true`;
+ echo "Influxdb service presence: ${INFLUXDB_SERVICE}";
+ if [[ "$INFLUXDB_SERVICE" == "true" ]]; then
+ salt -C 'I@influxdb:server' state.sls influxdb
+ fi
+ node_name: {{ HOSTNAME_CFG01 }}
+ retry: {count: 1, delay: 5}
+ skip_fail: true
+
+# Collect grains needed to configure the services
+
+- description: Get grains
+ cmd: salt -C 'I@salt:minion' state.sls salt.minion.grains
+ node_name: {{ HOSTNAME_CFG01 }}
+ retry: {count: 1, delay: 10}
+ skip_fail: false
+
+- description: Sync modules
+ cmd: salt -C 'I@salt:minion' saltutil.refresh_modules
+ node_name: {{ HOSTNAME_CFG01 }}
+ retry: {count: 1, delay: 10}
+ skip_fail: false
+
+- description: Update mine
+ cmd: salt -C 'I@salt:minion' mine.update; sleep 5;
+ node_name: {{ HOSTNAME_CFG01 }}
+ retry: {count: 1, delay: 10}
+ skip_fail: false
+
+# Change environment configuration before deploy
+- description: Set SL Docker image deploy parameters
+ cmd: |
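+    {# Write each config.sl_deploy option into the reclass cluster metadata #}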
+ {% for sl_opt, value in config.sl_deploy.items() %}
+ {% if value|string() %}
+ salt-call reclass.cluster_meta_set {{ sl_opt }} {{ value }};
+ {% endif %}
+ {% endfor %}
+ node_name: {{ HOSTNAME_CFG01 }}
+ retry: {count: 1, delay: 1}
+ skip_fail: false
+
+# Configure the services running in Docker Swarm
+- description: Install Prometheus and the remote collector in Docker Swarm
+ cmd: salt -C 'I@docker:swarm' state.sls prometheus,heka.remote_collector -b 1
+ node_name: {{ HOSTNAME_CFG01 }}
+ retry: {count: 1, delay: 10}
+ skip_fail: false
+
+- description: Run docker state
+ cmd: salt -C 'I@docker:swarm:role:master' state.sls docker
+ node_name: {{ HOSTNAME_CFG01 }}
+ retry: {count: 1, delay: 10}
+ skip_fail: false
+
+- description: List Docker containers on the swarm nodes
+ cmd: salt -C 'I@docker:swarm' dockerng.ps
+ node_name: {{ HOSTNAME_CFG01 }}
+ retry: {count: 1, delay: 10}
+ skip_fail: false
+
+- description: Configure Grafana dashboards and datasources
+ cmd: sleep 30; salt -C 'I@grafana:client' state.sls grafana.client
+ node_name: {{ HOSTNAME_CFG01 }}
+ retry: {count: 2, delay: 10}
+ skip_fail: false
+
+- description: Run salt minion to create cert files
+ cmd: salt --hard-crash --state-output=mixed --state-verbose=False "*" state.sls salt.minion
+ node_name: {{ HOSTNAME_CFG01 }}
+ retry: {count: 1, delay: 10}
+ skip_fail: false
+
diff --git a/tcp_tests/templates/virtual-mcp-ocata-ovs/underlay.yaml b/tcp_tests/templates/virtual-mcp-ocata-ovs/underlay.yaml
index fc0b675..222ba69 100644
--- a/tcp_tests/templates/virtual-mcp-ocata-ovs/underlay.yaml
+++ b/tcp_tests/templates/virtual-mcp-ocata-ovs/underlay.yaml
@@ -19,6 +19,9 @@
{% set HOSTNAME_CTL03 = os_env('HOSTNAME_CTL03', 'ctl03.' + DOMAIN_NAME) %}
{% set HOSTNAME_CMP01 = os_env('HOSTNAME_CMP01', 'cmp01.' + DOMAIN_NAME) %}
{% set HOSTNAME_CMP02 = os_env('HOSTNAME_CMP02', 'cmp02.' + DOMAIN_NAME) %}
+{% set HOSTNAME_MON01 = os_env('HOSTNAME_MON01', 'mon01.' + DOMAIN_NAME) %}
+{% set HOSTNAME_MON02 = os_env('HOSTNAME_MON02', 'mon02.' + DOMAIN_NAME) %}
+{% set HOSTNAME_MON03 = os_env('HOSTNAME_MON03', 'mon03.' + DOMAIN_NAME) %}
{% set HOSTNAME_GTW01 = os_env('HOSTNAME_GTW01', 'gtw01.' + DOMAIN_NAME) %}
{% set HOSTNAME_PRX01 = os_env('HOSTNAME_PRX01', 'prx01.' + DOMAIN_NAME) %}
@@ -39,6 +42,9 @@
default_{{ HOSTNAME_CTL03 }}: +103
default_{{ HOSTNAME_CMP01 }}: +105
default_{{ HOSTNAME_CMP02 }}: +106
+ default_{{ HOSTNAME_MON01 }}: +107
+ default_{{ HOSTNAME_MON02 }}: +108
+ default_{{ HOSTNAME_MON03 }}: +109
default_{{ HOSTNAME_GTW01 }}: +110
default_{{ HOSTNAME_PRX01 }}: +121
ip_ranges:
@@ -56,6 +62,9 @@
default_{{ HOSTNAME_CTL03 }}: +103
default_{{ HOSTNAME_CMP01 }}: +105
default_{{ HOSTNAME_CMP02 }}: +106
+ default_{{ HOSTNAME_MON01 }}: +107
+ default_{{ HOSTNAME_MON02 }}: +108
+ default_{{ HOSTNAME_MON03 }}: +109
default_{{ HOSTNAME_GTW01 }}: +110
default_{{ HOSTNAME_PRX01 }}: +121
ip_ranges:
@@ -73,6 +82,9 @@
default_{{ HOSTNAME_CTL03 }}: +103
default_{{ HOSTNAME_CMP01 }}: +105
default_{{ HOSTNAME_CMP02 }}: +106
+ default_{{ HOSTNAME_MON01 }}: +107
+ default_{{ HOSTNAME_MON02 }}: +108
+ default_{{ HOSTNAME_MON03 }}: +109
default_{{ HOSTNAME_GTW01 }}: +110
default_{{ HOSTNAME_PRX01 }}: +121
ip_ranges:
@@ -90,6 +102,9 @@
default_{{ HOSTNAME_CTL03 }}: +103
default_{{ HOSTNAME_CMP01 }}: +105
default_{{ HOSTNAME_CMP02 }}: +106
+ default_{{ HOSTNAME_MON01 }}: +107
+ default_{{ HOSTNAME_MON02 }}: +108
+ default_{{ HOSTNAME_MON03 }}: +109
default_{{ HOSTNAME_GTW01 }}: +110
default_{{ HOSTNAME_PRX01 }}: +121
ip_ranges:
@@ -280,12 +295,89 @@
interfaces: *interfaces
network_config: *network_config
+ - name: {{ HOSTNAME_MON01 }}
+ role: salt_minion
+ params:
+ vcpu: !os_env SLAVE_NODE_CPU, 3
+ memory: !os_env SLAVE_NODE_MEMORY, 2048
+ boot:
+ - hd
+ cloud_init_volume_name: iso
+ cloud_init_iface_up: ens3
+ volumes:
+ - name: system
+ capacity: !os_env NODE_VOLUME_SIZE, 150
+ backing_store: cloudimage1604
+ format: qcow2
+ - name: iso # Volume with name 'iso' will be used
+                     # to store the image with cloud-init metadata.
+ capacity: 1
+ format: raw
+ device: cdrom
+ bus: ide
+ cloudinit_meta_data: !include underlay--meta-data.yaml
+ cloudinit_user_data: !include underlay--user-data1604.yaml
+
+ interfaces: *interfaces
+ network_config: *network_config
+
+ - name: {{ HOSTNAME_MON02 }}
+ role: salt_minion
+ params:
+ vcpu: !os_env SLAVE_NODE_CPU, 3
+ memory: !os_env SLAVE_NODE_MEMORY, 2048
+ boot:
+ - hd
+ cloud_init_volume_name: iso
+ cloud_init_iface_up: ens3
+ volumes:
+ - name: system
+ capacity: !os_env NODE_VOLUME_SIZE, 150
+ backing_store: cloudimage1604
+ format: qcow2
+ - name: iso # Volume with name 'iso' will be used
+                     # to store the image with cloud-init metadata.
+ capacity: 1
+ format: raw
+ device: cdrom
+ bus: ide
+ cloudinit_meta_data: !include underlay--meta-data.yaml
+ cloudinit_user_data: !include underlay--user-data1604.yaml
+
+ interfaces: *interfaces
+ network_config: *network_config
+
+ - name: {{ HOSTNAME_MON03 }}
+ role: salt_minion
+ params:
+ vcpu: !os_env SLAVE_NODE_CPU, 3
+ memory: !os_env SLAVE_NODE_MEMORY, 2048
+ boot:
+ - hd
+ cloud_init_volume_name: iso
+ cloud_init_iface_up: ens3
+ volumes:
+ - name: system
+ capacity: !os_env NODE_VOLUME_SIZE, 150
+ backing_store: cloudimage1604
+ format: qcow2
+ - name: iso # Volume with name 'iso' will be used
+                     # to store the image with cloud-init metadata.
+ capacity: 1
+ format: raw
+ device: cdrom
+ bus: ide
+ cloudinit_meta_data: !include underlay--meta-data.yaml
+ cloudinit_user_data: !include underlay--user-data1604.yaml
+
+ interfaces: *interfaces
+ network_config: *network_config
- name: {{ HOSTNAME_PRX01 }}
role: salt_minion
params:
vcpu: !os_env SLAVE_NODE_CPU, 1
- memory: !os_env SLAVE_NODE_MEMORY, 8192
+ memory: !os_env SLAVE_NODE_MEMORY, 2048
boot:
- hd
cloud_init_volume_name: iso
diff --git a/tcp_tests/tests/system/test_install_mcp11_ovs_ocata.py b/tcp_tests/tests/system/test_install_mcp11_ovs_ocata.py
index ba641c6..b06b991 100644
--- a/tcp_tests/tests/system/test_install_mcp11_ovs_ocata.py
+++ b/tcp_tests/tests/system/test_install_mcp11_ovs_ocata.py
@@ -36,6 +36,61 @@
LOG.info("*************** DONE **************")
@pytest.mark.fail_snapshot
+ def test_mcp11_ocata_ovs_sl_install(self, underlay, config,
+ openstack_deployed,
+ sl_deployed, sl_actions, show_step):
+ """Test for deploying an mcp environment and check it
+ Scenario:
+ 1. Prepare salt on hosts
+ 2. Setup controller nodes
+ 3. Setup compute nodes
+ 4. Get monitoring nodes
+ 5. Check that docker services are running
+ 6. Check current targets are UP
+ 7. Check grafana dashboards
+
+ """
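+        # Docker Swarm services expected to be running after the SL deploy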
+ expected_service_list = ['monitoring_remote_storage_adapter',
+ 'monitoring_server',
+ 'monitoring_remote_agent',
+ 'dashboard_grafana',
+ 'monitoring_alertmanager',
+ 'monitoring_remote_collector',
+ 'monitoring_pushgateway']
+ # STEP #4
+ mon_nodes = sl_actions.get_monitoring_nodes()
+ LOG.debug('Mon nodes list {0}'.format(mon_nodes))
+ for node in mon_nodes:
+ services_status = sl_actions.get_service_info_from_node(node)
+ assert len(services_status) == len(expected_service_list), \
+                'Some services are missing on node {0}. ' \
+                'Current service list: {1}'.format(node, services_status)
+ for service in expected_service_list:
+ assert service in services_status, \
+ 'Missing service {0} in {1}'.format(service, services_status)
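+                # Replicas are reported as '<running>/<desired>';
+                # a leading '0/' means the service has no running replicas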
+                assert not services_status.get(service).startswith('0/'), \
+                    'Service {0} failed to start'.format(service)
+ prometheus_client = sl_deployed.api
+ try:
+ current_targets = prometheus_client.get_targets()
+            LOG.debug(
+                'Current targets after install {0}'.format(current_targets))
+        except Exception:
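+            # Prometheus may be unreachable while the VIP is still settling
+            # on the mon nodes; restart keepalived and retry once.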
+ LOG.info('Restarting keepalived service on mon nodes...')
+ sl_actions._salt.local(tgt='mon*', fun='cmd.run',
+ args='systemctl restart keepalived')
+ LOG.warning(
+                'IP state after forced restart: {0}'.format(
+ sl_actions._salt.local(tgt='mon*',
+ fun='cmd.run', args='ip a')))
+ current_targets = prometheus_client.get_targets()
+            LOG.debug(
+                'Current targets after install {0}'.format(current_targets))
+ # Assert that targets are up
+ for entry in current_targets:
+ assert 'up' in entry['health'], \
+                'Target is down: {}'.format(entry)
+ LOG.info("*************** DONE **************")
+
+ @pytest.mark.fail_snapshot
def test_mcp11_ocata_dvr_install(self, underlay, openstack_deployed,
show_step):
"""Test for deploying an mcp environment and check it
@@ -48,15 +103,58 @@
LOG.info("*************** DONE **************")
@pytest.mark.fail_snapshot
- def test_mcp11_ocata_dvr_sl_install(self, underlay, openstack_deployed,
+ def test_mcp11_ocata_dvr_sl_install(self, underlay, config,
+ openstack_deployed,
sl_deployed, sl_actions, show_step):
"""Test for deploying an mcp environment and check it
Scenario:
1. Prepare salt on hosts
2. Setup controller nodes
3. Setup compute nodes
+ 4. Get monitoring nodes
+ 5. Check that docker services are running
+ 6. Check current targets are UP
+ 7. Check grafana dashboards
"""
+ expected_service_list = ['monitoring_remote_storage_adapter',
+ 'monitoring_server',
+ 'monitoring_remote_agent',
+ 'dashboard_grafana',
+ 'monitoring_alertmanager',
+ 'monitoring_remote_collector',
+ 'monitoring_pushgateway']
+ # STEP #4
+ mon_nodes = sl_actions.get_monitoring_nodes()
+ LOG.debug('Mon nodes list {0}'.format(mon_nodes))
+ for node in mon_nodes:
+ services_status = sl_actions.get_service_info_from_node(node)
+ assert len(services_status) == len(expected_service_list), \
+                'Some services are missing on node {0}. ' \
+                'Current service list: {1}'.format(node, services_status)
+ for service in expected_service_list:
+                assert service in services_status, \
+ 'Missing service {0} in {1}'.format(service, services_status)
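+                # Replicas are reported as '<running>/<desired>';
+                # a leading '0/' means the service has no running replicas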
+                assert not services_status.get(service).startswith('0/'), \
+                    'Service {0} failed to start'.format(service)
+ prometheus_client = sl_deployed.api
+ try:
+ current_targets = prometheus_client.get_targets()
+            LOG.debug(
+                'Current targets after install {0}'.format(current_targets))
+        except Exception:
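+            # Prometheus may be unreachable while the VIP is still settling
+            # on the mon nodes; restart keepalived and retry once.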
+ LOG.info('Restarting keepalived service on mon nodes...')
+ sl_actions._salt.local(tgt='mon*', fun='cmd.run',
+ args='systemctl restart keepalived')
+ LOG.warning(
+                'IP state after forced restart: {0}'.format(
+ sl_actions._salt.local(tgt='mon*',
+ fun='cmd.run', args='ip a')))
+ current_targets = prometheus_client.get_targets()
+            LOG.debug(
+                'Current targets after install {0}'.format(current_targets))
+ # Assert that targets are up
+ for entry in current_targets:
+ assert 'up' in entry['health'], \
+                'Target is down: {}'.format(entry)
LOG.info("*************** DONE **************")
@pytest.mark.fail_snapshot