Add checks for netchecker metrics
Change-Id: Icc3ab64624168d4b39b18f06cda18e8b890eb4ee
Reviewed-on: https://review.gerrithub.io/368050
Reviewed-by: Tatyanka Leontovich <tleontovich@mirantis.com>
Tested-by: Tatyanka Leontovich <tleontovich@mirantis.com>
diff --git a/tcp_tests/helpers/netchecker.py b/tcp_tests/helpers/netchecker.py
index f23a5c8..9c920ec 100644
--- a/tcp_tests/helpers/netchecker.py
+++ b/tcp_tests/helpers/netchecker.py
@@ -331,7 +331,7 @@
@utils.retry(3, requests.exceptions.RequestException)
-def get_status(k8sclient, netchecker_pod_port=NETCHECKER_NODE_PORT,
+def get_connectivity_status(k8sclient, netchecker_pod_port=NETCHECKER_NODE_PORT,
pod_name='netchecker-server', namespace='default'):
netchecker_srv_pod_names = [pod.name for pod in
@@ -352,17 +352,33 @@
return response
-def check_network(k8sclient, namespace='default', works=True):
+@utils.retry(3, requests.exceptions.RequestException)
+def get_netchecker_pod_status(k8s,
+ pod_name='netchecker-server',
+ namespace='default'):
+
+ k8s.wait_pods_phase(
+ pods=[pod for pod in k8s.api.pods.list(namespace=namespace)
+ if pod_name in pod.name], phase='Running', timeout=600)
+
+
+def check_network(k8sclient, netchecker_pod_port,
+ namespace='default', works=True):
if works:
- assert get_status(k8sclient,
- namespace=namespace).status_code in (200, 204)
+ assert get_connectivity_status(
+ k8sclient, namespace=namespace,
+ netchecker_pod_port=netchecker_pod_port).status_code in (200, 204)
else:
- assert get_status(k8sclient, namespace=namespace).status_code == 400
+ assert get_connectivity_status(
+ k8sclient, namespace=namespace,
+ netchecker_pod_port=netchecker_pod_port).status_code == 400
def wait_check_network(k8sclient, namespace='default', works=True, timeout=120,
- interval=5):
- helpers.wait_pass(lambda: check_network(k8sclient, namespace=namespace,
+ interval=5, netchecker_pod_port=NETCHECKER_NODE_PORT):
+ helpers.wait_pass(lambda: check_network(k8sclient,
+ netchecker_pod_port=netchecker_pod_port,
+ namespace=namespace,
works=works),
timeout=timeout, interval=interval)
@@ -518,3 +534,25 @@
cmd_add_policy_hostnet = "echo '{0}' | kubectl create -f -".format(
json.dumps(kubernetes_policy_hostnet))
underlay.sudo_check_call(cmd=cmd_add_policy_hostnet, host=kube_host_ip)
+
+
+@utils.retry(3, requests.exceptions.RequestException)
+def get_metric(k8sclient, netchecker_pod_port,
+ pod_name='netchecker-server', namespace='default'):
+
+ netchecker_srv_pod_names = [pod.name for pod in
+ k8sclient.pods.list(namespace=namespace)
+ if pod_name in pod.name]
+
+ assert len(netchecker_srv_pod_names) > 0, \
+ "No netchecker-server pods found!"
+ netchecker_srv_pod = k8sclient.pods.get(name=netchecker_srv_pod_names[0],
+ namespace=namespace)
+
+ kube_host_ip = netchecker_srv_pod.status.host_ip
+ metrics_url = 'http://{0}:{1}/metrics'.format(
+ kube_host_ip, netchecker_pod_port)
+ response = requests.get(metrics_url, timeout=30)
+ LOG.debug('Metrics: [{0}] {1}'.format(
+ response.status_code, response.text.strip()))
+ return response
diff --git a/tcp_tests/settings_oslo.py b/tcp_tests/settings_oslo.py
index 419fd7b..47c18d1 100644
--- a/tcp_tests/settings_oslo.py
+++ b/tcp_tests/settings_oslo.py
@@ -155,7 +155,23 @@
ct.Cfg('sl_steps_path', ct.String(),
help="Path to YAML with steps to deploy sl",
default=_default_sl_prepare_tests_steps_path),
+ ct.Cfg('docker_image_alertmanager', ct.String(),
+ default='{}/openstack-docker/alertmanager:latest'.format(
+ settings.DOCKER_REGISTRY)),
+ ct.Cfg('docker_image_pushgateway', ct.String(),
+ default='{}/openstack-docker/pushgateway:latest'.format(
+ settings.DOCKER_REGISTRY)),
+ ct.Cfg('docker_image_prometheus', ct.String(),
+ default='{}/openstack-docker/prometheus:latest'.format(
+ settings.DOCKER_REGISTRY)),
+ ct.Cfg('docker_image_remote_agent', ct.String(),
+ default='{}/openstack-docker/telegraf:latest'.format(
+ settings.DOCKER_REGISTRY)),
+ ct.Cfg('docker_image_remote_storage_adapter', ct.String(),
+ default='{}/openstack-docker/remote_storage_adapter:latest'.format(
+ settings.DOCKER_REGISTRY)),
]
+
sl_opts = [
ct.Cfg('sl_installed', ct.Boolean(),
help="", default=False),
@@ -202,9 +218,9 @@
ct.Cfg('kubernetes_netchecker_enabled', ct.Boolean(),
help="", default=True),
ct.Cfg('kubernetes_netchecker_agent_image', ct.String(),
- default='mirantis/k8s-netchecker-agent:latest'),
+ default='mirantis/k8s-netchecker-agent:stable'),
ct.Cfg('kubernetes_netchecker_server_image', ct.String(),
- default='mirantis/k8s-netchecker-server:latest'),
+ default='mirantis/k8s-netchecker-server:stable'),
ct.Cfg('kubernetes_calico_policy_enabled', ct.Boolean(),
help="", default=False),
ct.Cfg('kubernetes_calico_policy_image', ct.String(),
diff --git a/tcp_tests/templates/virtual-mcp11-k8s-calico/sl.yaml b/tcp_tests/templates/virtual-mcp11-k8s-calico/sl.yaml
index d900c42..5be65fd 100644
--- a/tcp_tests/templates/virtual-mcp11-k8s-calico/sl.yaml
+++ b/tcp_tests/templates/virtual-mcp11-k8s-calico/sl.yaml
@@ -45,7 +45,13 @@
# Install slv2 infra
- description: Install telegraf
- cmd: salt -C 'I@telegraf:agent' state.sls telegraf
+ cmd: salt -C 'I@telegraf:agent or I@telegraf:remote_agent' state.sls telegraf
+ node_name: {{ HOSTNAME_CFG01 }}
+ retry: {count: 1, delay: 10}
+ skip_fail: false
+
+- description: Configure collector
+ cmd: salt -C 'I@heka:log_collector' state.sls heka.log_collector
node_name: {{ HOSTNAME_CFG01 }}
retry: {count: 1, delay: 10}
skip_fail: false
@@ -62,18 +68,6 @@
retry: {count: 1, delay: 10}
skip_fail: false
-- description: Install elasticsearch service
- cmd: salt -C 'I@elasticsearch:client' state.sls elasticsearch.client.service
- node_name: {{ HOSTNAME_CFG01 }}
- retry: {count: 1, delay: 10}
- skip_fail: false
-
-- description: Restart minions
- cmd: salt -C 'I@elasticsearch:client' --async service.restart salt-minion; sleep 10;
- node_name: {{ HOSTNAME_CFG01 }}
- retry: {count: 1, delay: 10}
- skip_fail: false
-
- description: Install elasticsearch client
cmd: salt -C 'I@elasticsearch:client' state.sls elasticsearch.client
node_name: {{ HOSTNAME_CFG01 }}
@@ -117,19 +111,30 @@
retry: {count: 1, delay: 10}
skip_fail: false
+# Change environment configuration before deploy
+- description: Set SL docker images deploy parameters
+ cmd: |
+ {% for sl_opt, value in config.sl_deploy.items() %}
+ {% if value|string() %}
+ salt-call reclass.cluster_meta_set {{ sl_opt }} {{ value }};
+ {% endif %}
+ {% endfor %}
+ node_name: {{ HOSTNAME_CFG01 }}
+ retry: {count: 1, delay: 1}
+ skip_fail: false
+
# Configure the services running in Docker Swarm
- description: Install prometheus alertmanager
- cmd: salt -C 'I@docker:swarm' state.sls prometheus.server,prometheus.alertmanager -b 1
+ cmd: salt -C 'I@docker:swarm' state.sls prometheus,heka.remote_collector -b 1
node_name: {{ HOSTNAME_CFG01 }}
retry: {count: 1, delay: 10}
skip_fail: false
-
- description: Pull images
cmd: salt -C 'I@docker:swarm:role:master' cmd.run 'docker node ls';
for img in pushgateway alertmanager prometheus telegraf remote_storage_adapter; do
- salt -C 'I@docker:swarm' dockerng.pull "docker-sandbox.sandbox.mirantis.net/bkupidura/$img";
- salt -C 'I@docker:swarm' dockerng.tag "docker-sandbox.sandbox.mirantis.net/bkupidura/$img:latest" "$img:latest";
+ salt -C 'I@docker:swarm' dockerng.pull "docker-prod-virtual.docker.mirantis.net/openstack-docker/$img";
+ salt -C 'I@docker:swarm' dockerng.tag "docker-prod-virtual.docker.mirantis.net/openstack-docker/$img:latest" "$img:latest";
done;
node_name: {{ HOSTNAME_CFG01 }}
retry: {count: 1, delay: 1}
diff --git a/tcp_tests/tests/system/test_install_k8s.py b/tcp_tests/tests/system/test_install_k8s.py
index 1d5c8c8..a911294 100644
--- a/tcp_tests/tests/system/test_install_k8s.py
+++ b/tcp_tests/tests/system/test_install_k8s.py
@@ -15,6 +15,7 @@
import pytest
from tcp_tests import logger
+from tcp_tests.helpers import netchecker
LOG = logger.logger
@@ -24,7 +25,8 @@
"""Test class for testing Kubernetes deploy"""
@pytest.mark.fail_snapshot
- def test_k8s_install(self, config, sl_deployed, k8s_deployed, k8s_actions):
+ def test_k8s_install(self, config, show_step,
+ k8s_deployed, k8s_actions, sl_deployed):
"""Test for deploying MCP environment with k8s+stacklight and check it
Scenario:
@@ -32,10 +34,49 @@
2. Setup controller nodes
3. Setup compute nodes
4. Setup stack light nodes
- 5. Setup Kubernetes cluster
- 6. Run conformance if need
+ 5. Setup Kubernetes cluster and check it nodes
+ 6. Check netchecker server is running
+ 7. Check netchecker agent is running
+ 8. Check connectivity
+ 9. Get metrics from netchecker
"""
+ # STEP #5
+ show_step(5)
+ k8sclient = k8s_deployed.api
+ assert k8sclient.nodes.list() is not None, "Can not get nodes list"
+
+ show_step(6)
+ netchecker.get_netchecker_pod_status(k8s=k8s_deployed,
+ namespace='netchecker')
+
+ show_step(7)
+ netchecker.get_netchecker_pod_status(k8s=k8s_deployed,
+ pod_name='netchecker-agent',
+ namespace='netchecker')
+
+ show_step(8)
+ netchecker.wait_check_network(k8sclient, namespace='netchecker',
+ netchecker_pod_port=30811)
+ show_step(9)
+ res = netchecker.get_metric(k8sclient, netchecker_pod_port=30811,
+ namespace='netchecker')
+
+ assert res.status_code == 200, 'Unexpected response code {}'.format(res)
+ metrics = ['ncagent_error_count_total', 'ncagent_http_probe_code',
+ 'ncagent_http_probe_connect_time_ms',
+ 'ncagent_http_probe_connection_result',
+ 'ncagent_http_probe_content_transfer_time_ms',
+ 'ncagent_http_probe_dns_lookup_time_ms',
+ 'ncagent_http_probe_server_processing_time_ms',
+ 'ncagent_http_probe_tcp_connection_time_ms',
+ 'ncagent_http_probe_total_time_ms',
+ 'ncagent_report_count_total']
+ for metric in metrics:
+ assert metric in res.text.strip(), \
+ 'Mandatory metric {0} is missing in {1}'.format(
+ metric, res.text)
+
if config.k8s.k8s_conformance_run:
k8s_actions.run_conformance()
LOG.info("*************** DONE **************")