Add checks for netchecker metrics

Change-Id: Icc3ab64624168d4b39b18f06cda18e8b890eb4ee
Reviewed-on: https://review.gerrithub.io/368050
Reviewed-by: Tatyanka Leontovich <tleontovich@mirantis.com>
Tested-by: Tatyanka Leontovich <tleontovich@mirantis.com>
diff --git a/tcp_tests/helpers/netchecker.py b/tcp_tests/helpers/netchecker.py
index f23a5c8..9c920ec 100644
--- a/tcp_tests/helpers/netchecker.py
+++ b/tcp_tests/helpers/netchecker.py
@@ -331,7 +331,7 @@
 
 
 @utils.retry(3, requests.exceptions.RequestException)
-def get_status(k8sclient, netchecker_pod_port=NETCHECKER_NODE_PORT,
+def get_connectivity_status(k8sclient, netchecker_pod_port=NETCHECKER_NODE_PORT,
                pod_name='netchecker-server', namespace='default'):
 
     netchecker_srv_pod_names = [pod.name for pod in
@@ -352,17 +352,34 @@
     return response
 
 
-def check_network(k8sclient, namespace='default', works=True):
+@utils.retry(3, requests.exceptions.RequestException)
+def get_netchecker_pod_status(k8s,
+                              pod_name='netchecker-server',
+                              namespace='default'):
+
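+    # Wait for all pods matching pod_name to reach the 'Running' phase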
+    k8s.wait_pods_phase(
+        pods=[pod for pod in k8s.api.pods.list(namespace=namespace)
+              if pod_name in pod.name], phase='Running', timeout=600)
+
+
+def check_network(k8sclient, netchecker_pod_port,
+                  namespace='default', works=True):
     if works:
-        assert get_status(k8sclient,
-                          namespace=namespace).status_code in (200, 204)
+        assert get_connectivity_status(
+            k8sclient, namespace=namespace,
+            netchecker_pod_port=netchecker_pod_port).status_code in (200, 204)
     else:
-        assert get_status(k8sclient, namespace=namespace).status_code == 400
+        assert get_connectivity_status(
+            k8sclient, namespace=namespace,
+            netchecker_pod_port=netchecker_pod_port).status_code == 400
 
 
 def wait_check_network(k8sclient, namespace='default', works=True, timeout=120,
-                       interval=5):
-    helpers.wait_pass(lambda: check_network(k8sclient, namespace=namespace,
+                       interval=5, netchecker_pod_port=NETCHECKER_NODE_PORT):
+    helpers.wait_pass(lambda: check_network(k8sclient,
+                                            netchecker_pod_port=netchecker_pod_port,
+                                            namespace=namespace,
                                             works=works),
                       timeout=timeout, interval=interval)
 
@@ -518,3 +534,27 @@
     cmd_add_policy_hostnet = "echo '{0}' | kubectl create -f -".format(
         json.dumps(kubernetes_policy_hostnet))
     underlay.sudo_check_call(cmd=cmd_add_policy_hostnet, host=kube_host_ip)
+
+
+@utils.retry(3, requests.exceptions.RequestException)
+def get_metric(k8sclient, netchecker_pod_port,
+               pod_name='netchecker-server', namespace='default'):
+
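+    # Locate the netchecker-server pod to find out which node hosts it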
+    netchecker_srv_pod_names = [pod.name for pod in
+                                k8sclient.pods.list(namespace=namespace)
+                                if pod_name in pod.name]
+
+    assert len(netchecker_srv_pod_names) > 0, \
+        "No netchecker-server pods found!"
+    netchecker_srv_pod = k8sclient.pods.get(name=netchecker_srv_pod_names[0],
+                                            namespace=namespace)
+
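+    # Metrics are fetched from the hosting node's IP on the given node port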
+    kube_host_ip = netchecker_srv_pod.status.host_ip
+    metrics_url = 'http://{0}:{1}/metrics'.format(
+        kube_host_ip, netchecker_pod_port)
+    response = requests.get(metrics_url, timeout=30)
+    LOG.debug('Metrics: [{0}] {1}'.format(
+        response.status_code, response.text.strip()))
+    return response
diff --git a/tcp_tests/settings_oslo.py b/tcp_tests/settings_oslo.py
index 419fd7b..47c18d1 100644
--- a/tcp_tests/settings_oslo.py
+++ b/tcp_tests/settings_oslo.py
@@ -155,7 +155,24 @@
     ct.Cfg('sl_steps_path', ct.String(),
            help="Path to YAML with steps to deploy sl",
            default=_default_sl_prepare_tests_steps_path),
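+    # Docker images for the StackLight services deployed in Docker Swarm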
+    ct.Cfg('docker_image_alertmanager', ct.String(),
+           default='{}/openstack-docker/alertmanager:latest'.format(
+               settings.DOCKER_REGISTRY)),
+    ct.Cfg('docker_image_pushgateway', ct.String(),
+           default='{}/openstack-docker/pushgateway:latest'.format(
+               settings.DOCKER_REGISTRY)),
+    ct.Cfg('docker_image_prometheus', ct.String(),
+           default='{}/openstack-docker/prometheus:latest'.format(
+               settings.DOCKER_REGISTRY)),
+    ct.Cfg('docker_image_remote_agent', ct.String(),
+           default='{}/openstack-docker/telegraf:latest'.format(
+               settings.DOCKER_REGISTRY)),
+    ct.Cfg('docker_image_remote_storage_adapter', ct.String(),
+           default='{}/openstack-docker/remote_storage_adapter:latest'.format(
+               settings.DOCKER_REGISTRY)),
 ]
+
 sl_opts = [
     ct.Cfg('sl_installed', ct.Boolean(),
            help="", default=False),
@@ -202,9 +218,9 @@
     ct.Cfg('kubernetes_netchecker_enabled', ct.Boolean(),
            help="", default=True),
     ct.Cfg('kubernetes_netchecker_agent_image', ct.String(),
-           default='mirantis/k8s-netchecker-agent:latest'),
+           default='mirantis/k8s-netchecker-agent:stable'),
     ct.Cfg('kubernetes_netchecker_server_image', ct.String(),
-           default='mirantis/k8s-netchecker-server:latest'),
+           default='mirantis/k8s-netchecker-server:stable'),
     ct.Cfg('kubernetes_calico_policy_enabled', ct.Boolean(),
            help="", default=False),
     ct.Cfg('kubernetes_calico_policy_image', ct.String(),
diff --git a/tcp_tests/templates/virtual-mcp11-k8s-calico/sl.yaml b/tcp_tests/templates/virtual-mcp11-k8s-calico/sl.yaml
index d900c42..5be65fd 100644
--- a/tcp_tests/templates/virtual-mcp11-k8s-calico/sl.yaml
+++ b/tcp_tests/templates/virtual-mcp11-k8s-calico/sl.yaml
@@ -45,7 +45,13 @@
 
 # Install slv2 infra
 - description: Install telegraf
-  cmd: salt -C 'I@telegraf:agent' state.sls telegraf
+  cmd: salt -C 'I@telegraf:agent or I@telegraf:remote_agent' state.sls telegraf
+  node_name: {{ HOSTNAME_CFG01 }}
+  retry: {count: 1, delay: 10}
+  skip_fail: false
+
+- description: Configure collector
+  cmd: salt -C 'I@heka:log_collector' state.sls heka.log_collector
   node_name: {{ HOSTNAME_CFG01 }}
   retry: {count: 1, delay: 10}
   skip_fail: false
@@ -62,18 +68,6 @@
   retry: {count: 1, delay: 10}
   skip_fail: false
 
-- description: Install elasticsearch service
-  cmd: salt -C 'I@elasticsearch:client' state.sls elasticsearch.client.service
-  node_name: {{ HOSTNAME_CFG01 }}
-  retry: {count: 1, delay: 10}
-  skip_fail: false
-
-- description: Restart minions
-  cmd: salt -C 'I@elasticsearch:client' --async service.restart salt-minion; sleep 10;
-  node_name: {{ HOSTNAME_CFG01 }}
-  retry: {count: 1, delay: 10}
-  skip_fail: false
-
 - description: Install elasticsearch client
   cmd: salt -C 'I@elasticsearch:client' state.sls elasticsearch.client
   node_name: {{ HOSTNAME_CFG01 }}
@@ -117,19 +111,30 @@
   retry: {count: 1, delay: 10}
   skip_fail: false
 
+# Change environment configuration before deploy
+- description: Set SL docker images deploy parameters
+  cmd: |
+  {% for sl_opt, value in config.sl_deploy.items() %}
+    {% if value|string() %}
+    salt-call reclass.cluster_meta_set {{ sl_opt }} {{ value }};
+    {% endif %}
+  {% endfor %}
+  node_name: {{ HOSTNAME_CFG01 }}
+  retry: {count: 1, delay: 1}
+  skip_fail: false
+
 # Configure the services running in Docker Swarm
 - description: Install prometheus alertmanager
-  cmd: salt -C 'I@docker:swarm' state.sls prometheus.server,prometheus.alertmanager -b 1
+  cmd: salt -C 'I@docker:swarm' state.sls prometheus,heka.remote_collector -b 1
   node_name: {{ HOSTNAME_CFG01 }}
   retry: {count: 1, delay: 10}
   skip_fail: false
 
-
 - description: Pull images
   cmd: salt -C 'I@docker:swarm:role:master' cmd.run 'docker node ls';
     for img in pushgateway alertmanager prometheus telegraf remote_storage_adapter; do
-        salt -C 'I@docker:swarm' dockerng.pull "docker-sandbox.sandbox.mirantis.net/bkupidura/$img";
-        salt -C 'I@docker:swarm' dockerng.tag "docker-sandbox.sandbox.mirantis.net/bkupidura/$img:latest" "$img:latest";
+        salt -C 'I@docker:swarm' dockerng.pull "docker-prod-virtual.docker.mirantis.net/openstack-docker/$img";
+        salt -C 'I@docker:swarm' dockerng.tag "docker-prod-virtual.docker.mirantis.net/openstack-docker/$img:latest" "$img:latest";
     done;
   node_name: {{ HOSTNAME_CFG01 }}
   retry: {count: 1, delay: 1}
diff --git a/tcp_tests/tests/system/test_install_k8s.py b/tcp_tests/tests/system/test_install_k8s.py
index 1d5c8c8..a911294 100644
--- a/tcp_tests/tests/system/test_install_k8s.py
+++ b/tcp_tests/tests/system/test_install_k8s.py
@@ -15,6 +15,7 @@
 import pytest
 
 from tcp_tests import logger
+from tcp_tests.helpers import netchecker
 
 LOG = logger.logger
 
@@ -24,7 +25,8 @@
     """Test class for testing Kubernetes deploy"""
 
     @pytest.mark.fail_snapshot
-    def test_k8s_install(self, config, sl_deployed, k8s_deployed, k8s_actions):
+    def test_k8s_install(self, config, show_step,
+                         k8s_deployed, k8s_actions, sl_deployed):
         """Test for deploying MCP environment with k8s+stacklight and check it
 
         Scenario:
@@ -32,10 +34,51 @@
             2. Setup controller nodes
             3. Setup compute nodes
             4. Setup stack light nodes
-            5. Setup Kubernetes cluster
-            6. Run conformance if need
+            5. Setup Kubernetes cluster and check its nodes
+            6. Check netchecker server is running
+            7. Check netchecker agent is running
+            8. Check connectivity
+            9. Get metrics from netchecker
 
         """
+        # STEP #5
+        show_step(5)
+        k8sclient = k8s_deployed.api
+        assert k8sclient.nodes.list() is not None, "Cannot get nodes list"
+
+        show_step(6)
+        netchecker.get_netchecker_pod_status(k8s=k8s_deployed,
+                                             namespace='netchecker')
+
+        show_step(7)
+        netchecker.get_netchecker_pod_status(k8s=k8s_deployed,
+                                             pod_name='netchecker-agent',
+                                             namespace='netchecker')
+
+        show_step(8)
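+        # Wait until netchecker reports OK connectivity between the agents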
+        netchecker.wait_check_network(k8sclient, namespace='netchecker',
+                                      netchecker_pod_port=30811)
+        show_step(9)
+        res = netchecker.get_metric(k8sclient, netchecker_pod_port=30811,
+                                    namespace='netchecker')
+
+        assert res.status_code == 200, 'Unexpected response {}'.format(res)
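+        # Agent metrics expected in the server's /metrics output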
+        metrics = ['ncagent_error_count_total', 'ncagent_http_probe_code',
+                   'ncagent_http_probe_connect_time_ms',
+                   'ncagent_http_probe_connection_result',
+                   'ncagent_http_probe_content_transfer_time_ms',
+                   'ncagent_http_probe_dns_lookup_time_ms',
+                   'ncagent_http_probe_server_processing_time_ms',
+                   'ncagent_http_probe_tcp_connection_time_ms',
+                   'ncagent_http_probe_total_time_ms',
+                   'ncagent_report_count_total']
+        for metric in metrics:
+            assert metric in res.text.strip(), \
+                'Mandatory metric {0} is missing in {1}'.format(
+                    metric, res.text)
+
         if config.k8s.k8s_conformance_run:
             k8s_actions.run_conformance()
         LOG.info("*************** DONE **************")