Prometheus client

Initial prometheus client

Change-Id: I8c02be6fe7e58c2f37ac19547fd7a795896af4bc
Reviewed-on: https://review.gerrithub.io/368732
Reviewed-by: Tatyanka Leontovich <tleontovich@mirantis.com>
Tested-by: Tatyanka Leontovich <tleontovich@mirantis.com>
diff --git a/tcp_tests/fixtures/stacklight_fixtures.py b/tcp_tests/fixtures/stacklight_fixtures.py
index 82b7c88..6d7d2e9 100644
--- a/tcp_tests/fixtures/stacklight_fixtures.py
+++ b/tcp_tests/fixtures/stacklight_fixtures.py
@@ -12,14 +12,11 @@
 #    License for the specific language governing permissions and limitations
 #    under the License.
 
-import os
 
 import pytest
-import yaml
 
 from tcp_tests import logger
 from tcp_tests.helpers import ext
-from tcp_tests import settings
 from tcp_tests.managers import sl_manager
 from tcp_tests.helpers import utils
 
@@ -70,6 +67,13 @@
         #    installed TCP API endpoint
         pass
 
+    # Workaround for keepalived hang issue after env revert from snapshot
+    # see https://mirantis.jira.com/browse/PROD-12038
+    LOG.warning('Restarting keepalived service on controllers...')
+    sl_actions._salt.local(tgt='ctl*', fun='cmd.run',
+                           args='systemctl restart keepalived.service')
+    sl_actions._salt.local(tgt='mon*', fun='cmd.run',
+                           args='systemctl restart keepalived.service')
     return sl_actions
 
 
@@ -106,5 +110,11 @@
         # 3. config.tcp.* options contain access credentials to the already
         #    installed TCP API endpoint
         pass
-
+    # Workaround for keepalived hang issue after env revert from snapshot
+    # see https://mirantis.jira.com/browse/PROD-12038
+    LOG.warning('Restarting keepalived service on controllers...')
+    sl_actions._salt.local(tgt='ctl*', fun='cmd.run',
+                           args='systemctl restart keepalived.service')
+    sl_actions._salt.local(tgt='mon*', fun='cmd.run',
+                           args='systemctl restart keepalived.service')
     return sl_actions
diff --git a/tcp_tests/helpers/netchecker.py b/tcp_tests/helpers/netchecker.py
index 9c920ec..76e7640 100644
--- a/tcp_tests/helpers/netchecker.py
+++ b/tcp_tests/helpers/netchecker.py
@@ -556,3 +556,18 @@
     LOG.debug('Metrics: [{0}] {1}'.format(
         response.status_code, response.text.strip()))
     return response
+
+
+def get_service_port(k8sclient, service_name='netchecker',
+                     namespace='netchecker'):
+    full_service_name = [service.name for service
+                         in k8sclient.services.list(namespace=namespace)
+                         if service_name in service.name]
+    assert len(full_service_name) > 0, "No netchecker service run"
+
+    service_details = k8sclient.services.get(name=full_service_name[0],
+                                             namespace=namespace)
+
+    LOG.debug('Netchecker service details {0}'.format(service_details))
+    netchecker_port = service_details.spec.ports[0].node_port
+    return netchecker_port
diff --git a/tcp_tests/managers/clients/__init__.py b/tcp_tests/managers/clients/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/tcp_tests/managers/clients/__init__.py
diff --git a/tcp_tests/managers/clients/http_client.py b/tcp_tests/managers/clients/http_client.py
new file mode 100644
index 0000000..5453141
--- /dev/null
+++ b/tcp_tests/managers/clients/http_client.py
@@ -0,0 +1,49 @@
+import logging
+import urlparse
+
+import requests
+
+from tcp_tests import logger
+
+LOG = logger.logger
+
+logger = logging.getLogger(__name__)
+
+
+class HttpClient(object):
+    def __init__(self, base_url=None, user=None, password=None):
+        self.base_url = base_url
+        self.kwargs = {}
+        if user and password:
+            self.kwargs.update({"auth": (user, password)})
+
+    def set_base_url(self, base_url):
+        self.base_url = base_url
+
+    def request(self, url, method, headers=None, body=None, **kwargs):
+        logger.debug(
+            "Sending request to: {}, body: {}, headers: {}, kwargs: {}".format(
+                url, body, headers, kwargs))
+        if headers is None:
+            headers = {'Content-Type': 'application/json'}
+
+        kwargs.update(self.kwargs)
+        r = requests.request(method, urlparse.urljoin(self.base_url, url),
+                             headers=headers, data=body, **kwargs)
+
+        if not r.ok:
+            raise requests.HTTPError(r.content)
+        logger.debug(r.content)
+        return r.headers, r.content
+
+    def post(self, url, body=None, **kwargs):
+        return self.request(url, "POST", body=body, **kwargs)
+
+    def get(self, url, **kwargs):
+        return self.request(url, "GET", **kwargs)
+
+    def put(self, url, body=None, **kwargs):
+        return self.request(url, "PUT", body=body, **kwargs)
+
+    def delete(self, url, **kwargs):
+        return self.request(url, "DELETE", **kwargs)
diff --git a/tcp_tests/managers/clients/prometheus/__init__.py b/tcp_tests/managers/clients/prometheus/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/tcp_tests/managers/clients/prometheus/__init__.py
diff --git a/tcp_tests/managers/clients/prometheus/prometheus_client.py b/tcp_tests/managers/clients/prometheus/prometheus_client.py
new file mode 100644
index 0000000..bf748af
--- /dev/null
+++ b/tcp_tests/managers/clients/prometheus/prometheus_client.py
@@ -0,0 +1,31 @@
+import json
+
+from tcp_tests.managers.clients import http_client
+
+
+class PrometheusClient(object):
+    def __init__(self, host, port, proto):
+        self.url = '{0}://{1}:{2}'.format(proto, host, port)
+        self.client = http_client.HttpClient(base_url=self.url)
+
+    def get_targets(self):
+        _, resp = self.client.get("/api/v1/targets")
+        targets = json.loads(resp)
+        return targets["data"]["activeTargets"]
+
+    def get_query(self, query, timestamp=None):
+        params = {
+            "query": query
+        }
+
+        if timestamp is not None:
+            params.update({"time": timestamp})
+
+        _, resp = self.client.get("/api/v1/query", params=params)
+
+        query_result = json.loads(resp)
+        if query_result["status"] != "success":
+            raise Exception("Failed resp: {}".format(resp))
+
+        if query_result["data"]["resultType"] == "vector":
+            return query_result["data"]["result"]
diff --git a/tcp_tests/managers/sl_manager.py b/tcp_tests/managers/sl_manager.py
index c868d67..23d362e 100644
--- a/tcp_tests/managers/sl_manager.py
+++ b/tcp_tests/managers/sl_manager.py
@@ -13,6 +13,7 @@
 #    under the License.
 
 from tcp_tests.managers.execute_commands import ExecuteCommandsMixin
+from tcp_tests.managers.clients.prometheus import prometheus_client
 
 
 class SLManager(ExecuteCommandsMixin):
@@ -25,6 +26,7 @@
         self.__config = config
         self.__underlay = underlay
         self._salt = salt
+        self._p_client = None
         super(SLManager, self).__init__(
             config=config, underlay=underlay)
 
@@ -32,3 +34,26 @@
         self.execute_commands(commands,
                               label='Install SL services')
         self.__config.stack_light.sl_installed = True
+        self.__config.stack_light.sl_vip_host = self.get_sl_vip()
+
+    def get_sl_vip(self):
+        sl_vip_address_pillars = self._salt.get_pillar(
+            tgt='I@keepalived:cluster:enabled:true and not ctl*',
+            pillar='keepalived:cluster:instance:prometheus_server_vip:address')
+        sl_vip_ip = set([ip
+                         for item in sl_vip_address_pillars
+                         for node, ip in item.items() if ip])
+        assert len(sl_vip_ip) == 1, (
+            "Found more than one SL VIP in pillars:{0}, "
+            "expected one!").format(sl_vip_ip)
+        sl_vip_ip_host = sl_vip_ip.pop()
+        return sl_vip_ip_host
+
+    @property
+    def api(self):
+        if self._p_client is None:
+            self._p_client = prometheus_client.PrometheusClient(
+                host=self.__config.stack_light.sl_vip_host,
+                port=self.__config.stack_light.sl_prometheus_port,
+                proto=self.__config.stack_light.sl_prometheus_proto)
+        return self._p_client
diff --git a/tcp_tests/settings_oslo.py b/tcp_tests/settings_oslo.py
index 47c18d1..c01c743 100644
--- a/tcp_tests/settings_oslo.py
+++ b/tcp_tests/settings_oslo.py
@@ -175,6 +175,12 @@
 sl_opts = [
     ct.Cfg('sl_installed', ct.Boolean(),
            help="", default=False),
+    ct.Cfg('sl_vip_host', ct.IPAddress(),
+           help="Vip address for SL services", default='0.0.0.0'),
+    ct.Cfg('sl_prometheus_port', ct.String(),
+           help="Prometheus port", default='15010'),
+    ct.Cfg('sl_prometheus_proto', ct.String(),
+           help="Prometheus protocol", default='http'),
 ]
 
 virtlet_deploy_opts = [
diff --git a/tcp_tests/templates/virtual-mcp11-k8s-calico/sl.yaml b/tcp_tests/templates/virtual-mcp11-k8s-calico/sl.yaml
index 5be65fd..7258fea 100644
--- a/tcp_tests/templates/virtual-mcp11-k8s-calico/sl.yaml
+++ b/tcp_tests/templates/virtual-mcp11-k8s-calico/sl.yaml
@@ -130,16 +130,6 @@
   retry: {count: 1, delay: 10}
   skip_fail: false
 
-- description: Pull images
-  cmd: salt -C 'I@docker:swarm:role:master' cmd.run 'docker node ls';
-    for img in pushgateway alertmanager prometheus telegraf remote_storage_adapter; do
-        salt -C 'I@docker:swarm' dockerng.pull "docker-prod-virtual.docker.mirantis.net/openstack-docker/$img";
-        salt -C 'I@docker:swarm' dockerng.tag "docker-prod-virtual.docker.mirantis.net/openstack-docker/$img:latest" "$img:latest";
-    done;
-  node_name: {{ HOSTNAME_CFG01 }}
-  retry: {count: 1, delay: 1}
-  skip_fail: false
-
 - description: run docker state
   cmd: salt -C 'I@docker:swarm:role:master' state.sls docker
   node_name: {{ HOSTNAME_CFG01 }}
@@ -153,7 +143,7 @@
   skip_fail: false
 
 - description: Configure Grafana dashboards and datasources
-  cmd: salt -C 'I@grafana:client' state.sls grafana.client
+  cmd: sleep 10; salt -C 'I@grafana:client' state.sls grafana.client
   node_name: {{ HOSTNAME_CFG01 }}
   retry: {count: 1, delay: 10}
   skip_fail: false
diff --git a/tcp_tests/templates/virtual-mcp11-k8s-contrail/sl.yaml b/tcp_tests/templates/virtual-mcp11-k8s-contrail/sl.yaml
index 8b708c9..de3f15d 100644
--- a/tcp_tests/templates/virtual-mcp11-k8s-contrail/sl.yaml
+++ b/tcp_tests/templates/virtual-mcp11-k8s-contrail/sl.yaml
@@ -1,4 +1,4 @@
-{% from 'virtual-mcp11-k8s-calico/underlay.yaml' import HOSTNAME_CFG01 with context %}
+{% from 'virtual-mcp11-k8s-contrail/underlay.yaml' import HOSTNAME_CFG01 with context %}
 
 # Install docker swarm
 - description: Configure docker service
@@ -45,7 +45,13 @@
 
 # Install slv2 infra
 - description: Install telegraf
-  cmd: salt -C 'I@telegraf:agent' state.sls telegraf
+  cmd: salt -C 'I@telegraf:agent or I@telegraf:remote_agent' state.sls telegraf
+  node_name: {{ HOSTNAME_CFG01 }}
+  retry: {count: 1, delay: 10}
+  skip_fail: false
+
+- description: Configure collector
+  cmd: salt -C 'I@heka:log_collector' state.sls heka.log_collector
   node_name: {{ HOSTNAME_CFG01 }}
   retry: {count: 1, delay: 10}
   skip_fail: false
@@ -62,18 +68,6 @@
   retry: {count: 1, delay: 10}
   skip_fail: false
 
-- description: Install elasticsearch service
-  cmd: salt -C 'I@elasticsearch:client' state.sls elasticsearch.client.service
-  node_name: {{ HOSTNAME_CFG01 }}
-  retry: {count: 1, delay: 10}
-  skip_fail: false
-
-- description: Restart minions
-  cmd: salt -C 'I@elasticsearch:client' --async service.restart salt-minion; sleep 10;
-  node_name: {{ HOSTNAME_CFG01 }}
-  retry: {count: 1, delay: 10}
-  skip_fail: false
-
 - description: Install elasticsearch client
   cmd: salt -C 'I@elasticsearch:client' state.sls elasticsearch.client
   node_name: {{ HOSTNAME_CFG01 }}
@@ -86,6 +80,17 @@
   retry: {count: 1, delay: 10}
   skip_fail: false
 
+- description: Check influxdb
+  cmd: |
+    INFLUXDB_SERVICE=`salt -C 'I@influxdb:server' test.ping 1>/dev/null 2>&1 && echo true`;
+    echo "Influxdb service presence: ${INFLUXDB_SERVICE}";
+    if [[ "$INFLUXDB_SERVICE" == "true" ]]; then
+        salt -C 'I@influxdb:server' state.sls influxdb
+    fi
+  node_name: {{ HOSTNAME_CFG01 }}
+  retry: {count: 1, delay: 5}
+  skip_fail: true
+
 # Collect grains needed to configure the services
 
 - description: Get grains
@@ -106,35 +111,25 @@
   retry: {count: 1, delay: 10}
   skip_fail: false
 
-# Configure the services running in Docker Swarm
-- description: Install prometheus alertmanager
-  cmd: salt -C 'I@docker:swarm' state.sls prometheus.server,prometheus.alertmanager -b 1
-  node_name: {{ HOSTNAME_CFG01 }}
-  retry: {count: 1, delay: 10}
-  skip_fail: false
-
-- description: Sync modules
-  cmd: salt -C 'I@salt:minion' saltutil.refresh_modules
-  node_name: {{ HOSTNAME_CFG01 }}
-  retry: {count: 1, delay: 10}
-  skip_fail: false
-
-- description: Update mine
-  cmd: salt -C 'I@salt:minion' mine.update; sleep 5;
-  node_name: {{ HOSTNAME_CFG01 }}
-  retry: {count: 1, delay: 10}
-  skip_fail: false
-
-- description: Pull images
-  cmd: salt -C 'I@docker:swarm:role:master' cmd.run 'docker node ls';
-    for img in pushgateway alertmanager prometheus; do
-        salt -C 'I@docker:swarm' dockerng.pull "docker-sandbox.sandbox.mirantis.net/bkupidura/$img";
-        salt -C 'I@docker:swarm' dockerng.tag "docker-sandbox.sandbox.mirantis.net/bkupidura/$img:latest" "$img:latest";
-    done;
+# Change environment configuration before deploy
+- description: Set SL docker images deploy parameters
+  cmd: |
+  {% for sl_opt, value in config.sl_deploy.items() %}
+    {% if value|string() %}
+    salt-call reclass.cluster_meta_set {{ sl_opt }} {{ value }};
+    {% endif %}
+  {% endfor %}
   node_name: {{ HOSTNAME_CFG01 }}
   retry: {count: 1, delay: 1}
   skip_fail: false
 
+# Configure the services running in Docker Swarm
+- description: Install prometheus alertmanager
+  cmd: salt -C 'I@docker:swarm' state.sls prometheus,heka.remote_collector -b 1
+  node_name: {{ HOSTNAME_CFG01 }}
+  retry: {count: 1, delay: 10}
+  skip_fail: false
+
 - description: run docker state
   cmd: salt -C 'I@docker:swarm:role:master' state.sls docker
   node_name: {{ HOSTNAME_CFG01 }}
@@ -148,7 +143,7 @@
   skip_fail: false
 
 - description: Configure Grafana dashboards and datasources
-  cmd: salt -C 'I@grafana:client' state.sls grafana.client
+  cmd: sleep 10; salt -C 'I@grafana:client' state.sls grafana.client
   node_name: {{ HOSTNAME_CFG01 }}
   retry: {count: 1, delay: 10}
   skip_fail: false
diff --git a/tcp_tests/tests/system/test_install_k8s.py b/tcp_tests/tests/system/test_install_k8s.py
index a911294..0af33e3 100644
--- a/tcp_tests/tests/system/test_install_k8s.py
+++ b/tcp_tests/tests/system/test_install_k8s.py
@@ -45,7 +45,7 @@
         show_step(5)
         k8sclient = k8s_deployed.api
         assert k8sclient.nodes.list() is not None, "Can not get nodes list"
-
+        netchecker_port = netchecker.get_service_port(k8sclient)
         show_step(6)
         netchecker.get_netchecker_pod_status(k8s=k8s_deployed,
                                              namespace='netchecker')
@@ -57,9 +57,9 @@
 
         # show_step(8)
         netchecker.wait_check_network(k8sclient, namespace='netchecker',
-                                      netchecker_pod_port=30811)
+                                      netchecker_pod_port=netchecker_port)
         show_step(9)
-        res = netchecker.get_metric(k8sclient, netchecker_pod_port=30811,
+        res = netchecker.get_metric(k8sclient, netchecker_pod_port=netchecker_port,
                                     namespace='netchecker')
 
         assert res.status_code == 200, 'Unexpected response code {}'.format(res)
@@ -76,6 +76,18 @@
             assert metric in res.text.strip(), \
                 'Mandotory metric {0} is missing in {1}'.format(
                     metric, res.text)
+
+        prometheus_client = sl_deployed.api
+        current_targets = prometheus_client.get_targets()
+        #todo (tleontovich) add assertion that k8s targets here
+        LOG.debug('Current targets after install {0}'.format(current_targets))
+
+        for metric in metrics:
+            res = prometheus_client.get_query(metric)
+            for entry in res:
+                assert entry["metric"]["job"] == 'kubernetes-service-endpoints'
+            LOG.debug('Metric {} exists'.format(res))
+            # todo (tleontovich) add asserts here and extend the tests with acceptance criteria
         
         if config.k8s.k8s_conformance_run:
             k8s_actions.run_conformance()