Prometheus client
Initial prometheus client
Change-Id: I8c02be6fe7e58c2f37ac19547fd7a795896af4bc
Reviewed-on: https://review.gerrithub.io/368732
Reviewed-by: Tatyanka Leontovich <tleontovich@mirantis.com>
Tested-by: Tatyanka Leontovich <tleontovich@mirantis.com>
diff --git a/tcp_tests/fixtures/stacklight_fixtures.py b/tcp_tests/fixtures/stacklight_fixtures.py
index 82b7c88..6d7d2e9 100644
--- a/tcp_tests/fixtures/stacklight_fixtures.py
+++ b/tcp_tests/fixtures/stacklight_fixtures.py
@@ -12,14 +12,11 @@
# License for the specific language governing permissions and limitations
# under the License.
-import os
import pytest
-import yaml
from tcp_tests import logger
from tcp_tests.helpers import ext
-from tcp_tests import settings
from tcp_tests.managers import sl_manager
from tcp_tests.helpers import utils
@@ -70,6 +67,13 @@
# installed TCP API endpoint
pass
+ # Workaround for keepalived hang issue after env revert from snapshot
+ # see https://mirantis.jira.com/browse/PROD-12038
+ LOG.warning('Restarting keepalived service on controllers...')
+ sl_actions._salt.local(tgt='ctl*', fun='cmd.run',
+ args='systemctl restart keepalived.service')
+ sl_actions._salt.local(tgt='mon*', fun='cmd.run',
+ args='systemctl restart keepalived.service')
return sl_actions
@@ -106,5 +110,11 @@
# 3. config.tcp.* options contain access credentials to the already
# installed TCP API endpoint
pass
-
+ # Workaround for keepalived hang issue after env revert from snapshot
+ # see https://mirantis.jira.com/browse/PROD-12038
+ LOG.warning('Restarting keepalived service on controllers...')
+ sl_actions._salt.local(tgt='ctl*', fun='cmd.run',
+ args='systemctl restart keepalived.service')
+ sl_actions._salt.local(tgt='mon*', fun='cmd.run',
+ args='systemctl restart keepalived.service')
return sl_actions
diff --git a/tcp_tests/helpers/netchecker.py b/tcp_tests/helpers/netchecker.py
index 9c920ec..76e7640 100644
--- a/tcp_tests/helpers/netchecker.py
+++ b/tcp_tests/helpers/netchecker.py
@@ -556,3 +556,18 @@
LOG.debug('Metrics: [{0}] {1}'.format(
response.status_code, response.text.strip()))
return response
+
+
+def get_service_port(k8sclient, service_name='netchecker',
+ namespace='netchecker'):
+ full_service_name = [service.name for service
+ in k8sclient.services.list(namespace=namespace)
+ if service_name in service.name]
+ assert len(full_service_name) > 0, "No netchecker service run"
+
+ service_details = k8sclient.services.get(name=full_service_name[0],
+ namespace=namespace)
+
+    LOG.debug('Netchecker service details {0}'.format(service_details))
+ netchecker_port = service_details.spec.ports[0].node_port
+ return netchecker_port
diff --git a/tcp_tests/managers/clients/__init__.py b/tcp_tests/managers/clients/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/tcp_tests/managers/clients/__init__.py
diff --git a/tcp_tests/managers/clients/http_client.py b/tcp_tests/managers/clients/http_client.py
new file mode 100644
index 0000000..5453141
--- /dev/null
+++ b/tcp_tests/managers/clients/http_client.py
@@ -0,0 +1,49 @@
+import logging
+import urlparse
+
+import requests
+
+from tcp_tests import logger
+
+LOG = logger.logger
+
+logger = logging.getLogger(__name__)
+
+
+class HttpClient(object):
+ def __init__(self, base_url=None, user=None, password=None):
+ self.base_url = base_url
+ self.kwargs = {}
+ if user and password:
+ self.kwargs.update({"auth": (user, password)})
+
+ def set_base_url(self, base_url):
+ self.base_url = base_url
+
+ def request(self, url, method, headers=None, body=None, **kwargs):
+ logger.debug(
+ "Sending request to: {}, body: {}, headers: {}, kwargs: {}".format(
+ url, body, headers, kwargs))
+ if headers is None:
+ headers = {'Content-Type': 'application/json'}
+
+ kwargs.update(self.kwargs)
+ r = requests.request(method, urlparse.urljoin(self.base_url, url),
+ headers=headers, data=body, **kwargs)
+
+ if not r.ok:
+ raise requests.HTTPError(r.content)
+ logger.debug(r.content)
+ return r.headers, r.content
+
+ def post(self, url, body=None, **kwargs):
+ return self.request(url, "POST", body=body, **kwargs)
+
+ def get(self, url, **kwargs):
+ return self.request(url, "GET", **kwargs)
+
+ def put(self, url, body=None, **kwargs):
+ return self.request(url, "PUT", body=body, **kwargs)
+
+ def delete(self, url, **kwargs):
+ return self.request(url, "DELETE", **kwargs)
diff --git a/tcp_tests/managers/clients/prometheus/__init__.py b/tcp_tests/managers/clients/prometheus/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/tcp_tests/managers/clients/prometheus/__init__.py
diff --git a/tcp_tests/managers/clients/prometheus/prometheus_client.py b/tcp_tests/managers/clients/prometheus/prometheus_client.py
new file mode 100644
index 0000000..bf748af
--- /dev/null
+++ b/tcp_tests/managers/clients/prometheus/prometheus_client.py
@@ -0,0 +1,31 @@
+import json
+
+from tcp_tests.managers.clients import http_client
+
+
+class PrometheusClient(object):
+ def __init__(self, host, port, proto):
+ self.url = '{0}://{1}:{2}'.format(proto, host, port)
+ self.client = http_client.HttpClient(base_url=self.url)
+
+ def get_targets(self):
+ _, resp = self.client.get("/api/v1/targets")
+ targets = json.loads(resp)
+ return targets["data"]["activeTargets"]
+
+ def get_query(self, query, timestamp=None):
+ params = {
+ "query": query
+ }
+
+ if timestamp is not None:
+ params.update({"time": timestamp})
+
+ _, resp = self.client.get("/api/v1/query", params=params)
+
+ query_result = json.loads(resp)
+ if query_result["status"] != "success":
+ raise Exception("Failed resp: {}".format(resp))
+
+ if query_result["data"]["resultType"] == "vector":
+ return query_result["data"]["result"]
diff --git a/tcp_tests/managers/sl_manager.py b/tcp_tests/managers/sl_manager.py
index c868d67..23d362e 100644
--- a/tcp_tests/managers/sl_manager.py
+++ b/tcp_tests/managers/sl_manager.py
@@ -13,6 +13,7 @@
# under the License.
from tcp_tests.managers.execute_commands import ExecuteCommandsMixin
+from tcp_tests.managers.clients.prometheus import prometheus_client
class SLManager(ExecuteCommandsMixin):
@@ -25,6 +26,7 @@
self.__config = config
self.__underlay = underlay
self._salt = salt
+ self._p_client = None
super(SLManager, self).__init__(
config=config, underlay=underlay)
@@ -32,3 +34,26 @@
self.execute_commands(commands,
label='Install SL services')
self.__config.stack_light.sl_installed = True
+ self.__config.stack_light.sl_vip_host = self.get_sl_vip()
+
+ def get_sl_vip(self):
+ sl_vip_address_pillars = self._salt.get_pillar(
+ tgt='I@keepalived:cluster:enabled:true and not ctl*',
+ pillar='keepalived:cluster:instance:prometheus_server_vip:address')
+ sl_vip_ip = set([ip
+ for item in sl_vip_address_pillars
+ for node,ip in item.items() if ip])
+ assert len(sl_vip_ip) == 1, (
+ "Found more than one SL VIP in pillars:{0}, "
+ "expected one!").format(sl_vip_ip)
+ sl_vip_ip_host = sl_vip_ip.pop()
+ return sl_vip_ip_host
+
+ @property
+ def api(self):
+ if self._p_client is None:
+ self._p_client = prometheus_client.PrometheusClient(
+ host=self.__config.stack_light.sl_vip_host,
+ port=self.__config.stack_light.sl_prometheus_port,
+ proto=self.__config.stack_light.sl_prometheus_proto)
+ return self._p_client
diff --git a/tcp_tests/settings_oslo.py b/tcp_tests/settings_oslo.py
index 47c18d1..c01c743 100644
--- a/tcp_tests/settings_oslo.py
+++ b/tcp_tests/settings_oslo.py
@@ -175,6 +175,12 @@
sl_opts = [
ct.Cfg('sl_installed', ct.Boolean(),
help="", default=False),
+ ct.Cfg('sl_vip_host', ct.IPAddress(),
+ help="Vip address for SL services", default='0.0.0.0'),
+ ct.Cfg('sl_prometheus_port', ct.String(),
+ help="Prometheus port", default='15010'),
+ ct.Cfg('sl_prometheus_proto', ct.String(),
+           help="Prometheus protocol", default='http'),
]
virtlet_deploy_opts = [
diff --git a/tcp_tests/templates/virtual-mcp11-k8s-calico/sl.yaml b/tcp_tests/templates/virtual-mcp11-k8s-calico/sl.yaml
index 5be65fd..7258fea 100644
--- a/tcp_tests/templates/virtual-mcp11-k8s-calico/sl.yaml
+++ b/tcp_tests/templates/virtual-mcp11-k8s-calico/sl.yaml
@@ -130,16 +130,6 @@
retry: {count: 1, delay: 10}
skip_fail: false
-- description: Pull images
- cmd: salt -C 'I@docker:swarm:role:master' cmd.run 'docker node ls';
- for img in pushgateway alertmanager prometheus telegraf remote_storage_adapter; do
- salt -C 'I@docker:swarm' dockerng.pull "docker-prod-virtual.docker.mirantis.net/openstack-docker/$img";
- salt -C 'I@docker:swarm' dockerng.tag "docker-prod-virtual.docker.mirantis.net/openstack-docker/$img:latest" "$img:latest";
- done;
- node_name: {{ HOSTNAME_CFG01 }}
- retry: {count: 1, delay: 1}
- skip_fail: false
-
- description: run docker state
cmd: salt -C 'I@docker:swarm:role:master' state.sls docker
node_name: {{ HOSTNAME_CFG01 }}
@@ -153,7 +143,7 @@
skip_fail: false
- description: Configure Grafana dashboards and datasources
- cmd: salt -C 'I@grafana:client' state.sls grafana.client
+  cmd: sleep 10; salt -C 'I@grafana:client' state.sls grafana.client
node_name: {{ HOSTNAME_CFG01 }}
retry: {count: 1, delay: 10}
skip_fail: false
diff --git a/tcp_tests/templates/virtual-mcp11-k8s-contrail/sl.yaml b/tcp_tests/templates/virtual-mcp11-k8s-contrail/sl.yaml
index 8b708c9..de3f15d 100644
--- a/tcp_tests/templates/virtual-mcp11-k8s-contrail/sl.yaml
+++ b/tcp_tests/templates/virtual-mcp11-k8s-contrail/sl.yaml
@@ -1,4 +1,4 @@
-{% from 'virtual-mcp11-k8s-calico/underlay.yaml' import HOSTNAME_CFG01 with context %}
+{% from 'virtual-mcp11-k8s-contrail/underlay.yaml' import HOSTNAME_CFG01 with context %}
# Install docker swarm
- description: Configure docker service
@@ -45,7 +45,13 @@
# Install slv2 infra
- description: Install telegraf
- cmd: salt -C 'I@telegraf:agent' state.sls telegraf
+  cmd: salt -C 'I@telegraf:agent or I@telegraf:remote_agent' state.sls telegraf
+ node_name: {{ HOSTNAME_CFG01 }}
+ retry: {count: 1, delay: 10}
+ skip_fail: false
+
+- description: Configure collector
+ cmd: salt -C 'I@heka:log_collector' state.sls heka.log_collector
node_name: {{ HOSTNAME_CFG01 }}
retry: {count: 1, delay: 10}
skip_fail: false
@@ -62,18 +68,6 @@
retry: {count: 1, delay: 10}
skip_fail: false
-- description: Install elasticsearch service
- cmd: salt -C 'I@elasticsearch:client' state.sls elasticsearch.client.service
- node_name: {{ HOSTNAME_CFG01 }}
- retry: {count: 1, delay: 10}
- skip_fail: false
-
-- description: Restart minions
- cmd: salt -C 'I@elasticsearch:client' --async service.restart salt-minion; sleep 10;
- node_name: {{ HOSTNAME_CFG01 }}
- retry: {count: 1, delay: 10}
- skip_fail: false
-
- description: Install elasticsearch client
cmd: salt -C 'I@elasticsearch:client' state.sls elasticsearch.client
node_name: {{ HOSTNAME_CFG01 }}
@@ -86,6 +80,17 @@
retry: {count: 1, delay: 10}
skip_fail: false
+- description: Check influix db
+ cmd: |
+ INFLUXDB_SERVICE=`salt -C 'I@influxdb:server' test.ping 1>/dev/null 2>&1 && echo true`;
+ echo "Influxdb service presence: ${INFLUXDB_SERVICE}";
+ if [[ "$INFLUXDB_SERVICE" == "true" ]]; then
+ salt -C 'I@influxdb:server' state.sls influxdb
+ fi
+ node_name: {{ HOSTNAME_CFG01 }}
+ retry: {count: 1, delay: 5}
+ skip_fail: true
+
# Collect grains needed to configure the services
- description: Get grains
@@ -106,35 +111,25 @@
retry: {count: 1, delay: 10}
skip_fail: false
-# Configure the services running in Docker Swarm
-- description: Install prometheus alertmanager
- cmd: salt -C 'I@docker:swarm' state.sls prometheus.server,prometheus.alertmanager -b 1
- node_name: {{ HOSTNAME_CFG01 }}
- retry: {count: 1, delay: 10}
- skip_fail: false
-
-- description: Sync modules
- cmd: salt -C 'I@salt:minion' saltutil.refresh_modules
- node_name: {{ HOSTNAME_CFG01 }}
- retry: {count: 1, delay: 10}
- skip_fail: false
-
-- description: Update mine
- cmd: salt -C 'I@salt:minion' mine.update; sleep 5;
- node_name: {{ HOSTNAME_CFG01 }}
- retry: {count: 1, delay: 10}
- skip_fail: false
-
-- description: Pull images
- cmd: salt -C 'I@docker:swarm:role:master' cmd.run 'docker node ls';
- for img in pushgateway alertmanager prometheus; do
- salt -C 'I@docker:swarm' dockerng.pull "docker-sandbox.sandbox.mirantis.net/bkupidura/$img";
- salt -C 'I@docker:swarm' dockerng.tag "docker-sandbox.sandbox.mirantis.net/bkupidura/$img:latest" "$img:latest";
- done;
+# Change environment configuration before deploy
+- description: Set SL docker images deploy parameters
+ cmd: |
+ {% for sl_opt, value in config.sl_deploy.items() %}
+ {% if value|string() %}
+ salt-call reclass.cluster_meta_set {{ sl_opt }} {{ value }};
+ {% endif %}
+ {% endfor %}
node_name: {{ HOSTNAME_CFG01 }}
retry: {count: 1, delay: 1}
skip_fail: false
+# Configure the services running in Docker Swarm
+- description: Install prometheus alertmanager
+ cmd: salt -C 'I@docker:swarm' state.sls prometheus,heka.remote_collector -b 1
+ node_name: {{ HOSTNAME_CFG01 }}
+ retry: {count: 1, delay: 10}
+ skip_fail: false
+
- description: run docker state
cmd: salt -C 'I@docker:swarm:role:master' state.sls docker
node_name: {{ HOSTNAME_CFG01 }}
@@ -148,7 +143,7 @@
skip_fail: false
- description: Configure Grafana dashboards and datasources
- cmd: salt -C 'I@grafana:client' state.sls grafana.client
+  cmd: sleep 10; salt -C 'I@grafana:client' state.sls grafana.client
node_name: {{ HOSTNAME_CFG01 }}
retry: {count: 1, delay: 10}
skip_fail: false
diff --git a/tcp_tests/tests/system/test_install_k8s.py b/tcp_tests/tests/system/test_install_k8s.py
index a911294..0af33e3 100644
--- a/tcp_tests/tests/system/test_install_k8s.py
+++ b/tcp_tests/tests/system/test_install_k8s.py
@@ -45,7 +45,7 @@
show_step(5)
k8sclient = k8s_deployed.api
assert k8sclient.nodes.list() is not None, "Can not get nodes list"
-
+ netchecker_port = netchecker.get_service_port(k8sclient)
show_step(6)
netchecker.get_netchecker_pod_status(k8s=k8s_deployed,
namespace='netchecker')
@@ -57,9 +57,9 @@
# show_step(8)
netchecker.wait_check_network(k8sclient, namespace='netchecker',
- netchecker_pod_port=30811)
+ netchecker_pod_port=netchecker_port)
show_step(9)
- res = netchecker.get_metric(k8sclient, netchecker_pod_port=30811,
+ res = netchecker.get_metric(k8sclient, netchecker_pod_port=netchecker_port,
namespace='netchecker')
assert res.status_code == 200, 'Unexpected response code {}'.format(res)
@@ -76,6 +76,18 @@
assert metric in res.text.strip(), \
'Mandotory metric {0} is missing in {1}'.format(
metric, res.text)
+
+ prometheus_client = sl_deployed.api
+ current_targets = prometheus_client.get_targets()
+ #todo (tleontovich) add assertion that k8s targets here
+ LOG.debug('Current targets after install {0}'.format(current_targets))
+
+ for metric in metrics:
+ res = prometheus_client.get_query(metric)
+ for entry in res:
+ assert entry["metric"]["job"] == 'kubernetes-service-endpoints'
+ LOG.debug('Metric {} exists'.format(res))
+ # todo (tleontovich) add asserts here and extend the tests with acceptance criteria
if config.k8s.k8s_conformance_run:
k8s_actions.run_conformance()