blob: 530496322c555a2ab3e3b80ef148c334dc5d494d [file] [log] [blame]
Oleksii Zhurba020fab42017-11-01 20:13:28 +00001import json
2import requests
Oleksii Zhurba84ce7fe2018-01-16 21:34:01 +00003import datetime
Oleksii Zhurba9848e212018-09-05 10:53:51 -05004import pytest
Oleksii Zhurba020fab42017-11-01 20:13:28 +00005
Hanna Arhipova481938b2020-02-11 22:17:20 +02006import utils
7import logging
8
9# ################################ FIXTURES ##################################
10
11
def prometheus_rules():
    """Fetch Prometheus rules and map each rule name to its alerts.

    The Prometheus endpoint and credentials are read from Salt pillars.
    Nothing is raised for the "Stacklight is absent/unreachable" cases that
    are detected here; an empty dict is returned instead so that importing
    this module does not fail.

    :return: dict ``{rule_name: alerts}`` built from the first rule group
        of the ``/api/v1/rules`` response, or an empty dict when the
        password is missing, the response status is not 200 or no rule
        groups are returned.
    """
    salt = utils.init_salt_client()

    IP = salt.pillar_get(param='_param:cluster_public_host')
    proto = salt.pillar_get(
        param='_param:cluster_public_protocol')
    # Explicitly bypass any proxy configured in the environment.
    proxies = {"http": None, "https": None}

    # The pillar holding the password moved between releases; take the
    # first one that is set.
    prometheus_password = (
        # new password in 2019.2.7
        salt.pillar_get(
            tgt="nginx:server",
            param='_param:nginx_proxy_prometheus_server_password')

        # Generated password ~2019.2.4
        or salt.pillar_get(
            param='_param:prometheus_server_proxy_password_generated')

        # old password ~ 2019.2.0
        or salt.pillar_get(
            param='_param:keepalived_prometheus_vip_password_generated')

        or ""
    )

    if not prometheus_password:
        # Adjacent literals instead of a backslash continuation, which
        # leaked the source indentation into the logged message.
        logging.warning("Got empty prometheus_password. "
                        "Possibly this cluster with no Stacklight component")
        return dict()

    response = requests.get(
        '{0}://{1}:15010/api/v1/rules'.format(proto, IP),
        proxies=proxies,
        auth=('prometheus', prometheus_password),
        verify=False)

    if response.status_code != 200:
        logging.warning(
            "Got response with incorrect status: {}".format(response))
        return dict()

    content = json.loads(response.content.decode())
    groups = content['data']['groups']
    if not groups:
        # Indexing an empty list would raise at import time and break
        # collection of every test in this module.
        logging.warning("Prometheus returned no rule groups")
        return dict()

    # NOTE(review): only the first rule group is inspected, as before --
    # confirm that all alerting rules really live in that group.
    rules = groups[0]["rules"]

    # collect rules with dict {'rulename' : {<rulecontent>}}
    # Rules without an 'alerts' key (presumably recording rules) are skipped
    # instead of raising KeyError.
    alerts_by_name = {rule['name']: rule['alerts']
                      for rule in rules
                      if 'alerts' in rule
                      }
    logging.debug("collected next rules: {}".format(alerts_by_name))
    return alerts_by_name
62
63
# Evaluated once at import time: the name is rebound from the function to the
# dict it returns, which the fixture below uses for its ids and params.
prometheus_rules = prometheus_rules()
65
66
# NOTE(review): pytest documents that marks applied to fixtures have no
# effect, so this usefixtures mark presumably does not activate
# 'check_prometheus' -- confirm whether the fixture should request it.
@pytest.mark.usefixtures('check_prometheus')
@pytest.fixture(scope='session',
                ids=prometheus_rules.keys(),
                params=prometheus_rules.values())
def alert_in_prometheus(request):
    """Return the current fixture parameter.

    The fixture is parametrized over the values of the module-level
    ``prometheus_rules`` dict, with the dict keys used as test ids.
    """
    return request.param
73
74# ############################## TESTS #######################################
75
76
@pytest.mark.sl_dup
# ElasticsearchClusterHealthStatusMajor or stacklight-pytest
@pytest.mark.full
@pytest.mark.usefixtures('check_kibana')
def test_elasticsearch_cluster(local_salt_client):
    """Check that Elasticsearch answers and its ``_cat/health`` line is sane.

    The host and the SSL flag are taken from Salt pillars. The root URL
    must return 200, then selected whitespace-separated fields of the
    ``_cat/health`` output are compared with fixed expected values.
    """
    IP = local_salt_client.pillar_get(
        tgt='kibana:server',
        param='_param:haproxy_elasticsearch_bind_host')
    ssl = local_salt_client.pillar_get(
        tgt='elasticsearch:server',
        param='haproxy:proxy:listen:elasticsearch:binds:ssl:enabled')
    proto = "https" if ssl else "http"

    # Explicitly bypass any proxy configured in the environment.
    no_proxy = {"http": None, "https": None}
    base_url = '{0}://{1}:9200/'.format(proto, IP)

    root_reply = requests.get(base_url, proxies=no_proxy, verify=False)
    assert root_reply.status_code == 200, (
        "Issues with accessing elasticsearch on {}.".format(IP))

    health_reply = requests.get(
        base_url + '_cat/health',
        proxies=no_proxy,
        verify=False)
    health_line = health_reply.content.decode()
    msg = "elasticsearch is not healthy:\n{}".format(
        json.dumps(health_line, indent=4))

    # (field index, expected value) pairs checked in this order.
    # Presumably these are status, node.total, node.data, unassigned shards
    # and active_shards_percent -- verify against the cat health API.
    expected_fields = (
        (3, 'green'),
        (4, '3'),
        (5, '3'),
        (10, '0'),
        (13, '100.0%'),
    )
    fields = health_line.split()
    for position, expected in expected_fields:
        assert fields[position] == expected, msg
Oleksii Zhurba020fab42017-11-01 20:13:28 +0000109
110
@pytest.mark.sl_dup
# stacklight-pytest
@pytest.mark.full
@pytest.mark.usefixtures('check_kibana')
def test_kibana_status(local_salt_client):
    """Check that Kibana reports 'green' overall and for every status entry.

    The address and the SSL flag come from Salt pillars; the state is read
    from the JSON returned by ``/api/status`` on port 5601.
    """
    # Explicitly bypass any proxy configured in the environment.
    no_proxy = {"http": None, "https": None}
    IP = local_salt_client.pillar_get(param='_param:stacklight_log_address')
    ssl = local_salt_client.pillar_get(
        tgt='kibana:server',
        param='haproxy:proxy:listen:kibana:binds:ssl:enabled')
    proto = "https" if ssl else "http"

    status_url = '{0}://{1}:5601/api/status'.format(proto, IP)
    reply = requests.get(status_url, proxies=no_proxy, verify=False)
    body = json.loads(reply.content.decode())

    overall = body['status']['overall']
    assert overall['state'] == "green", (
        "Kibana overall status is not 'green':\n{}".format(overall)
    )
    for entry in body['status']['statuses']:
        assert entry['state'] == "green", (
            "Kibana statuses are unexpected:\n{}".format(entry))
Ievgeniia Zadorozhna511f0ce2018-11-08 17:43:10 +0300135
136
@pytest.mark.smoke
# TODO: recheck
@pytest.mark.usefixtures('check_kibana')
def test_elasticsearch_node_count(local_salt_client):
    """Check that every Salt minion has log entries in today's ES index.

    Unique ``Hostname`` values are aggregated from the ``log-<date>`` index,
    suffixed with the cluster domain and compared with the minion ids that
    answer ``test_ping``. The test fails listing the minions that are
    missing from Elasticsearch.
    """
    # NOTE(review): the index date uses local time -- confirm the indices
    # are not named by UTC date.
    today = datetime.datetime.now().strftime("%Y.%m.%d")
    IP = local_salt_client.pillar_get(
        tgt='kibana:server',
        param='_param:haproxy_elasticsearch_bind_host')
    ssl = local_salt_client.pillar_get(
        tgt='elasticsearch:server',
        param='haproxy:proxy:listen:elasticsearch:binds:ssl:enabled')
    proto = "https" if ssl else "http"

    headers = {'Content-type': 'application/json', 'Accept': 'text/plain'}
    # Explicitly bypass any proxy configured in the environment.
    proxies = {"http": None, "https": None}
    # Terms aggregation over hostnames; "size": 0 suppresses the hits.
    data = ('{"size": 0, "aggs": '
            '{"uniq_hostname": '
            '{"terms": {"size": 500, '
            '"field": "Hostname.keyword"}}}}')
    response = requests.post(
        '{0}://{1}:9200/log-{2}/_search?pretty'.format(proto, IP, today),
        proxies=proxies,
        headers=headers,
        verify=False,
        data=data)
    assert response.status_code == 200, (
        'Issues with accessing elasticsearch on {}:\n{}'.format(
            IP, response.text)
    )
    resp = json.loads(response.text)
    cluster_domain = local_salt_client.pillar_get(
        param='_param:cluster_domain')

    # Minion ids are compared as "<hostname>.<cluster_domain>".
    monitored_nodes = {
        bucket['key'] + '.' + cluster_domain
        for bucket in resp['aggregations']['uniq_hostname']['buckets']
    }
    all_nodes = list(local_salt_client.test_ping(tgt='*').keys())
    missing_nodes = [node for node in all_nodes
                     if node not in monitored_nodes]

    # "Expected" is the number of minions and "found" the number of
    # hostnames seen in Elasticsearch (these were swapped before).
    assert not missing_nodes, (
        "Not all nodes are in Elasticsearch. Expected {}, but found {} keys.\n"
        "Missing nodes:\n{}".format(
            len(all_nodes), len(monitored_nodes), missing_nodes)
    )
Oleksii Zhurba84ce7fe2018-01-16 21:34:01 +0000186
187
@pytest.mark.sl_dup
# DockerServiceMonitoring*
@pytest.mark.full
def test_stacklight_services_replicas(local_salt_client):
    """Check that replicated monitoring services run all their replicas.

    ``docker service ls`` is executed through Salt and the output of the
    first responding node is inspected. Every line containing 'replicated'
    must carry an ``<running>/<desired>`` token whose two sides are equal.
    The test is skipped when Salt returns nothing.
    """
    # TODO
    # change to docker:swarm:role:master ?
    salt_output = local_salt_client.cmd(
        tgt='I@docker:client:stack:monitoring and I@prometheus:server',
        param='docker service ls',
        expr_form='compound')

    if not salt_output:
        # Adjacent literals instead of a backslash continuation, which
        # leaked the source indentation into the skip message.
        pytest.skip("docker:client:stack:monitoring or "
                    "prometheus:server pillars are not found on this "
                    "environment.")

    service_listing = next(iter(salt_output.values()))
    wrong_items = []
    for line in service_listing.split('\n'):
        if 'replicated' not in line:
            continue
        # The first token holding '/' is taken as the replicas column.
        # Whole numbers are compared, so "10/10" is fine and "1/11" is not
        # (the old single-character comparison got both wrong).
        ratio = next(
            (token for token in line.split() if '/' in token), None)
        if ratio is None:
            # A replicated service without a replicas ratio is unexpected.
            wrong_items.append(line)
            continue
        running, _, desired = ratio.partition('/')
        if running != desired:
            wrong_items.append(line)
    assert len(wrong_items) == 0, (
        "Some monitoring services don't have the expected number of "
        "replicas:\n{}".format(json.dumps(wrong_items, indent=4))
    )
Oleksii Zhurba020fab42017-11-01 20:13:28 +0000212
213
@pytest.mark.smoke
def test_prometheus_alert_count(alert_in_prometheus):
    """Fail when the parametrized rule carries alerts.

    The failure message lists the ``annotations.description`` of each alert
    received through the ``alert_in_prometheus`` fixture.
    """
    descriptions = '\n'.join(
        alert['annotations']['description']
        for alert in alert_in_prometheus)
    failure_text = '\n\n\tAlertManager page has some alerts!\n{} \n'.format(
        descriptions)

    assert len(alert_in_prometheus) == 0, failure_text
Oleksii Zhurba468e6c72018-01-16 17:43:15 +0000223
224
@pytest.mark.sl_dup
# DockerServiceMonitoring* ??
@pytest.mark.full
def test_stacklight_containers_status(local_salt_client):
    """Check that every monitoring stack task has a Running/Ready row.

    ``docker service ps`` is executed through Salt for the services of the
    ``monitoring`` stack. Each task name starts as 'NOT OK' and becomes 'OK'
    once a row for it shows 'Running' or 'Ready' in the inspected column.
    The test is skipped when Salt returns nothing.
    """
    salt_output = local_salt_client.cmd(
        tgt='I@docker:swarm:role:master and I@prometheus:server',
        param='docker service ps $(docker stack services -q monitoring)',
        expr_form='compound')

    if not salt_output:
        pytest.skip("docker:swarm:role:master or prometheus:server pillars "
                    "are not found on this environment.")

    result = {}
    # for old reclass models, docker:swarm:role:master can return
    # 2 nodes instead of one. Here is temporary fix.
    # TODO
    if len(salt_output) > 1:
        first_node = next(iter(salt_output))
        if 'CURRENT STATE' not in salt_output[first_node]:
            del salt_output[first_node]

    task_listing = next(iter(salt_output.values()))
    # The first line is the table header.
    for line in task_listing.split('\n')[1:]:
        fields = line.split()
        if not fields:
            # Blank lines used to raise IndexError.
            continue
        # Rows starting their name column with '\_' carry one extra token,
        # so every later column is shifted by one.
        shift = 1 if fields[1] == '\\_' else 0
        task_name = fields[1 + shift]
        # The default must be stored under the shifted name; without the
        # shift a bogus '\_' key was created and never set to 'OK'.
        result.setdefault(task_name, 'NOT OK')
        if fields[4 + shift] in ('Running', 'Ready'):
            result[task_name] = 'OK'
    assert 'NOT OK' not in list(result.values()), (
        "Some containers have incorrect state:\n{}".format(
            json.dumps(result, indent=4))
    )
Oleksii Zhurbae592ed12018-06-21 18:01:09 -0500258
259
@pytest.mark.sl_dup
# PrometheusTargetDown
@pytest.mark.full
def test_running_telegraf_services(local_salt_client):
    """Check that the telegraf service is running on all targeted nodes.

    ``service.status telegraf`` is executed through Salt on nodes matching
    the ``telegraf:agent`` pillar; nodes reporting ``False`` are collected.
    The test is skipped when Salt returns nothing.
    """
    salt_output = local_salt_client.cmd(
        tgt='telegraf:agent',
        fun='service.status',
        param='telegraf',
        expr_form='pillar')

    if not salt_output:
        pytest.skip("Telegraf or telegraf:agent pillars are not found on "
                    "this environment.")

    # One single-entry dict per node whose status is exactly False.
    result = []
    for node, status in salt_output.items():
        if status is False:
            result.append({node: status})

    assert result == [], (
        "Telegraf service is not running on the following nodes:\n{}".format(
            result)
    )
Ievgeniia Zadorozhna6775eb72018-11-09 19:50:04 +0300280
281
@pytest.mark.sl_dup
# PrometheusTargetDown
@pytest.mark.full
def test_running_fluentd_services(local_salt_client):
    """Check that the td-agent service is running on all targeted nodes.

    ``service.status td-agent`` is executed through Salt on nodes matching
    the ``fluentd:agent`` pillar; nodes reporting ``False`` are collected.
    The test is skipped when Salt returns nothing.
    """
    salt_output = local_salt_client.cmd(tgt='fluentd:agent',
                                        fun='service.status',
                                        param='td-agent',
                                        expr_form='pillar')

    # Same guard as the telegraf check: without it an empty answer passed
    # silently although nothing had been verified.
    if not salt_output:
        pytest.skip("Fluentd or fluentd:agent pillars are not found on "
                    "this environment.")

    result = [{node: status} for node, status
              in salt_output.items()
              if status is False]
    assert result == [], (
        "Fluentd check failed - td-agent service is not running on the "
        "following nodes:\n{}".format(result)
    )