blob: 4f3fef26c0ad6b432d306c27739d6691e011487a [file] [log] [blame]
Oleksii Zhurba020fab42017-11-01 20:13:28 +00001import json
2import requests
Oleksii Zhurba84ce7fe2018-01-16 21:34:01 +00003import datetime
Oleksii Zhurba9848e212018-09-05 10:53:51 -05004import pytest
Oleksii Zhurba020fab42017-11-01 20:13:28 +00005
Hanna Arhipova481938b2020-02-11 22:17:20 +02006import utils
7import logging
8
# ################################ FIXTURES ##################################
10
11
def prometheus_rules():
    """Collect the alerts attached to each rule loaded in Prometheus.

    Queries ``/api/v1/rules`` on port 15010 of the cluster public host,
    authenticating as ``prometheus`` with the first non-empty password
    found in the pillars listed below.

    :return: dict mapping rule name to that rule's list of alerts. An
        empty dict is returned when the request does not answer with
        HTTP 200 or when no rule group is reported.
    """
    salt = utils.init_salt_client()

    host = salt.pillar_get(param='_param:cluster_public_host')
    proto = salt.pillar_get(
        param='_param:cluster_public_protocol')
    # Explicitly bypass any proxy configured in the environment.
    proxies = {"http": None, "https": None}

    prometheus_password = (
        # new password in 2019.2.7
        salt.pillar_get(
            tgt="nginx:server",
            param='_param:nginx_proxy_prometheus_server_password')

        # Generated password ~2019.2.4
        or salt.pillar_get(
            param='_param:prometheus_server_proxy_password_generated')

        # old password ~ 2019.2.0
        or salt.pillar_get(
            param='_param:keepalived_prometheus_vip_password_generated')
    )

    # NOTE: certificate verification is switched off for this request.
    response = requests.get(
        '{0}://{1}:15010/api/v1/rules'.format(proto, host),
        proxies=proxies,
        auth=('prometheus', prometheus_password),
        verify=False)

    if response.status_code != 200:
        # Must be a mapping: the result is consumed through .keys() and
        # .values() when the alert_in_prometheus fixture is declared.
        return {}

    content = json.loads(response.content.decode())
    groups = content['data']['groups']
    if not groups:
        return {}
    # NOTE(review): only the first rule group is inspected, as before --
    # confirm that the deployment keeps all alerting rules in one group.
    rules = groups[0]["rules"]

    # collect rules with dict {'rulename' : {<rulecontent>}}
    # Rules without an 'alerts' entry (recording rules, per the Prometheus
    # HTTP API description) cannot fire and are left out instead of
    # aborting module import with a KeyError.
    alerts_by_name = {rule['name']: rule['alerts']
                      for rule in rules
                      if 'alerts' in rule}
    logging.debug("collected next rules: %s", alerts_by_name)
    return alerts_by_name
53
54
# Evaluated once while the module is imported: the decorator of the
# ``alert_in_prometheus`` fixture below reads the collected rules at
# declaration time. Note that this rebinds the name -- from here on
# ``prometheus_rules`` is the collected result, not the function.
prometheus_rules = prometheus_rules()
56
57
@pytest.fixture(params=list(prometheus_rules.values()),
                ids=list(prometheus_rules.keys()),
                scope='session')
def alert_in_prometheus(request):
    """Yield, one rule at a time, the alerts collected for that rule.

    The fixture is parametrized over the module-level ``prometheus_rules``
    mapping: rule names are used as test ids, the alert lists as values.
    """
    alerts_of_rule = request.param
    return alerts_of_rule
63
# ############################## TESTS #######################################
65
66
@pytest.mark.sl_dup
# ElasticsearchClusterHealthStatusMajor or stacklight-pytest
@pytest.mark.full
@pytest.mark.usefixtures('check_kibana')
def test_elasticsearch_cluster(local_salt_client):
    """Check that Elasticsearch answers and reports a healthy cluster.

    The root endpoint on port 9200 must return HTTP 200, then selected
    whitespace-separated columns of ``_cat/health`` are compared with
    fixed expected values.
    """
    elastic_host = local_salt_client.pillar_get(
        tgt='kibana:server',
        param='_param:haproxy_elasticsearch_bind_host')
    ssl_enabled = local_salt_client.pillar_get(
        tgt='elasticsearch:server',
        param='haproxy:proxy:listen:elasticsearch:binds:ssl:enabled')
    if ssl_enabled:
        scheme = "https"
    else:
        scheme = "http"

    no_proxy = {"http": None, "https": None}
    base_url = '{0}://{1}:9200/'.format(scheme, elastic_host)

    root_reply = requests.get(base_url, proxies=no_proxy, verify=False)
    assert root_reply.status_code == 200, (
        "Issues with accessing elasticsearch on {}.".format(elastic_host))

    health_reply = requests.get(
        '{0}://{1}:9200/_cat/health'.format(scheme, elastic_host),
        proxies=no_proxy,
        verify=False)
    health_line = health_reply.content.decode()
    msg = "elasticsearch is not healthy:\n{}".format(
        json.dumps(health_line, indent=4))

    columns = health_line.split()
    # (column index, expected value); presumably status, node.total,
    # node.data, unassigned shards and active shards percent of the
    # default _cat/health layout -- TODO confirm against the ES version.
    expectations = (
        (3, 'green'),
        (4, '3'),
        (5, '3'),
        (10, '0'),
        (13, '100.0%'),
    )
    for position, expected in expectations:
        assert columns[position] == expected, msg
Oleksii Zhurba020fab42017-11-01 20:13:28 +000099
100
@pytest.mark.sl_dup
# stacklight-pytest
@pytest.mark.full
@pytest.mark.usefixtures('check_kibana')
def test_kibana_status(local_salt_client):
    """Check that Kibana reports 'green' overall and for every status.

    Reads ``/api/status`` on port 5601 of the stacklight log address and
    inspects the ``status`` section of the JSON reply.
    """
    no_proxy = {"http": None, "https": None}
    kibana_host = local_salt_client.pillar_get(
        param='_param:stacklight_log_address')
    ssl_enabled = local_salt_client.pillar_get(
        tgt='kibana:server',
        param='haproxy:proxy:listen:kibana:binds:ssl:enabled')
    if ssl_enabled:
        scheme = "https"
    else:
        scheme = "http"

    status_url = '{0}://{1}:5601/api/status'.format(scheme, kibana_host)
    reply = requests.get(status_url, proxies=no_proxy, verify=False)
    body = json.loads(reply.content.decode())

    overall = body['status']['overall']
    assert overall['state'] == "green", (
        "Kibana overall status is not 'green':\n{}".format(overall)
    )
    # Every individual status entry has to be green as well.
    for entry in body['status']['statuses']:
        assert entry['state'] == "green", (
            "Kibana statuses are unexpected:\n{}".format(entry))
Ievgeniia Zadorozhna511f0ce2018-11-08 17:43:10 +0300125
126
@pytest.mark.smoke
# TODO: recheck
@pytest.mark.usefixtures('check_kibana')
def test_elasticsearch_node_count(local_salt_client):
    """Check that every salt minion has log records in today's index.

    Runs a terms aggregation on ``Hostname.keyword`` against the
    ``log-<YYYY.MM.DD>`` index, appends the cluster domain to each
    returned hostname and compares the result with the minions that
    answer ``test_ping``.
    """
    # NOTE(review): the index date is taken from the local clock of the
    # machine running the test -- confirm it matches the index naming.
    today = datetime.datetime.now().strftime("%Y.%m.%d")
    IP = local_salt_client.pillar_get(
        tgt='kibana:server',
        param='_param:haproxy_elasticsearch_bind_host')
    ssl = local_salt_client.pillar_get(
        tgt='elasticsearch:server',
        param='haproxy:proxy:listen:elasticsearch:binds:ssl:enabled')
    proto = "https" if ssl else "http"

    headers = {'Content-type': 'application/json', 'Accept': 'text/plain'}
    proxies = {"http": None, "https": None}
    # "size": 0 drops the hits; only the aggregation buckets are needed.
    data = ('{"size": 0, "aggs": '
            '{"uniq_hostname": '
            '{"terms": {"size": 500, '
            '"field": "Hostname.keyword"}}}}')
    response = requests.post(
        '{0}://{1}:9200/log-{2}/_search?pretty'.format(proto, IP, today),
        proxies=proxies,
        headers=headers,
        verify=False,
        data=data)
    assert response.status_code == 200, (
        'Issues with accessing elasticsearch on {}:\n{}'.format(
            IP, response.text)
    )
    resp = json.loads(response.text)
    cluster_domain = local_salt_client.pillar_get(
        param='_param:cluster_domain')
    # A set gives constant-time membership checks below.
    monitored_nodes = {
        item_['key'] + '.' + cluster_domain
        for item_ in resp['aggregations']['uniq_hostname']['buckets']
    }
    all_nodes = list(local_salt_client.test_ping(tgt='*').keys())
    missing_nodes = [node for node in all_nodes
                     if node not in monitored_nodes]
    # "Expected" is the number of minions, "found" the number of
    # hostnames seen in Elasticsearch (these two were swapped before).
    assert len(missing_nodes) == 0, (
        "Not all nodes are in Elasticsearch. Expected {}, but found {} keys.\n"
        "Missing nodes:\n{}".format(
            len(all_nodes), len(monitored_nodes), missing_nodes)
    )
Oleksii Zhurba84ce7fe2018-01-16 21:34:01 +0000176
177
@pytest.mark.sl_dup
# DockerServiceMonitoring*
@pytest.mark.full
def test_stacklight_services_replicas(local_salt_client):
    """Check that replicated docker services run all requested replicas.

    Parses the ``docker service ls`` output of the first answering node
    and reports every line containing 'replicated' whose ``<running>/
    <desired>`` replica counter holds two different numbers.
    """
    # TODO
    # change to docker:swarm:role:master ?
    salt_output = local_salt_client.cmd(
        tgt='I@docker:client:stack:monitoring and I@prometheus:server',
        param='docker service ls',
        expr_form='compound')

    if not salt_output:
        pytest.skip("docker:client:stack:monitoring or "
                    "prometheus:server pillars are not found on this "
                    "environment.")

    service_listing = next(iter(salt_output.values()))
    wrong_items = []
    for line in service_listing.split('\n'):
        if 'replicated' not in line:
            continue
        # The first "<digits>/<digits>" token is the replica counter.
        # Whole numbers are compared, so multi-digit counts such as
        # 10/10 are handled, unlike a single-character comparison.
        for field in line.split():
            running, slash, desired = field.partition('/')
            if slash and running.isdigit() and desired.isdigit():
                if int(running) != int(desired):
                    wrong_items.append(line)
                break
    assert len(wrong_items) == 0, (
        "Some monitoring services don't have the expected number of "
        "replicas:\n{}".format(json.dumps(wrong_items, indent=4))
    )
Oleksii Zhurba020fab42017-11-01 20:13:28 +0000202
203
@pytest.mark.smoke
def test_prometheus_alert_count(alert_in_prometheus):
    """Fail when the rule under test has any alert attached.

    The failure message lists the ``description`` annotation of each
    alert, one per line.
    """
    alert_count = len(alert_in_prometheus)
    assert alert_count == 0, (
        '\n\n\tAlertManager page has some alerts!\n{} \n'.format(
            '\n'.join(
                entry['annotations']['description']
                for entry in alert_in_prometheus
            )
        )
    )
Oleksii Zhurba468e6c72018-01-16 17:43:15 +0000213
214
@pytest.mark.sl_dup
# DockerServiceMonitoring* ??
@pytest.mark.full
def test_stacklight_containers_status(local_salt_client):
    """Check that every task of the monitoring stack is Running or Ready.

    Parses ``docker service ps`` output for the services of the
    ``monitoring`` stack. A task name is marked 'OK' as soon as one of
    its rows shows 'Running' or 'Ready' in the inspected state column,
    otherwise it stays 'NOT OK'.
    """
    salt_output = local_salt_client.cmd(
        tgt='I@docker:swarm:role:master and I@prometheus:server',
        param='docker service ps $(docker stack services -q monitoring)',
        expr_form='compound')

    if not salt_output:
        pytest.skip("docker:swarm:role:master or prometheus:server pillars "
                    "are not found on this environment.")

    result = {}
    # for old reclass models, docker:swarm:role:master can return
    # 2 nodes instead of one. Here is temporary fix.
    # TODO
    node_outputs = list(salt_output.values())
    if len(node_outputs) > 1 and 'CURRENT STATE' not in node_outputs[0]:
        # The first node did not return the table; use the next one.
        node_outputs = node_outputs[1:]

    # [1:] skips the table header.
    for line in node_outputs[0].split('\n')[1:]:
        fields = line.split()
        if not fields:
            # Blank line: nothing to parse.
            continue
        # Rows marked with '\_' carry one extra column before the name,
        # so every later column moves right by one.
        shift = 1 if fields[1] == '\\_' else 0
        task_name = fields[1 + shift]
        # The default is stored under the real task name; previously the
        # unshifted column was used, which created a bogus '\_' entry.
        result.setdefault(task_name, 'NOT OK')
        if fields[4 + shift] in ('Running', 'Ready'):
            result[task_name] = 'OK'
    assert 'NOT OK' not in list(result.values()), (
        "Some containers have incorrect state:\n{}".format(
            json.dumps(result, indent=4))
    )
Oleksii Zhurbae592ed12018-06-21 18:01:09 -0500248
249
@pytest.mark.sl_dup
# PrometheusTargetDown
@pytest.mark.full
def test_running_telegraf_services(local_salt_client):
    """Check that telegraf runs on every node with telegraf:agent pillar.

    The test is skipped when the salt call returns nothing.
    """
    service_states = local_salt_client.cmd(
        tgt='telegraf:agent',
        fun='service.status',
        param='telegraf',
        expr_form='pillar')

    if not service_states:
        pytest.skip("Telegraf or telegraf:agent pillars are not found on "
                    "this environment.")

    # Only an explicit False counts as "not running".
    result = []
    for node, status in service_states.items():
        if status is False:
            result.append({node: status})
    assert result == [], (
        "Telegraf service is not running on the following nodes:\n{}".format(
            result)
    )
Ievgeniia Zadorozhna6775eb72018-11-09 19:50:04 +0300270
271
@pytest.mark.sl_dup
# PrometheusTargetDown
@pytest.mark.full
def test_running_fluentd_services(local_salt_client):
    """Check that td-agent runs on every node with fluentd:agent pillar.

    The test is skipped when the salt call returns nothing, in line with
    the telegraf check, instead of passing without checking any node.
    """
    salt_output = local_salt_client.cmd(tgt='fluentd:agent',
                                        fun='service.status',
                                        param='td-agent',
                                        expr_form='pillar')

    if not salt_output:
        pytest.skip("Fluentd or fluentd:agent pillars are not found on "
                    "this environment.")

    # Only an explicit False counts as "not running".
    result = [{node: status} for node, status
              in salt_output.items()
              if status is False]
    assert result == [], (
        "Fluentd check failed - td-agent service is not running on the "
        "following nodes:\n{}".format(result)
    )
286 )