import datetime
import json

import pytest
import requests


@pytest.mark.sl_dup
# ElasticsearchClusterHealthStatusMajor or stacklight-pytest
@pytest.mark.full
@pytest.mark.usefixtures('check_kibana')
def test_elasticsearch_cluster(local_salt_client):
    salt_output = local_salt_client.pillar_get(
        tgt='kibana:server',
        param='_param:haproxy_elasticsearch_bind_host')
    ssl = local_salt_client.pillar_get(
        tgt='elasticsearch:server',
        param='haproxy:proxy:listen:elasticsearch:binds:ssl:enabled')
    proto = "https" if ssl else "http"

    proxies = {"http": None, "https": None}
    IP = salt_output
    assert requests.get('{0}://{1}:9200/'.format(proto, IP),
                        proxies=proxies, verify=False).status_code == 200, \
        'Cannot check elasticsearch url on {}.'.format(IP)
    resp = requests.get('{0}://{1}:9200/_cat/health'.format(proto, IP),
                        proxies=proxies, verify=False).content
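    # Illustrative only: a typical '_cat/health' line, with the column order
    # assumed from the ES 5.x default output (verify against the deployed
    # version):
    #   epoch timestamp cluster status node.total node.data shards pri relo
    #   init unassign pending_tasks max_task_wait_time active_shards_percent
    # The index-based checks below rely on that order: [3] status,
    # [4] node.total, [5] node.data, [10] unassign, [13] active_shards_percent.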
    health = resp.split()
    assert health[3] == 'green', \
        'Elasticsearch cluster status is not green: {}'.format(resp)
    assert health[4] == '3', \
        'Unexpected total number of Elasticsearch nodes: {}'.format(resp)
    assert health[5] == '3', \
        'Unexpected number of Elasticsearch data nodes: {}'.format(resp)
    assert health[10] == '0', \
        'Elasticsearch has unassigned shards: {}'.format(resp)
    assert health[13] == '100.0%', \
        'Not all Elasticsearch shards are active: {}'.format(resp)


@pytest.mark.sl_dup
# stacklight-pytest
@pytest.mark.full
@pytest.mark.usefixtures('check_kibana')
def test_kibana_status(local_salt_client):
    proxies = {"http": None, "https": None}
    IP = local_salt_client.pillar_get(param='_param:stacklight_log_address')
    ssl = local_salt_client.pillar_get(
        tgt='kibana:server',
        param='haproxy:proxy:listen:kibana:binds:ssl:enabled')
    proto = "https" if ssl else "http"

    resp = requests.get('{0}://{1}:5601/api/status'.format(proto, IP),
                        proxies=proxies, verify=False).content
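    # Illustrative only: the parsing below assumes the Kibana /api/status
    # payload looks roughly like this (the shape may differ between Kibana
    # versions):
    #   {"status": {"overall": {"state": "green"},
    #               "statuses": [{"id": "...", "state": "green"}, ...]}}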
    body = json.loads(resp)
    assert body['status']['overall']['state'] == "green", \
        "Kibana overall status is not green: {}".format(
            body['status']['overall'])
    for i in body['status']['statuses']:
        assert i['state'] == "green", \
            "Kibana subsystem status is not green: {}".format(i)


@pytest.mark.smoke
# TODO: recheck
@pytest.mark.usefixtures('check_kibana')
def test_elasticsearch_node_count(local_salt_client):
    now = datetime.datetime.now()
    today = now.strftime("%Y.%m.%d")
    salt_output = local_salt_client.pillar_get(
        tgt='kibana:server',
        param='_param:haproxy_elasticsearch_bind_host')

    IP = salt_output
    ssl = local_salt_client.pillar_get(
        tgt='elasticsearch:server',
        param='haproxy:proxy:listen:elasticsearch:binds:ssl:enabled')
    proto = "https" if ssl else "http"

    headers = {'Content-type': 'application/json', 'Accept': 'text/plain'}
    proxies = {"http": None, "https": None}
    data = ('{"size": 0, "aggs": '
            '{"uniq_hostname": '
            '{"terms": {"size": 500, '
            '"field": "Hostname.keyword"}}}}')
    response = requests.post(
        '{0}://{1}:9200/log-{2}/_search?pretty'.format(proto, IP, today),
        proxies=proxies,
        headers=headers,
        verify=False,
        data=data)
    assert 200 == response.status_code, 'Unexpected code {}'.format(
        response.text)
    resp = json.loads(response.text)
    cluster_domain = local_salt_client.pillar_get(
        param='_param:cluster_domain')
    monitored_nodes = []
    for item_ in resp['aggregations']['uniq_hostname']['buckets']:
        node_name = item_['key']
        monitored_nodes.append(node_name + '.' + cluster_domain)
    missing_nodes = []
    all_nodes = local_salt_client.test_ping(tgt='*').keys()
    for node in all_nodes:
        if node not in monitored_nodes:
            missing_nodes.append(node)
    assert len(missing_nodes) == 0, \
        'Not all nodes are in Elasticsearch. Found {0} keys, ' \
        'expected {1}. Missing nodes:\n{2}'.format(
            len(monitored_nodes), len(all_nodes), missing_nodes)


@pytest.mark.sl_dup
# DockerServiceMonitoring*
@pytest.mark.full
def test_stacklight_services_replicas(local_salt_client):
    # TODO
    # change to docker:swarm:role:master ?
    salt_output = local_salt_client.cmd(
        tgt='I@docker:client:stack:monitoring and I@prometheus:server',
        param='docker service ls',
        expr_form='compound')

    if not salt_output:
        pytest.skip("docker:client:stack:monitoring or prometheus:server "
                    "pillars are not found on this environment.")

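    # Illustrative only: the loop below expects 'docker service ls' lines
    # shaped roughly like
    #   <ID>  <NAME>  replicated  3/3  <IMAGE>
    # and flags a service when the character before '/' differs from the
    # character after it. Note this simple check assumes single-digit
    # replica counts.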
    wrong_items = []
    for line in salt_output[salt_output.keys()[0]].split('\n'):
        if line[line.find('/') - 1] != line[line.find('/') + 1] \
                and 'replicated' in line:
            wrong_items.append(line)
    assert len(wrong_items) == 0, \
        'Some monitoring services do not have the expected number of ' \
        'replicas:\n{}'.format(json.dumps(wrong_items, indent=4))


@pytest.mark.smoke
@pytest.mark.usefixtures('check_prometheus')
def test_prometheus_alert_count(local_salt_client, ctl_nodes_pillar):
    IP = local_salt_client.pillar_get(param='_param:cluster_public_host')
    proto = local_salt_client.pillar_get(
        param='_param:cluster_public_protocol')
    # keystone:server can return 3 nodes instead of 1
    # this will be fixed later
    # TODO
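    # Illustrative only: the command below scrapes the alerts page exposed
    # on port 15010 and keeps rows that contain an 'icon-chevron-down'
    # element but are not marked "0 active", i.e. alert groups with at
    # least one firing alert. The exact HTML markup is assumed from the
    # grep/replace calls below and may differ between versions.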
    nodes_info = local_salt_client.cmd(
        tgt=ctl_nodes_pillar,
        param='curl -k -s {0}://{1}:15010/alerts | grep icon-chevron-down | '
              'grep -v "0 active"'.format(proto, IP),
        expr_form='pillar')

    result = nodes_info[nodes_info.keys()[0]].replace('</td>', '').replace(
        '<td><i class="icon-chevron-down"></i> <b>', '').replace('</b>', '')
    assert result == '', 'Alerts page has firing alerts! {}'.format(
        json.dumps(result, indent=4))


@pytest.mark.sl_dup
# DockerServiceMonitoring* ??
@pytest.mark.full
def test_stacklight_containers_status(local_salt_client):
    salt_output = local_salt_client.cmd(
        tgt='I@docker:swarm:role:master and I@prometheus:server',
        param='docker service ps $(docker stack services -q monitoring)',
        expr_form='compound')

    if not salt_output:
        pytest.skip("docker:swarm:role:master or prometheus:server "
                    "pillars are not found on this environment.")

    result = {}
    # for old reclass models, docker:swarm:role:master can return
    # 2 nodes instead of one. Here is a temporary fix.
    # TODO
    if len(salt_output.keys()) > 1:
        if 'CURRENT STATE' not in salt_output[salt_output.keys()[0]]:
            del salt_output[salt_output.keys()[0]]
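    # Illustrative only: each parsed 'docker service ps' line is expected to
    # look roughly like
    #   <ID>  <NAME>  <IMAGE>  <NODE>  Running  Running 2 hours ago
    # so split()[1] is the task name and split()[4] the state checked below;
    # history lines carry an extra '\_' token, which shifts the fields by
    # one. The column layout is assumed here and may vary with the Docker
    # version.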
    for line in salt_output[salt_output.keys()[0]].split('\n')[1:]:
        shift = 0
        if line.split()[1] == '\\_':
            shift = 1
        if line.split()[1 + shift] not in result.keys():
            result[line.split()[1 + shift]] = 'NOT OK'
        if line.split()[4 + shift] == 'Running' \
                or line.split()[4 + shift] == 'Ready':
            result[line.split()[1 + shift]] = 'OK'
    assert 'NOT OK' not in result.values(), \
        'Some containers are in an incorrect state:\n{}'.format(
            json.dumps(result, indent=4))


@pytest.mark.sl_dup
# PrometheusTargetDown
@pytest.mark.full
def test_running_telegraf_services(local_salt_client):
    salt_output = local_salt_client.cmd(tgt='telegraf:agent',
                                        fun='service.status',
                                        param='telegraf',
                                        expr_form='pillar')

    if not salt_output:
        pytest.skip("telegraf:agent pillar is not found "
                    "on this environment.")

    result = [{node: status} for node, status
              in salt_output.items()
              if status is False]
    assert result == [], 'Telegraf service is not running ' \
                         'on the following nodes: {}'.format(result)


@pytest.mark.sl_dup
# PrometheusTargetDown
@pytest.mark.full
def test_running_fluentd_services(local_salt_client):
    salt_output = local_salt_client.cmd(tgt='fluentd:agent',
                                        fun='service.status',
                                        param='td-agent',
                                        expr_form='pillar')
    result = [{node: status} for node, status
              in salt_output.items()
              if status is False]
    assert result == [], 'Fluentd check failed: td-agent service is not ' \
                         'running on the following nodes: {}'.format(result)