blob: 6d70df388601defc01ea93695d69280f29e1acd9 [file] [log] [blame]
import datetime
import hashlib
import json
import logging
import os
import re
import requests
import salt.utils
import socket
import subprocess
import yaml
# Module metadata.
__author__ = "Dzmitry Stremkouski"
__copyright__ = "Copyright 2019, Mirantis Inc."
__license__ = "Apache 2.0"

# Module-level logger with its own stream handler attached
# (logging.StreamHandler() without arguments writes to sys.stderr).
logger = logging.getLogger(__name__)
stream = logging.StreamHandler()
logger.addHandler(stream)

# Prefer the C-accelerated YAML loader/dumper and fall back to the
# pure-Python implementations when they cannot be imported.
try:
    from yaml import CLoader as Loader, CDumper as Dumper
except ImportError:
    from yaml import Loader, Dumper
# Per-release version table, keyed by OpenStack release codename and then by
# service name. Values are either a single integer, a mapping of named
# integers (nova: api_db/db, keystone: contract/data/db/expand) or, for
# neutron, a list of revision identifiers.
# NOTE(review): presumably these are the expected DB schema/migration
# versions for each release; the consumer is not visible in this chunk.
#
# The document is a trusted literal. Loader is passed explicitly, as for
# default_vrouter_info_map, because yaml.load() without a Loader warns on
# PyYAML >= 5.1 and raises TypeError on PyYAML >= 6.
db_ver_map = yaml.load("""
kilo:
  cinder: 41
  glance: 41
  heat: 62
  keystone: 67
  neutron: [ kilo ]
  nova:
    api_db: 2
    db: 280
liberty:
  cinder: 60
  glance: 42
  heat: 65
  keystone: 75
  neutron:
  - 1b4c6e320f79
  - 26c371498592
  - 599c6a226151
  - 45f955889773
  - 1c844d1677f7
  - 52c5312f6baf
  - 9859ac9c136
  - 8675309a5c4f
  - 48153cb5f051
  - 31337ec0ffee
  - 34af2b5c5a59
  - 354db87e3225
  - 11926bcfe72d
  - 5498d17be016
  - 4af11ca47297
  - 2e5352a0ad4d
  - 2a16083502f3
  - 4ffceebfada
  - 30018084ec99
  nova:
    api_db: 3
    db: 302
mitaka:
  cinder: 72
  glance: 44
  heat: 71
  keystone: 97
  neutron:
  - 15be73214821
  - dce3ec7a25c9
  - 659bf3d90664
  - 19f26505c74f
  - 0e66c5227a8a
  - ec7fcfbf72ee
  - 32e5974ada25
  - 3894bccad37f
  - c3a73f615e4
  - 13cfb89f881a
  - 1df244e556f5
  - 2f9e956e7532
  - 15e43b934f81
  - 59cb5b6cf4d
  - b4caf27aae4
  - 31ed664953e6
  - 8a6d8bdae39
  - c6c112992c9
  - 2b4c2465d44b
  - 5ffceebfada
  - 1b294093239c
  - 4ffceebfcdc
  - e3278ee65050
  nova:
    api_db: 7
    db: 319
newton:
  cinder: 79
  glance: 44
  heat: 73
  keystone:
    contract: 1
    data: 4
    db: 109
    expand: 1
  neutron:
  - 030a959ceafa
  - 67daae611b6e
  - a5648cfeeadf
  - a963b38d82f4
  - 6b461a21bcfc
  - 0f5bef0f87d4
  - d3435b514502
  - 5cd92597d11d
  - 3d0e74aa7d37
  - 5abc0278ca73
  - 30107ab6a3ee
  - 45f8dd33480b
  - c415aab1c048
  - 2e0d7a8a1586
  - 5c85685d616d
  - a8b517cff8ab
  - a84ccf28f06a
  - 7d9d8eeec6ad
  - 7bbb25278f53
  - 89ab9a816d70
  - 8fd3918ef6f4
  - c879c5e1ee90
  - b67e765a3524
  - 3b935b28e7a0
  - b12a3ef66e62
  - 4bcd4df1f426
  - 97c25b0d2353
  nova:
    api_db: 22
    db: 334
ocata:
  cinder: 96
  glance: 45
  heat: 79
  keystone:
    contract: 1
    data: 16
    db: 109
    expand: 1
  neutron:
  - a9c43481023c
  - 929c968efe70
  nova:
    api_db: 31
    db: 347
pike:
  cinder: 105
  glance: 45
  heat: 80
  keystone:
    contract: 1
    data: 24
    db: 109
    expand: 1
  neutron:
  - 62c781cb6192
  - 2b42d90729da
  - 7d32f979895f
  - 349b6fd605a6
  - 804a3c76314c
  - c8c222d42aa9
  nova:
    api_db: 45
    db: 362
queens:
  cinder: 117
  glance: 45
  heat: 85
  keystone:
    contract: 1
    data: 44
    db: 109
    expand: 1
  neutron:
  - 594422d373ee
  nova:
    api_db: 52
    db: 378
rocky:
  cinder: 123
  glance: 45
  heat: 86
  keystone:
    contract: 1
    data: 52
    db: 109
    expand: 1
  neutron:
  - 61663558142c
  - 867d39095bf4
  nova:
    api_db: 61
    db: 390
stein:
  cinder: 128
  glance: 45
  heat: 86
  keystone:
    contract: 1
    data: 61
    db: 109
    expand: 1
  neutron:
  - 0ff9e3881597
  - 195176fb410d
  - d72db3e25539
  - fb0167bd9639
  - 9bfad3f1e780
  - cada2437bf41
  nova:
    api_db: 62
    db: 391
train:
  cinder: 132
  glance: 45
  heat: 86
  keystone:
    contract: 1
    data: 71
    db: 109
    expand: 1
  neutron:
  - 63fd95af7dcd
  - c613d0b82681
  nova:
    api_db: 67
    db: 402
""", Loader=Loader)
# Default selection of vrouter attributes, grouped under the top-level keys
# "ContrailConfig" and "VrouterAgent". Each entry is a colon-separated path;
# some segments are numeric (":0:") or a "*" wildcard.
# NOTE(review): presumably these are paths into the vrouter introspection
# data to be reported; the consumer is not visible in this chunk.
default_vrouter_info_map = yaml.load("""
ContrailConfig:
- deleted
- elements:uuid
- elements:virtual_router_dpdk_enabled
- elements:virtual_router_type
VrouterAgent:
- build_info:build-info:0:build-version
- build_info:build-info:0:build-number
- config_file
- control_ip
- control_node_list_cfg
- dns_server_list_cfg
- dns_servers
- down_interface_count
- eth_name
- headless_mode_cfg
- hostname_cfg
- hypervisor
- mode
- phy_if
- platform
- self_ip_list
- total_interface_count
- tunnel_type
- vhost_cfg
- vhost_if
- vr_limits:max_interfaces
- vr_limits:max_labels
- vr_limits:max_mirror_entries
- vr_limits:max_nexthops
- vr_limits:max_vrfs
- vr_limits:vrouter_max_bridge_entries
- vr_limits:vrouter_max_flow_entries
- vr_limits:vrouter_max_oflow_bridge_entries
- vr_limits:vrouter_max_oflow_entries
- xmpp_peer_list:*:ip
- xmpp_peer_list:*:primary
- xmpp_peer_list:*:status
""", Loader=Loader)

# Default list of peer attribute names.
# NOTE(review): presumably used to filter peer output; the consumer is not
# visible in this chunk.
default_peer_filter = ["encoding", "peer_address", "state"]
def _failed_minions(out, agent, failed_minions):

    ''' Report failed minions

    Does nothing and returns True when ``failed_minions`` is empty.
    Otherwise logs the check name and every failed minion (its ``ret``
    payload from ``out`` goes to the debug level), sets
    ``__context__['retcode']`` to 2 and returns False.
    '''

    if not len(failed_minions) > 0:
        return True

    logger.error("%s check FAILED", agent)
    logger.error("Some minions returned non-zero exit code or empty data")
    logger.error("Failed minions:" + str(failed_minions))
    for name in failed_minions:
        logger.error(name)
        logger.debug(str(out[name]['ret']))

    __context__['retcode'] = 2
    return False
def _minions_output(out, agent, ignore_dead, ignore_empty=False):

    ''' Verify minions output and exit code

    :param out: mapping of minion id to its job response, as returned by
                ``saltutil.cmd``
    :param agent: human readable check name used in log messages
    :param ignore_dead: when false, the job is looked up through the
                        ``jobs.print_job`` runner and the check fails if
                        the number of targeted minions differs from the
                        number of results
    :param ignore_empty: when false, a boolean or empty ``ret`` counts as a
                         failure even with a zero retcode
    :return: True when every minion answered with retcode 0 (and non-empty
             data unless ``ignore_empty``); otherwise False with
             ``__context__['retcode']`` set to 2
    '''

    if not out:
        logger.error("%s check FAILED" % agent)
        logger.error("No response from master cmd")
        __context__['retcode'] = 2
        return False

    if not ignore_dead:
        # Every response carries the same job id, so take it from any entry.
        # next(iter(...)) works on Python 2 and 3, unlike itervalues().next().
        jid = next(iter(out.values()))['jid']
        job_stats = __salt__['saltutil.runner']( 'jobs.print_job', arg=[jid] ) or None
        if not job_stats:
            logger.error("%s check FAILED" % agent)
            logger.error("No response from master runner")
            __context__['retcode'] = 2
            return False

        job_result = job_stats[jid]['Result']
        job_minions = job_stats[jid]['Minions']
        if len(job_minions) != len(job_result):
            logger.error("%s check FAILED" % agent)
            logger.error("Some minions are offline")
            logger.error(list(set(job_minions) - set(job_result.keys())))
            __context__['retcode'] = 2
            return False

    failed_minions = []
    for minion in out:
        response = out[minion]
        if 'retcode' not in response or response['retcode'] != 0:
            # Missing or non-zero exit code
            failed = True
        elif ignore_empty:
            failed = False
        else:
            # A bare boolean or an empty container is treated as "no data"
            ret = response['ret']
            failed = isinstance(ret, bool) or len(ret) == 0
        if failed:
            failed_minions.append(minion)

    if not _failed_minions(out, agent, failed_minions):
        __context__['retcode'] = 2
        return False

    return True
def minions_check(wait_timeout=1, gather_job_wait_timeout=1, target='*', target_type='glob', ignore_dead=False):

    ''' Verify minions are online

    Runs ``test.ping`` against the target through ``saltutil.cmd`` and
    returns the verdict of ``_minions_output`` (empty return data is
    tolerated for this check).
    '''

    ping_args = {
        'tgt': target,
        'tgt_type': target_type,
        'fun': 'test.ping',
        'timeout': wait_timeout,
        'gather_job_timeout': gather_job_wait_timeout,
    }
    responses = __salt__['saltutil.cmd'](**ping_args) or None

    return _minions_output(responses, "Minions", ignore_dead, ignore_empty=True)
def time_diff_check(time_diff=1, target='*', target_type='glob', ignore_dead=False, **kwargs):

    ''' Verify time diff on servers

    Collects ``status.time`` (format ``%s``) from the target and compares
    the largest and the smallest reported value. Returns True when the
    spread does not exceed ``time_diff``. Otherwise sets
    ``__context__['retcode']`` to 2 and returns False, or the tuple
    ``(False, {time: [minions]})`` when ``debug`` is passed.
    '''

    agent = "Time diff"
    debug = kwargs.get("debug", False)

    out = __salt__['saltutil.cmd'](
        tgt=target,
        tgt_type=target_type,
        fun='status.time',
        arg=['%s'],
        timeout=3
    ) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    minions_times = {}
    env_times = []
    verified_minions = []

    for minion in out:
        verified_minions.append(minion)
        if out[minion]['retcode'] != 0:
            continue
        reported = int(out[minion]['ret'])
        minions_times.setdefault(str(reported), []).append(minion)
        env_times.append(reported)

    ordered = sorted(env_times)
    spread = ordered[-1] - ordered[0]

    if spread > time_diff:
        __context__['retcode'] = 2
        if debug:
            return False, minions_times
        return False

    if debug:
        logger.info(verified_minions)

    return True
def contrail_process_list(**kwargs):

    ''' Retrieve contrail process pids and uptime

    Parses ``contrail-status -d`` for "<name> ... pid <N>," lines. When
    ``role='controller'`` is passed, the zookeeper and ifmap-server
    services are added from ``service <name> status`` output.

    :return: dict of process name -> {'pid': int, 'uptime': int seconds};
             uptime is 0 when /proc/<pid>/stat cannot be read or parsed
    '''

    procs = {}

    cmd = ['contrail-status', '-d']
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE)
    stdout, stderr = proc.communicate()
    for line in stdout.split('\n'):
        if re.findall(r'^(\S+).*pid ([0-9]+),.*$', line):
            stat = line.split()
            # Fourth token is "<pid>," - drop the trailing comma
            procs[stat[0]] = int(stat[3][:-1])

    if kwargs.get('role', 'compute') == 'controller':
        for service in ['zookeeper', 'ifmap-server']:
            cmd = ['service', service, 'status']
            proc = subprocess.Popen(cmd, stdout=subprocess.PIPE)
            stdout, stderr = proc.communicate()
            for line in stdout.split('\n'):
                if re.findall(r'^(\S+).*process ([0-9]+)$', line):
                    stat = line.split()
                    procs[stat[0]] = int(stat[3])

    # "%s" is a platform extension of strftime that yields epoch seconds
    ctime = int(datetime.datetime.now().strftime("%s"))

    # Boot time (epoch seconds) from /proc/stat; the file is closed
    # deterministically instead of being left to the garbage collector.
    btime_re = re.compile(r"^btime (\d+)$", re.MULTILINE)
    with open("/proc/stat") as stat_file:
        btime_groups = btime_re.search(stat_file.read())
    btime = int(btime_groups.groups()[0])
    clk_tck = os.sysconf(os.sysconf_names["SC_CLK_TCK"])

    for name in procs:
        pid = procs[name]
        try:
            with open('/proc/%s/stat' % str(pid), 'r') as f:
                stat = f.read()
            # Field index 19 after the ") " that closes the command name,
            # converted from clock ticks to seconds
            jitty_time = int(stat.split(') ')[1].split()[19]) / clk_tck
            proc_uptime = ctime - btime - int(jitty_time)
        except (IOError, OSError, IndexError, ValueError):
            # Best effort: the process may have exited in the meantime
            proc_uptime = 0
        procs[name] = { 'pid': pid, 'uptime': proc_uptime }

    return procs
def contrail_check(target='I@opencontrail:control or I@opencontrail:collector or I@opencontrail:compute', nodetool_target='I@opencontrail:control or I@opencontrail:collector', compute_target='I@opencontrail:compute', target_type='compound', nodetool_target_type='compound', compute_target_type='compound', nodetool_expected_size=3, proc_min_uptime=30, ignore_dead=False, **kwargs):

    ''' Verify contrail infrastructure

    Runs up to five checks in sequence and stops at the first failure:

    1. ``contrail-status`` output on ``target``: every line must match the
       allowed-state pattern
    2. ``nodetool status`` on ``nodetool_target``: the number of lines
       starting with "UN" must equal ``nodetool_expected_size``
    3. process uptime on ``nodetool_target`` (skipped for version 4.x)
    4. process uptime on ``compute_target`` (skipped for version 4.x)
    5. duplicated namespace uuids across ``compute_target`` minions

    :param proc_min_uptime: minimal process uptime accepted by checks 3 and 4
    :param ignore_dead: passed through to ``_minions_output``
    :param kwargs: ``debug=True`` logs the validated minions of each check
    :return: True when all checks pass; otherwise False with
             ``__context__['retcode']`` set to 2
    '''

    # doctrail is used when the first character of the pillar value
    # _param:opencontrail_version is '4'
    use_doctrail = False
    oc_ver = str(__salt__['pillar.get']('_param:opencontrail_version'))
    if len(oc_ver) > 1:
        if oc_ver[0] == '4':
            use_doctrail = True

    agent = "Contrail status"
    if use_doctrail:
        # Compute nodes do not use doctrail yet, but they are part of the compound target.
        # In order to minimize salt calls we are using exception pipes
        arg_cmd = 'test $(whereis -b doctrail | grep -c " ") -eq 0 && contrail-status || doctrail all contrail-status'
    else:
        arg_cmd = "contrail-status"

    # Check #1 contrail-status
    out = __salt__['saltutil.cmd']( tgt=target,
                                    tgt_type=target_type,
                                    fun='cmd.run',
                                    arg=[arg_cmd],
                                    timeout=5
                                  ) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    failed_minions = []
    # Accepted lines: "==" headers, rows of asterisks, empty lines and
    # services whose state is active, backup or inactive (disabled on boot)
    pattern = '^(==|\*+$|$|\S+\s+(active|backup|inactive\s\(disabled\son\sboot\)))'
    prog = re.compile(pattern)

    validated = []
    for minion in out:
        for line in out[minion]['ret'].split('\n'):
            check_line = True
            # Lines containing " FOR NODE " are not validated
            if " FOR NODE " in line:
                check_line = False
            if check_line and not prog.match(line) and minion not in failed_minions:
                failed_minions.append(minion)
        validated.append(minion)

    if not _failed_minions(out, agent, failed_minions):
        __context__['retcode'] = 2
        return False
    if kwargs.get("debug", False):
        logger.info(validated)

    if use_doctrail:
        arg_cmd = "doctrail all nodetool status"
    else:
        arg_cmd = "nodetool status"

    # Check #2 nodetool
    out = __salt__['saltutil.cmd']( tgt=nodetool_target,
                                    tgt_type=nodetool_target_type,
                                    fun='cmd.run',
                                    arg=[arg_cmd],
                                    timeout=5
                                  ) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    failed_minions = []
    # Count the lines starting with "UN"
    pattern = '^UN'
    prog = re.compile(pattern)

    validated = []
    for minion in out:
        size = 0
        for line in out[minion]['ret'].split('\n'):
            if prog.match(line):
                size += 1
        if not size == nodetool_expected_size and minion not in failed_minions:
            failed_minions.append(minion)
        validated.append(minion)

    if not _failed_minions(out, agent, failed_minions):
        __context__['retcode'] = 2
        return False
    if kwargs.get("debug", False):
        logger.info(validated)

    # Check #3 process status control

    # Contrail 4.x does not produce pid info from contrail-status -d
    # Will skip this check and use another method further
    # TODO: check process list state for oc4 env
    if not use_doctrail:

        out = __salt__['saltutil.cmd']( tgt=nodetool_target,
                                        tgt_type=nodetool_target_type,
                                        fun='health_checks.contrail_process_list',
                                        arg=['role=controller'],
                                        timeout=5
                                      ) or None

        if not _minions_output(out, agent, ignore_dead):
            __context__['retcode'] = 2
            return False

        failed_minions = []
        validated = []
        for minion in out:
            procs = out[minion]['ret']
            for proc in procs:
                proc_uptime = procs[proc]['uptime']
                # A process younger than proc_min_uptime fails the minion
                if proc_uptime < proc_min_uptime:
                    if minion not in failed_minions:
                        failed_minions.append(minion)
                    logger.error({'minion': minion, 'name': proc, 'uptime': proc_uptime})
            validated.append(minion)

        if not _failed_minions(out, agent, failed_minions):
            __context__['retcode'] = 2
            return False
        if kwargs.get("debug", False):
            logger.info(validated)

    # Check #4 process status computes

    # Contrail 4.x does not produce pid info from contrail-status -d
    # Will skip this check and use another method further
    # TODO: check process list state for oc4 env
    if not use_doctrail:

        out = __salt__['saltutil.cmd']( tgt=compute_target,
                                        tgt_type=compute_target_type,
                                        fun='health_checks.contrail_process_list',
                                        timeout=5
                                      ) or None

        if not _minions_output(out, agent, ignore_dead):
            __context__['retcode'] = 2
            return False

        failed_minions = []
        validated = []
        for minion in out:
            procs = out[minion]['ret']
            for proc in procs:
                proc_uptime = procs[proc]['uptime']
                if proc_uptime < proc_min_uptime:
                    if minion not in failed_minions:
                        failed_minions.append(minion)
                    logger.error({'minion': minion, 'name': proc, 'uptime': proc_uptime})
            validated.append(minion)

        if not _failed_minions(out, agent, failed_minions):
            __context__['retcode'] = 2
            return False
        if kwargs.get("debug", False):
            logger.info(validated)

    # Check #5 compute vrouter namespaces duplicates check
    out = __salt__['saltutil.cmd']( tgt=compute_target,
                                    tgt_type=compute_target_type,
                                    fun='health_checks.list_namespaces',
                                    timeout=5
                                  ) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    failed_minions = []
    validated = []
    # uuids seen so far on any minion; a repeated uuid fails the minion
    # that reports it again
    all_namespaces = []
    for minion in out:
        namespaces = out[minion]['ret']
        for ns in namespaces:
            if ns['uuid'] not in all_namespaces:
                all_namespaces.append(ns['uuid'])
            else:
                if minion not in failed_minions:
                    failed_minions.append(minion)
                logger.error({'minion': minion, 'uuid': ns['uuid']})
        validated.append(minion)

    if not _failed_minions(out, agent, failed_minions):
        logger.error("Duplicated SNAT vrouters found. Please reset their gateways")
        __context__['retcode'] = 2
        return False
    if kwargs.get("debug", False):
        logger.info(validated)

    # TODO: peers check
    return True
def galera_check(cluster_size=3, target='I@galera:master or I@galera:slave', target_type='compound', ignore_dead=False, **kwargs):

    ''' Verify galera cluster size and state

    Reads ``mysql.status`` from every targeted minion. A minion fails when
    its ``wsrep_cluster_size`` differs from ``cluster_size`` or its
    ``wsrep_evs_state`` is not ``OPERATIONAL``. Returns True when no minion
    fails; otherwise False with ``__context__['retcode']`` set to 2.
    '''

    agent = "Galera status"
    out = __salt__['saltutil.cmd'](
        tgt=target,
        tgt_type=target_type,
        fun='mysql.status',
        timeout=3
    ) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    failed_minions = []
    validated = []
    for minion in out:
        wsrep = out[minion]['ret']
        size_matches = int(wsrep['wsrep_cluster_size']) == int(cluster_size)
        operational = wsrep['wsrep_evs_state'] == 'OPERATIONAL'
        if not (size_matches and operational):
            failed_minions.append(minion)
        validated.append(minion)

    if not _failed_minions(out, agent, failed_minions):
        __context__['retcode'] = 2
        return False

    if kwargs.get("debug", False):
        sample = out[validated[0]]['ret']
        logger.info(validated)
        logger.info("Cluster size: " + str(sample['wsrep_cluster_size']))
        logger.info("Cluster state: " + str(sample['wsrep_evs_state']))

    return True
def _quote_str(s, l=False, r=False):

    ''' Quoting rabbitmq erl objects for json import

    :param s: text fragment to quote
    :param l: strip whitespace on the left before quoting
    :param r: strip whitespace on the right before quoting
    :return: the fragment with single quotes turned into double quotes and,
             when it carried no quotes at either end, wrapped in double
             quotes; an empty string is returned unchanged
    '''

    if len(s) > 0:
        if l:
            s = s.lstrip()
        if r:
            s = s.rstrip()
        # Close a quote that was opened but cut off on the right side
        if (s[0] == "'") and (s[-1] != "'") and r and not l:
            s += "'"
        if (s[0] == '"') and (s[-1] != '"') and r and not l:
            s += '"'
        # Open a quote that is closed but was cut off on the left side
        if (s[-1] == "'") and (s[0] != "'") and l and not r:
            s = "'" + s
        if (s[-1] == '"') and (s[0] != '"') and l and not r:
            s = '"' + s
        # No quote at either end: escape inner double quotes and wrap
        if (s[-1] != "'") and (s[-1] != '"') and (s[0] != "'") and (s[0] != '"'):
            s = '"' + s.replace('"', '\\\"') + '"'
        else:
            # NOTE(review): "not s[-1] != '"'" reads as s[-1] == '"';
            # confirm the double negation is intended
            if (not l) and (not r) and s[0] != '"' and not s[-1] != '"':
                s= s.replace('"', '\\\"')
        return s.replace("'", '"')
    else:
        return s
def _sanitize_rmqctl_output(string):

    ''' Sanitizing rabbitmq erl objects for json import

    Splits the text on commas, quotes every chunk with ``_quote_str`` and
    joins the chunks back with "," or ":" so that the result can be fed to
    ``json.loads``.
    '''

    rabbitctl_json = ""
    for line in string.split(','):
        copy = line
        left = ""
        right = ""
        mid = copy
        lpar = False
        rpar = False
        # Peel leading "[", "{" and whitespace off into `left`
        # NOTE(review): the search is not anchored, the sub is - confirm
        if re.search('([\[\{\s]+)(.*)', copy):
            mid = re.sub('^([\[\{\s]+)','', copy)
            left = copy[:-len(mid)]
            copy = mid
            lpar = True
        # Peel trailing "]", "}" and whitespace off into `right`
        if re.search('(.*)([\]\}\s]+)$', copy):
            mid = re.sub('([\]\}\s]+)$','', copy)
            right = copy[len(mid):]
            copy = mid
            rpar = True
        result = left + _quote_str(mid, l=lpar, r=rpar) + right
        # A chunk that opens with "{" and has no closing part is followed by
        # ":" instead of ","
        if (not rpar) and lpar and (len(left.strip()) > 0) and (left.strip()[-1] == '{'):
            result += ":"
        else:
            result += ","
        rabbitctl_json += result

    # Drop the separator appended after the last chunk
    rabbitctl_json = rabbitctl_json[:-1]
    new_rabbitctl_json = rabbitctl_json
    # Second pass: re-quote values that still contain double quotes inside
    for s in re.findall('"[^:\[{\]}]+"\s*:\s*("[^\[{\]}]+")', rabbitctl_json):
        if '"' in s[1:][:-1]:
            orig = s
            changed = '"' + s.replace('\\', '\\\\').replace('"', '\\\"') + '"'
            new_rabbitctl_json = new_rabbitctl_json.replace(orig, changed)
    return new_rabbitctl_json
def rabbitmq_list_queues(vhost='/'):

    ''' JSON formatted RabbitMQ queues list

    Runs ``rabbitmqctl list_queues -p <vhost>`` and returns a dict of
    queue name -> integer taken from the second column. Only lines ending
    with a digit are parsed.
    '''

    argv = ['rabbitmqctl', 'list_queues' , '-p', vhost]
    listing = subprocess.Popen(argv, stdout=subprocess.PIPE).communicate()[0]

    queues = {}
    for row in listing.split('\n'):
        if not re.search(r'[0-9]$', row):
            continue
        # Exactly two whitespace separated columns are expected
        queue_name, num = row.split()
        queues[queue_name] = int(num)

    return queues
def rabbitmq_list_vhosts():

    ''' JSON formatted RabbitMQ vhosts list

    Runs ``rabbitmqctl list_vhosts`` and returns the output lines that
    start with "/".
    '''

    listing = subprocess.Popen(['rabbitmqctl', 'list_vhosts'], stdout=subprocess.PIPE).communicate()[0]

    return [row for row in listing.split('\n') if row.startswith('/')]
def rabbitmq_cmd(cmd):

    ''' JSON formatted RabbitMQ command output

    Only ``status``, ``cluster_status``, ``list_hashes`` and
    ``list_ciphers`` are accepted; any other command is logged, sets
    ``__context__['retcode']`` to 2 and returns False. Otherwise the text
    between the first "[" and the last "]" of the ``rabbitmqctl`` output is
    sanitized and parsed as JSON.
    '''

    supported_commands = ['status', 'cluster_status', 'list_hashes', 'list_ciphers']
    if cmd not in supported_commands:
        logger.error("Command is not supported yet, sorry")
        logger.error("Supported commands are: " + str(supported_commands))
        __context__['retcode'] = 2
        return False

    raw = subprocess.Popen(['rabbitmqctl', cmd], stdout=subprocess.PIPE).communicate()[0]

    first = int(raw.find('['))
    last = int(raw.rfind(']')) + 1
    erl_term = raw[first:last].replace('\n','')

    return json.loads(_sanitize_rmqctl_output(erl_term))
def rabbitmq_check(target='I@rabbitmq:server', target_type='compound', ignore_dead=False, **kwargs):

    ''' Verify rabbit cluster and its alarms

    Runs ``health_checks.rabbitmq_cmd cluster_status`` on the target. A
    minion fails when its running nodes differ from the disc nodes of the
    cluster, or when any running node reports a non-empty alarm list.

    :param ignore_dead: passed through to ``_minions_output``
    :param kwargs: ``debug=True`` logs the running nodes of the last
                   processed minion
    :return: True when no minion fails; otherwise False with
             ``__context__['retcode']`` set to 2
    '''

    agent = "RabbitMQ status"
    out = __salt__['saltutil.cmd']( tgt=target,
                                    tgt_type=target_type,
                                    fun='health_checks.rabbitmq_cmd',
                                    arg=['cluster_status'],
                                    timeout=3
                                  ) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    failed_minions = []
    running_nodes = []

    for minion in out:
        rabbitmqctl_json = out[minion]['ret']
        running_nodes = []
        available_nodes = []
        alarms = []
        for el in rabbitmqctl_json:
            if 'alarms' in el:
                alarms = el['alarms']
            if 'nodes' in el:
                available_nodes = el['nodes'][0]['disc']
            if 'running_nodes' in el:
                running_nodes = el['running_nodes']

        # list.sort() returns None, so comparing two .sort() results is
        # always true; compare sorted copies to catch missing nodes.
        running_nodes = sorted(running_nodes)
        if running_nodes != sorted(available_nodes):
            failed_minions.append(minion)
            continue

        nodes_alarms = []
        for node in running_nodes:
            for el in alarms:
                if node in el and len(el[node]) > 0:
                    nodes_alarms.append(el[node])
        if len(nodes_alarms) > 0:
            failed_minions.append(minion)

    if not _failed_minions(out, agent, failed_minions):
        __context__['retcode'] = 2
        return False

    if kwargs.get("debug", False):
        logger.info(running_nodes)
    return True
def haproxy_status(socket_path='/run/haproxy/admin.sock', buff_size = 8192, encoding = 'UTF-8', stats_filter=[]):

    ''' JSON formatted haproxy status

    Sends ``show stat`` to the haproxy admin socket and turns the CSV
    answer into a nested dict.

    :param socket_path: path of the haproxy unix stats socket
    :param buff_size: size of a single ``recv`` call
    :param encoding: encoding used for the command and the answer
    :param stats_filter: when non-empty, only these CSV columns are kept
                         (the default list is never modified)
    :return: ``{proxy: {'FRONTEND': {...}, 'BACKEND': {...},
             'UPSTREAM': {server: {...}}}}``; False with
             ``__context__['retcode']`` set to 2 when the socket path does
             not exist
    '''

    stat_cmd = 'show stat\n'

    if not os.path.exists(socket_path):
        logger.error('Socket %s does not exist or haproxy not running' % socket_path)
        __context__['retcode'] = 2
        return False

    client = socket.socket( socket.AF_UNIX, socket.SOCK_STREAM)
    chunks = []
    try:
        client.connect(socket_path)
        # sendall() keeps writing until the whole command is sent
        client.sendall(bytearray(stat_cmd, encoding))
        while True:
            output = client.recv(buff_size)
            if not output:
                break
            chunks.append(output)
    finally:
        # Release the socket even when connect/send/recv raises
        client.close()

    # Decode once, so a multibyte character split across two recv() calls
    # cannot break decoding
    res = b"".join(chunks).decode(encoding)

    haproxy_stats = {}
    res_list = res.split('\n')
    # The header line starts with "# "
    fields = res_list[0][2:].split(',')

    for line in res_list[1:]:
        if len(line.strip()) == 0:
            continue
        element = dict(zip(fields, line.split(',')))
        server_name = element.pop('pxname')
        server_type = element.pop('svname')
        if stats_filter:
            element = {key: value for key, value in element.items() if key in stats_filter}

        proxy = haproxy_stats.setdefault(server_name, {})
        if server_type == "FRONTEND" or server_type == "BACKEND":
            proxy[server_type] = element
        else:
            # Every other svname is a server behind the proxy
            proxy.setdefault('UPSTREAM', {})[server_type] = element

    return haproxy_stats
def haproxy_check(target='I@haproxy:proxy', target_type='compound', ignore_dead=False, ignore_services=[], ignore_upstreams=[], ignore_no_upstream=False, **kwargs):

    ''' Verify haproxy backends status

    Reads ``health_checks.haproxy_status`` (status column only) from the
    target. For every service not listed in ``ignore_services`` the
    FRONTEND must be ``OPEN``, the BACKEND must be ``UP`` and every
    upstream not listed in ``ignore_upstreams`` must be ``UP``. A service
    without upstreams fails unless ``ignore_no_upstream`` is set. Returns
    True when no minion fails; otherwise False with
    ``__context__['retcode']`` set to 2.
    '''

    agent = "haproxy status"
    out = __salt__['saltutil.cmd'](
        tgt=target,
        tgt_type=target_type,
        fun='health_checks.haproxy_status',
        arg=["stats_filter=['status']"],
        timeout=3
    ) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    failed_minions = []
    verified_minions = []
    for minion in out:
        verified_minions.append(minion)
        services = out[minion]['ret']
        problems = 0
        for service in services:
            if service in ignore_services:
                continue
            sections = services[service]
            if sections['FRONTEND']['status'] != 'OPEN':
                problems += 1
            if sections['BACKEND']['status'] != 'UP':
                problems += 1
            if 'UPSTREAM' not in sections:
                if not ignore_no_upstream:
                    problems += 1
                continue
            upstreams = sections['UPSTREAM']
            for upstream in upstreams:
                if upstream in ignore_upstreams:
                    continue
                if upstreams[upstream]['status'] != 'UP':
                    problems += 1
        if problems:
            failed_minions.append(minion)

    if not _failed_minions(out, agent, failed_minions):
        __context__['retcode'] = 2
        return False

    if kwargs.get("debug", False):
        logger.info(verified_minions)

    return True
def df_check(target='*', target_type='glob', verify='space', space_limit=80, inode_limit=80, ignore_dead=False, ignore_partitions=[], **kwargs):

    ''' Verify storage space/inodes status

    ``verify='space'`` compares the ``capacity`` field of ``disk.usage``
    with ``space_limit``; ``verify='inodes'`` compares the ``use`` field of
    ``disk.inodeusage`` with ``inode_limit``. The last character of the
    field is dropped before the integer comparison. Partitions listed in
    ``ignore_partitions`` are skipped. Returns True when no minion exceeds
    the limit; otherwise False with ``__context__['retcode']`` set to 2.
    '''

    supported_options = ['space', 'inodes']
    if verify not in supported_options:
        logger.error('Unsupported "verify" option.')
        logger.error('Supported options are: %s' % str(supported_options))
        __context__['retcode'] = 2
        return False

    # verify option -> (salt function, result field, limit)
    modes = {
        'space': ('disk.usage', 'capacity', space_limit),
        'inodes': ('disk.inodeusage', 'use', inode_limit),
    }
    fun_cmd, json_arg, limit = modes[verify]

    agent = "df status"
    out = __salt__['saltutil.cmd'](
        tgt=target,
        tgt_type=target_type,
        fun=fun_cmd,
        timeout=3
    ) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    failed_minions = []
    verified_minions = []
    for minion in out:
        verified_minions.append(minion)
        usage = out[minion]['ret']
        over_limit = [
            disk for disk in usage
            if disk not in ignore_partitions
            and int(usage[disk][json_arg][:-1]) > int(limit)
        ]
        if over_limit:
            failed_minions.append(minion)

    if not _failed_minions(out, agent, failed_minions):
        __context__['retcode'] = 2
        return False

    if kwargs.get("debug", False):
        logger.info(verified_minions)

    return True
def load_check(target='*', target_type='glob', la1=3, la5=3, la15=3, ignore_dead=False, **kwargs):

    ''' Verify load average status

    Compares the ``1-min``, ``5-min`` and ``15-min`` values of
    ``status.loadavg`` with ``la1``, ``la5`` and ``la15``. A minion fails
    when any value is above its limit. Returns True when no minion fails;
    otherwise False with ``__context__['retcode']`` set to 2.
    '''

    agent = "load average status"
    out = __salt__['saltutil.cmd'](
        tgt=target,
        tgt_type=target_type,
        fun='status.loadavg',
        timeout=3
    ) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    limits = (('1-min', la1), ('5-min', la5), ('15-min', la15))

    failed_minions = []
    verified_minions = []
    for minion in out:
        verified_minions.append(minion)
        averages = out[minion]['ret']
        exceeded = [
            window for window, limit in limits
            if float(averages[window]) > float(limit)
        ]
        if exceeded:
            failed_minions.append(minion)

    if not _failed_minions(out, agent, failed_minions):
        __context__['retcode'] = 2
        return False

    if kwargs.get("debug", False):
        logger.info(verified_minions)

    return True
def netdev_check(target='*', target_type='glob', rx_drop_limit=0, tx_drop_limit=0, ignore_devices=[], ignore_dead=False, **kwargs):

    ''' Verify netdev rx/tx drop status

    Compares the ``rx_drop`` and ``tx_drop`` counters of ``status.netdev``
    with ``rx_drop_limit`` and ``tx_drop_limit`` for every device not
    listed in ``ignore_devices``. Offending counters are collected as
    ``{minion: {device: {counter: value}}}``. Returns True when nothing is
    collected; otherwise False with ``__context__['retcode']`` set to 2.
    '''

    agent = "netdev rx/tx status"
    out = __salt__['saltutil.cmd'](
        tgt=target,
        tgt_type=target_type,
        fun='status.netdev',
        timeout=3
    ) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    counters = (('rx_drop', rx_drop_limit), ('tx_drop', tx_drop_limit))

    failed_minions = {}
    verified_minions = []
    for minion in out:
        verified_minions.append(minion)
        devices = out[minion]['ret']
        for netdev in devices:
            if netdev in ignore_devices:
                continue
            for counter, limit in counters:
                dropped = int(devices[netdev][counter])
                if dropped > int(limit):
                    per_minion = failed_minions.setdefault(minion, {})
                    per_minion.setdefault(netdev, {})[counter] = dropped

    if not _failed_minions(out, agent, failed_minions):
        __context__['retcode'] = 2
        return False

    if kwargs.get("debug", False):
        logger.info(verified_minions)

    return True
def mem_check(target='*', target_type='glob', used_limit=80, ignore_dead=False, **kwargs):

    ''' Verify available memory status

    Computes the used memory percentage from the ``MemTotal`` and
    ``MemAvailable`` values of ``status.meminfo``. A minion fails when the
    percentage is above ``used_limit``. Returns True when no minion fails;
    otherwise False with ``__context__['retcode']`` set to 2.
    '''

    agent = "available memory status"
    out = __salt__['saltutil.cmd'](
        tgt=target,
        tgt_type=target_type,
        fun='status.meminfo',
        timeout=3
    ) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    failed_minions = []
    verified_minions = []
    for minion in out:
        meminfo = out[minion]['ret']
        mem_avail = int(meminfo['MemAvailable']['value'])
        mem_total = int(meminfo['MemTotal']['value'])
        used_pct = float((mem_total - mem_avail) * 100 / mem_total)
        if not used_pct > float(used_limit):
            verified_minions.append( { minion : str(used_pct) + '%' } )
            continue
        failed_minions.append(minion)

    if not _failed_minions(out, agent, failed_minions):
        __context__['retcode'] = 2
        return False

    if kwargs.get("debug", False):
        logger.info(verified_minions)

    return True
def ntp_status(params = ['-4', '-p', '-n']):

    ''' JSON formatted ntpq command output

    Runs ``ntpq`` with ``params`` and parses the peers table: the first
    output line supplies the column names, the second line is skipped and
    every following non-empty line describes one peer.

    :param params: command line arguments for ntpq (never modified)
    :return: dict of peer address -> column values, where ``st`` is
             renamed to ``stratum`` and ``t`` to ``source_type``, plus
             ``current`` (True for the ``*`` peer) and ``state``
             (indicator and comment; indicator ``f`` when the peer name
             carries no known tally character)
    '''

    ntp_states = [
        { 'indicator': '#', 'comment': 'source selected, distance exceeds maximum value' },
        { 'indicator': 'o', 'comment': 'source selected, Pulse Per Second (PPS) used' },
        { 'indicator': '+', 'comment': 'source selected, included in final set' },
        { 'indicator': 'x', 'comment': 'source false ticker' },
        { 'indicator': '.', 'comment': 'source selected from end of candidate list' },
        { 'indicator': '-', 'comment': 'source discarded by cluster algorithm' },
        { 'indicator': '*', 'comment': 'current time source' },
        { 'indicator': ' ', 'comment': 'source discarded high stratum, failed sanity' }
    ]
    state_comments = dict((state['indicator'], state['comment']) for state in ntp_states)

    source_types = {
        'l': "local (such as a GPS, WWVB)",
        'u': "unicast (most common)",
        'm': "multicast",
        'b': "broadcast",
        '-': "netaddr",
    }

    # Unit suffixes ntpq appends to large "when" values
    when_units = {'m': 60, 'h': 3600, 'd': 86400}

    proc = subprocess.Popen(['ntpq'] + params, stdout=subprocess.PIPE)
    stdout, stderr = proc.communicate()

    ntp_lines = stdout.split('\n')
    fields = ntp_lines[0].split()
    fields[fields.index('st')] = 'stratum'
    fields[fields.index('t')] = 'source_type'

    ntp_peers = {}
    for line in ntp_lines[2:]:
        values = line.split()
        if not values:
            continue

        element = {}
        for field, value in zip(fields, values):
            if field == 'source_type':
                # Unknown type letters no longer abort the whole parse
                element[field] = { 'indicator': value, 'comment': source_types.get(value, 'unknown') }
            elif field in ['stratum', 'when', 'poll', 'reach']:
                if value == '-':
                    element[field] = -1
                elif field == 'when' and value[-1] in when_units:
                    # e.g. "17m" -> 1020 seconds
                    element[field] = int(value[:-1]) * when_units[value[-1]]
                else:
                    element[field] = int(value)
            elif field in ['delay', 'offset', 'jitter']:
                element[field] = float(value)
            else:
                element[field] = value

        peer = element.pop('remote')
        peer_state = peer[0]
        if peer_state in state_comments:
            # Strip the tally character from the peer address
            peer = peer[1:]
        else:
            peer_state = 'f'

        element['current'] = peer_state == '*'
        if peer_state == 'f':
            # No tally character: reuse the comment of the blank indicator
            element['state'] = { 'indicator': 'f', 'comment': state_comments[' '] }
        else:
            element['state'] = { 'indicator': peer_state, 'comment': state_comments[peer_state] }

        ntp_peers[peer] = element

    return ntp_peers
def ntp_check(min_peers=1, max_stratum=3, target='*', target_type='glob', ignore_dead=False, **kwargs):

    ''' Verify NTP peers status

    Reads ``health_checks.ntp_status`` from the target. A peer is good
    when its stratum does not exceed ``max_stratum``; a minion passes when
    it has at least ``min_peers`` good peers. Returns True when no minion
    fails; otherwise False with ``__context__['retcode']`` set to 2.
    '''

    agent = "ntpd peers status"
    out = __salt__['saltutil.cmd'](
        tgt=target,
        tgt_type=target_type,
        fun='health_checks.ntp_status',
        timeout=3
    ) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    failed_minions = []
    verified_minions = []
    for minion in out:
        peers = out[minion]['ret']
        good_peers = [
            peer for peer in peers
            if peers[peer]['stratum'] < int(max_stratum) + 1
        ]
        enough = len(good_peers) > int(min_peers) - 1
        bucket = verified_minions if enough else failed_minions
        if minion not in bucket:
            bucket.append(minion)

    if not _failed_minions(out, agent, failed_minions):
        __context__['retcode'] = 2
        return False

    if kwargs.get("debug", False):
        logger.info(verified_minions)

    return True
def gluster_pool_list():

    ''' JSON formatted GlusterFS pool list command output

    Runs ``gluster pool list``. The first output line supplies three
    column names (lower-cased); every following non-empty line is one
    peer. Returns a dict keyed by the ``uuid`` column holding the two
    remaining columns.
    '''

    listing = subprocess.Popen(['gluster', 'pool', 'list'], stdout=subprocess.PIPE).communicate()[0]

    three_columns = re.compile(r'^(\S+)\s+(\S+)\s+(\S+)$')
    rows = listing.split('\n')
    header = three_columns.findall(rows[0])[0]

    pool = {}
    for row in rows[1:]:
        stripped = row.strip()
        if not stripped:
            continue
        columns = three_columns.findall(stripped)[0]
        peer = dict((name.lower(), value) for name, value in zip(header, columns))
        uuid = peer.pop('uuid')
        pool[uuid] = peer

    return pool
def gluster_volume_status():

    ''' JSON formatted GlusterFS volumes status command output

    Runs ``gluster volume status all detail`` and returns
    ``{volume: {'bricks': [brick, ...]}}``. A brick starts at the first
    "Brick" line after a dashed separator and holds ``host`` and ``path``;
    every other "key : value" line is added to the most recent brick with
    the key lower-cased and spaces replaced by underscores.
    '''

    argv = ['gluster', 'volume', 'status', 'all', 'detail']
    report = subprocess.Popen(argv, stdout=subprocess.PIPE).communicate()[0]

    expect_brick = False
    volumes = {}
    volume_name = ""

    for row in report.split('\n'):
        if 'Status of volume' in row:
            volume_name = row.split(':')[1].strip()
            volumes[volume_name] = { 'bricks': [] }
            continue

        if len(row.strip()) == 0:
            continue

        if '--------' in row:
            # A separator announces the next brick
            expect_brick = True
            continue

        if expect_brick and row.split(':')[0].strip() == 'Brick':
            location = re.findall(r'^Brick\ *:\ (.*)', row)[0].split()[1]
            brick_host, brick_path = location.split(':')
            volumes[volume_name]['bricks'].append({ 'host': brick_host, 'path': brick_path })
            expect_brick = False
            continue

        # Any other line is an attribute of the most recent brick
        raw_key, raw_value = row.split(':')
        attribute = raw_key.strip().lower().replace(' ', '_')
        volumes[volume_name]['bricks'][-1][attribute] = raw_value.strip()

    return volumes
def gluster_pool_check(target='I@glusterfs:server', target_type='compound', expected_size=3, ignore_dead=False, **kwargs):

    ''' Check GlusterFS peer status

    Reads ``health_checks.gluster_pool_list`` from the target. A minion
    fails when any of its peers is not in the ``Connected`` state or when
    it sees fewer than ``expected_size`` connected peers. Returns True
    when no minion fails; otherwise False with ``__context__['retcode']``
    set to 2.
    '''

    agent = "glusterfs peer status"
    out = __salt__['saltutil.cmd'](
        tgt=target,
        tgt_type=target_type,
        fun='health_checks.gluster_pool_list',
        timeout=3,
        kwargs='[batch=True]'
    ) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    failed_minions = []
    verified_minions = []
    for minion in out:
        verified_minions.append(minion)
        pool = out[minion]['ret']
        alive_peers = [peer for peer in pool if pool[peer]['state'] == 'Connected']
        has_dead_peer = len(alive_peers) != len(pool)
        too_small = len(alive_peers) < expected_size
        if has_dead_peer or too_small:
            failed_minions.append(minion)

    if not _failed_minions(out, agent, failed_minions):
        __context__['retcode'] = 2
        return False

    if kwargs.get("debug", False):
        logger.info(verified_minions)

    return True
def gluster_volumes_check(target='I@glusterfs:server', target_type='compound', expected_size=3, ignore_volumes=[], ignore_dead=False, **kwargs):
    ''' Check GlusterFS volumes status

    Runs health_checks.gluster_volume_status on the targeted minions and
    inspects every reported volume that is not listed in ignore_volumes.
    A minion is put on the failed list when one of its volumes
      - has no 'bricks' entry,
      - has a brick whose 'online' field is not 'Y', or
      - has fewer than expected_size online bricks.

    Returns False (and sets the salt retcode to 2) when _minions_output or
    _failed_minions returns a false value, True otherwise. Passing debug=True
    logs the verified minions and volumes.

    Note: ignore_volumes is only read, never modified, so the shared list
    default is harmless here.
    '''
    agent = "glusterfs volumes status"
    out = __salt__['saltutil.cmd']( tgt=target,
                                    tgt_type=target_type,
                                    fun='health_checks.gluster_volume_status',
                                    timeout=3,
                                    kwargs='[batch=True]'
                                  ) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    failed_minions = []
    verified_minions = []
    verified_volumes = []
    for minion in out:
        verified_minions.append(minion)
        gluster_json = out[minion]['ret']
        minion_failed = False
        for volume in gluster_json:
            if volume in ignore_volumes:
                continue
            verified_volumes.append(volume)
            if 'bricks' not in gluster_json[volume]:
                # Nothing to inspect for this volume: flag the minion and move
                # on instead of failing with KeyError on the lookup below.
                minion_failed = True
                continue
            bricks = gluster_json[volume]['bricks']
            alive_bricks = sum(1 for brick in bricks if brick.get('online') == 'Y')
            # Any offline brick fails the minion, and so does a volume with
            # fewer online bricks than expected.
            if alive_bricks != len(bricks) or alive_bricks < expected_size:
                minion_failed = True
        if minion_failed and minion not in failed_minions:
            failed_minions.append(minion)

    if not _failed_minions(out, agent, failed_minions):
        __context__['retcode'] = 2
        return False

    if kwargs.get("debug", False):
        logger.info("Verified minions:")
        logger.info(verified_minions)
        logger.info("Verified volumes:")
        logger.info(verified_volumes)
    return True
def ceph_cmd(cmd):
    ''' JSON formatted ceph command output

    cmd is a whitespace-separated ceph sub-command (for example 'status').
    It is executed as `ceph <cmd> --format json-pretty` and the captured
    standard output is parsed with json.loads.
    '''
    argv = ['ceph']
    argv.extend(cmd.split())
    argv.extend(['--format', 'json-pretty'])
    output = subprocess.Popen(argv, stdout=subprocess.PIPE).communicate()[0]
    return json.loads(output)
def ceph_health_check(target='I@ceph:mon', target_type='compound', expected_status='HEALTH_OK', expected_state='active+clean', ignore_dead=False, **kwargs):
    ''' Check all ceph monitors health status

    Runs health_checks.ceph_cmd with 'status' on the targeted minions and
    puts a minion on the failed list when its status document shows any of:
      - overall health different from expected_status,
      - the osdmap 'full' or 'nearfull' flag set,
      - OSD counts (total / in / up) that are not all equal,
      - quorum, quorum_names and monmap monitor counts that are not all equal,
      - a monitor (timechecks or health services) whose health differs from
        expected_status,
      - a placement group state other than expected_state.

    Returns False (and sets the salt retcode to 2) when _minions_output or
    _failed_minions returns a false value, True otherwise. Passing debug=True
    logs the quorum names of the last inspected minion and the verified
    minions.
    '''
    agent = "ceph health status"
    cmd_kwargs = {
        'tgt': target,
        'tgt_type': target_type,
        'fun': 'health_checks.ceph_cmd',
        'arg': ['status'],
        'timeout': 3,
    }
    out = __salt__['saltutil.cmd'](**cmd_kwargs) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    failed_minions = []
    verified_minions = []

    def _mark_failed(name):
        # Record each failing minion only once
        if name not in failed_minions:
            failed_minions.append(name)

    for minion in out:
        verified_minions.append(minion)
        ceph_json = out[minion]['ret']
        # Not used further; a status document without 'fsid' raises KeyError here
        fsid = ceph_json['fsid']

        if ceph_json['health']['overall_status'] != expected_status:
            _mark_failed(minion)

        osdmap = ceph_json['osdmap']['osdmap']
        if osdmap['full']:
            _mark_failed(minion)
        if osdmap['nearfull']:
            _mark_failed(minion)

        num_osds = osdmap['num_osds']
        num_in_osds = osdmap['num_in_osds']
        num_up_osds = osdmap['num_up_osds']
        if num_osds != num_in_osds or num_in_osds != num_up_osds:
            _mark_failed(minion)

        quorum = len(ceph_json['quorum'])
        quorum_names = len(ceph_json['quorum_names'])
        mons = len(ceph_json['monmap']['mons'])
        if quorum != quorum_names or quorum_names != mons:
            _mark_failed(minion)

        for mon in ceph_json['health']['timechecks']['mons']:
            if mon['health'] != expected_status:
                _mark_failed(minion)

        for srv in ceph_json['health']['health']['health_services']:
            for mon in srv['mons']:
                if mon['health'] != expected_status:
                    _mark_failed(minion)

        for state in ceph_json['pgmap']['pgs_by_state']:
            if state['state_name'] != expected_state:
                _mark_failed(minion)

    if not _failed_minions(out, agent, failed_minions):
        __context__['retcode'] = 2
        return False

    if kwargs.get("debug", False):
        logger.info("Quorum:")
        logger.info(ceph_json['quorum_names'])
        logger.info("Verified minions:")
        logger.info(verified_minions)
    return True
def get_entropy():
    ''' Retrieve entropy size for the host

    Returns the raw text content of /proc/sys/kernel/random/entropy_avail
    (not converted to an integer).
    '''
    with open('/proc/sys/kernel/random/entropy_avail', 'r') as entropy_file:
        return entropy_file.read()
def entropy_check(target='*', target_type='glob', minimum_bits=700, ignore_dead=False, **kwargs):
    ''' Check entropy size in cluster

    Runs health_checks.get_entropy on the targeted minions and puts every
    minion whose reported value, converted to int, is below minimum_bits on
    the failed list.

    Returns False (and sets the salt retcode to 2) when _minions_output or
    _failed_minions returns a false value, True otherwise. Passing debug=True
    logs the verified minions.
    '''
    agent = "entropy size status"
    cmd_kwargs = {
        'tgt': target,
        'tgt_type': target_type,
        'fun': 'health_checks.get_entropy',
        'timeout': 3,
    }
    out = __salt__['saltutil.cmd'](**cmd_kwargs) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    failed_minions = []
    verified_minions = []
    for minion in out:
        verified_minions.append(minion)
        available_bits = int(out[minion]['ret'])
        if available_bits < minimum_bits and minion not in failed_minions:
            failed_minions.append(minion)

    if not _failed_minions(out, agent, failed_minions):
        __context__['retcode'] = 2
        return False

    if kwargs.get("debug", False):
        logger.info(verified_minions)
    return True
def docker_registry_list(host):
    ''' Retrieve and list docker catalog

    host is either a full base URL starting with 'http' or a bare
    'host[:port]', which is then prefixed with 'http://'.

    Returns a dict mapping each repository from `/v2/_catalog` to the 'tags'
    value of its `/v2/<repo>/tags/list` document. This is a best-effort
    helper: any error (bad argument, connection problem, unexpected reply)
    results in an empty dict.
    '''
    try:
        base = host if host[0:4] == 'http' else 'http://' + host
        url = base + '/v2/'
        catalog = requests.get(url + '_catalog').json()
        versions = {}
        for repo in catalog['repositories']:
            tag_list = requests.get(url + repo + '/tags/list').json()
            versions[repo] = tag_list.pop('tags')
        return versions
    except Exception:
        # Deliberately broad, but unlike a bare except it lets
        # KeyboardInterrupt and SystemExit propagate.
        return {}
def docker_ps(list_all=0):
    ''' List docker containers through the local docker socket

    list_all is handed over as the `all` argument of the docker client's
    containers() call.
    '''
    import docker
    docker_client = docker.client.Client(base_url='unix://var/run/docker.sock')
    containers = docker_client.containers(all=list_all)
    return containers
def zookeeper_cmd(cmd, hostname='localhost', port=2181):
    ''' Execute zookeeper cmd via socket

    Connects to hostname:port over TCP, sends cmd, half-closes the
    connection and reads the reply until the peer closes its side.

    cmd may be a text or a byte string; text is sent UTF-8 encoded.
    Returns the whole reply as a native string.
    Socket errors (connection refused, reset, ...) propagate to the caller;
    the socket is closed in every case.
    '''
    buf_size = 1024
    if not isinstance(cmd, bytes):
        cmd = cmd.encode('utf-8')

    chunks = []
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        sock.connect((hostname, port))
        sock.sendall(cmd)
        # Tell the server the request is complete; it answers and closes
        sock.shutdown(socket.SHUT_WR)
        while True:
            data = sock.recv(buf_size)
            # An empty read means end of stream ('' on Python 2, b'' on Python 3)
            if not data:
                break
            chunks.append(data)
    finally:
        sock.close()

    rdata = b''.join(chunks)
    if not isinstance(rdata, str):
        # Python 3: callers split the reply on '\n', so hand back text
        rdata = rdata.decode('utf-8', 'replace')
    return rdata
def _zk_key(raw_key):
    ''' Normalize a zookeeper stat name into a lowercase snake_case key '''
    return raw_key.strip().lower().replace(' ', '_')


def _zk_value(raw_value):
    ''' Return the stripped value, converted to int when it is all digits '''
    stripped = raw_value.strip()
    if stripped.isdigit():
        return int(stripped)
    return stripped


def _zk_pairs(lines, separator):
    ''' Parse "key<separator>value" lines into a dict; lines without the separator are skipped '''
    parsed = {}
    for line in lines:
        # Split on the first separator only, so values may contain it too
        key, found, value = line.partition(separator)
        if found:
            parsed[_zk_key(key)] = _zk_value(value)
    return parsed


def zookeeper_stats():
    ''' Retrieve zookeeper stats

    Queries the local zookeeper with the 'conf', 'envi', 'srvr' and 'cons'
    commands and returns a dict with the sections:
      - 'configuration': key/value pairs of 'conf',
      - 'environment': key/value pairs of 'envi' (its first line is skipped),
      - 'server': key/value pairs of 'srvr'; 'version' is a list of ints when
        the version string contains dots, and 'latency' is a dict with
        'min', 'avg' and 'max',
      - 'clients': one entry per 'cons' line, keyed by '[address]:port'.

    Keys are lowercased with spaces replaced by underscores; all-digit values
    are converted to int. Lines that do not have the expected shape are
    skipped.
    '''
    stats = {}
    stats['configuration'] = _zk_pairs(zookeeper_cmd('conf').split('\n'), '=')
    stats['environment'] = _zk_pairs(zookeeper_cmd('envi').split('\n')[1:], '=')

    server = {}
    for line in zookeeper_cmd('srvr').split('\n'):
        if not line:
            continue
        if re.findall('^Zookeeper version:', line, flags=re.IGNORECASE):
            version_str = line.split(':')[1].strip()
            version = version_str
            if '-' in version_str:
                # Drop the build suffix that follows the dotted version
                version_str = version_str.split('-')[0]
            if '.' in version_str:
                version = [int(elem) for elem in version_str.split('.') if elem.strip().isdigit()]
            server['version'] = version
            continue
        if re.findall('^Latency min/avg/max:', line, flags=re.IGNORECASE):
            latency_min, latency_avg, latency_max = line.split(':')[1].strip().split('/')
            server['latency'] = {'min':int(latency_min),'max':int(latency_max),'avg':int(latency_avg)}
            continue
        server.update(_zk_pairs([line], ':'))
    stats['server'] = server

    clients = {}
    for line in zookeeper_cmd('cons').split('\n'):
        # Expected shape: " /<address>:<port>[<direction>](key=value,...)"
        match = re.match(r'^\s*/(.+):(\d+)\[(\d+)\]\((.+)\)$', line)
        if not match:
            continue
        addr, port, direction, attrs = match.groups()
        client = '['+addr+']:'+str(port)
        clients[client] = {'direction': int(direction)}
        clients[client].update(_zk_pairs(attrs.split(','), '='))
    stats['clients'] = clients

    return stats
def get_zookeeper_leader(target='I@opencontrail:control', target_type='compound', ignore_dead=False, **kwargs):
    ''' Retrieve zookeeper leader

    Runs health_checks.zookeeper_stats on the targeted minions and returns
    the id of the minion whose server mode is 'leader' (the last one found
    if several report it), or None when no minion does.
    Returns False (and sets the salt retcode to 2) when _minions_output
    returns a false value.
    '''
    agent = "zookeeper leader retrieve"
    cmd_kwargs = {
        'tgt': target,
        'tgt_type': target_type,
        'fun': 'health_checks.zookeeper_stats',
        'timeout': 3,
    }
    out = __salt__['saltutil.cmd'](**cmd_kwargs) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    leader = None
    for minion in out:
        if out[minion]['ret']['server']['mode'] == 'leader':
            leader = minion
    return leader
def contrail_vrouter_list(api_host='127.0.0.1', api_port=9100):
    ''' Retrieve and list contrail vrouters.
        Valid targets: Contrail controllers.

    api_host is either a base URL starting with 'http' or a bare host, which
    is then prefixed with 'http://'; api_port is appended in both cases.

    Returns a list of {'name': ..., 'uuid': ...} dicts, one per entry of the
    '/virtual-routers' document. The name is the last fq_name component that
    is not "default-global-system-config", or None when there is none.
    This is a best-effort helper: any error results in an empty dict.
    '''
    try:
        if api_host[0:4] == 'http':
            url = api_host + ':' + str(api_port)
        else:
            url = 'http://' + api_host + ':' + str(api_port)

        vrouters = requests.get(url + '/virtual-routers').json()
        vrouter_list = []
        for vr in vrouters['virtual-routers']:
            vr_uuid = vr['uuid']
            # Reset for every vrouter so that a name found for the previous
            # one is never reused.
            vr_name = None
            for name in vr['fq_name']:
                if name != "default-global-system-config":
                    vr_name = name
            vrouter_list.append({'name': vr_name, 'uuid': vr_uuid})
        return vrouter_list
    except Exception:
        # Deliberately broad, but unlike a bare except it lets
        # KeyboardInterrupt and SystemExit propagate.
        return {}
def contrail_vrouter_show(vr_uuid, api_host='127.0.0.1', api_port=9100):
    ''' Retrieve contrail vrouter data
        Valid targets: Contrail controllers.

    api_host is either a base URL starting with 'http' or a bare host, which
    is then prefixed with 'http://'; api_port is appended in both cases.

    Returns the decoded JSON document of '/virtual-router/<vr_uuid>'.
    This is a best-effort helper: any error results in an empty dict.
    '''
    try:
        if api_host[0:4] == 'http':
            url = api_host + ':' + str(api_port)
        else:
            url = 'http://' + api_host + ':' + str(api_port)
        return requests.get(url + '/virtual-router/' + vr_uuid).json()
    except Exception:
        # Deliberately broad, but unlike a bare except it lets
        # KeyboardInterrupt and SystemExit propagate.
        return {}
def _xmletree_descend_child(given_child, tag_requested):
    ''' Returns xmletree subelement by tag name

    Yields the first direct child of given_child whose tag equals
    tag_requested, or an empty dict when no child matches.
    '''
    matching = (node for node in given_child if node.tag == tag_requested)
    return next(matching, {})
def contrail_vrouter_agent_status(api_host='127.0.0.1', api_port=8085):
    ''' Retrieve contrail vrouter agent status

    api_host is either a base URL starting with 'http' or a bare host, which
    is then prefixed with 'http://'; api_port is appended in both cases.

    On success returns a dict with the agent 'state' and a 'connections'
    list; every connection has 'type', 'name', 'status', 'description' and
    'server_addrs' (a list of {'host': ..., 'port': ...} dicts).
    On failure returns a string describing what went wrong: the request
    failed, the status code was not 200, the reply is not XML, or the XML
    does not have the expected layout.
    '''
    import xml.etree.ElementTree as ET

    if api_host[0:4] == 'http':
        url = api_host + ':' + str(api_port)
    else:
        url = 'http://' + api_host + ':' + str(api_port)

    try:
        req = requests.get(url + '/Snh_SandeshUVECacheReq?x=NodeStatus')
    except Exception as exc:
        # Without a response object nothing below can work, so report the
        # failure here instead of tripping over an unbound variable later.
        return "Could not fetch data from vrouter agent via %s.\n%s" % (url, str(exc))

    if int(req.status_code) != 200:
        return "Could not fetch data from vrouter agent via %s.\nGot bad status code: %s\n%s" % (url, str(req.status_code), str(req.text))

    try:
        xmletree = ET.fromstring(req.text)
    except Exception:
        return "Could not parse xml tree %s" % str(req.text)

    try:
        vrouter_data = {}
        # Walk down to the first ProcessStatus element of the NodeStatus UVE
        child = xmletree
        for tag in ('NodeStatusUVE', 'data', 'NodeStatus', 'process_status', 'list', 'ProcessStatus'):
            child = _xmletree_descend_child(child, tag)

        vrouter_data['state'] = _xmletree_descend_child(child, 'state').text
        vrouter_data['connections'] = []
        child = _xmletree_descend_child(child, 'connection_infos')
        for elem in _xmletree_descend_child(child, 'list'):
            conn = {}
            conn['type'] = _xmletree_descend_child(elem,'type').text
            conn['name'] = _xmletree_descend_child(elem,'name').text
            conn['status'] = _xmletree_descend_child(elem,'status').text
            conn['description'] = _xmletree_descend_child(elem,'description').text
            conn['server_addrs'] = []
            server_addrs = _xmletree_descend_child(elem,'server_addrs')
            for srv in _xmletree_descend_child(server_addrs,'list'):
                host, port = srv.text.split(':')
                conn['server_addrs'].append({'host': host, 'port': port})
            vrouter_data['connections'].append(conn)
        return vrouter_data
    except Exception:
        return "Unsupported xml tree for this function %s" % str(req.text)
def contrail_collector_agent_status(vr_name, api_host='auto', api_port=9081):
    ''' Retrieve contrail vrouter agent status from analyticsdb

    api_host is a base URL starting with 'http', a bare host (prefixed with
    'http://'), or 'auto', in which case the address is read from the
    '_param:opencontrail_analytics_address' pillar key. api_port is appended
    in every case.

    Returns the decoded JSON of '/analytics/uves/vrouter/<vr_name>?flat', or
    an error string when the status code is not 200.
    '''
    if api_host[0:4] == 'http':
        base = api_host
    elif api_host == 'auto':
        base = 'http://' + __salt__['pillar.get']('_param:opencontrail_analytics_address')
    else:
        base = 'http://' + api_host
    url = base + ':' + str(api_port)

    req = requests.get(url + '/analytics/uves/vrouter/' + vr_name + '?flat')
    if int(req.status_code) == 200:
        return json.loads(req.text)
    return "Could not fetch data from vrouter agent via %s.\nGot bad status code: %s\n%s" % (url, str(req.status_code), str(req.text))
def contrail_control_peers_summary(api_host='auto', api_port=8083):
    ''' Retrieve contrail control peers summary

    api_host is a base URL starting with 'http', a bare host (prefixed with
    'http://'), or 'auto', which means 127.0.0.1. api_port is appended in
    every case.

    Returns a list with one dict per element of the first <list> node of the
    '/Snh_ShowBgpNeighborSummaryReq' reply, mapping each child tag to its
    text. Returns an error string when the status code is not 200 or when
    the reply cannot be parsed.
    '''
    import xml.etree.ElementTree as ET

    if api_host[0:4] == 'http':
        url = api_host + ':' + str(api_port)
    elif api_host == 'auto':
        my_ip = '127.0.0.1'
        url = 'http://' + my_ip+ ':' + str(api_port)
    else:
        url = 'http://' + api_host + ':' + str(api_port)

    req = requests.get(url + '/Snh_ShowBgpNeighborSummaryReq')
    if int(req.status_code) != 200:
        return "Could not fetch data from contrail control via %s.\nGot bad status code: %s\n%s" % (url, str(req.status_code), str(req.text))

    summary = req.text
    try:
        peer_list = ET.fromstring(summary).find('.//list')
    except Exception:
        return "Could not parse xml tree %s" % str(summary)
    if peer_list is None:
        # No <list> node at all: report it the same way as unparsable XML
        return "Could not parse xml tree %s" % str(summary)

    peers = []
    for elem in peer_list:
        peers.append(dict((child.tag, child.text) for child in elem))
    return peers
def contrail_control_peer_status(api_host='auto', api_port=8083, fields=default_peer_filter):
    ''' Contrail control peer status

    Fetches the peers summary from the given api_host/api_port and returns a
    dict keyed by peer name; each value holds only the attributes of that
    peer whose name is listed in fields.

    When the summary could not be retrieved, contrail_control_peers_summary
    returns an error string; it is returned unchanged.
    '''
    # Forward the endpoint so that non-default api_host/api_port are honoured
    summary = contrail_control_peers_summary(api_host=api_host, api_port=api_port)
    if not isinstance(summary, list):
        return summary

    peer_status = {}
    for peer_elem in summary:
        elem = dict((attr, peer_elem[attr]) for attr in peer_elem if attr in fields)
        peer_status[peer_elem["peer"]] = elem
    return peer_status
def _get_object(json_obj, obj_path):
    ''' Retrieve subelement of a JSON object or value

    obj_path is a ':'-separated path. Each component selects one level:
      - a number selects that index of a list; the result is wrapped in a
        one-element list,
      - '*' selects every element of a list; the result is a list,
      - anything else is a dict key; the result is wrapped in {key: ...}.
    The returned structure therefore mirrors the path down to the value.

    Raises KeyError/IndexError/TypeError when the path does not match the
    object, and ValueError when an embedded string is not valid JSON.
    '''
    # Contrail output may have nested JSON: decode embedded documents at any
    # level. (str, type(u'')) covers str/unicode on Python 2 and str on Python 3.
    if isinstance(json_obj, (str, type(u''))):
        json_obj = json.loads(json_obj)

    if ':' not in obj_path:
        return { obj_path: json_obj[obj_path] }

    k, v = obj_path.split(':', 1)
    if k.isdigit():
        # Return specific element path
        return [ _get_object(json_obj[int(k)], v) ]
    if k == '*':
        # Return all list elements from the path
        return [ _get_object(el, v) for el in json_obj ]
    return { k: _get_object(json_obj[k], v) }
def _deepmerge(o1, o2):
    ''' Deep merge JSON objects

    Returns a new object combining o1 and o2, where o2 wins on conflicts:
      - two dicts are merged key by key, recursing into keys present in both,
      - two lists are merged index by index over the length of o2; elements
        of o2 without a counterpart in o1 are taken as they are,
      - in every other case (different types, scalars, other containers)
        o2 is returned.
    Neither argument is modified.
    '''
    kind = type(o1)
    if kind is not type(o2):
        return o2

    if kind is dict:
        merged = {}
        for key in set(o1) | set(o2):
            if key in o1 and key in o2:
                merged[key] = _deepmerge(o1[key], o2[key])
            elif key in o1:
                merged[key] = o1[key]
            else:
                merged[key] = o2[key]
        return merged

    if kind is list:
        # Pair elements by position; looking an element up by value would
        # pick the wrong slot as soon as the list holds equal items.
        return [
            _deepmerge(o1[idx], item) if idx < len(o1) else item
            for idx, item in enumerate(o2)
        ]

    return o2
def contrail_vrouter_agent_info(vr_name, filter_map=default_vrouter_info_map):
    ''' Retrieve filtered contrail vrouter agent info from analyticsdb

    filter_map maps a top-level section of the agent status document to a
    list of ':'-separated element paths. For every path the selected
    sub-structure is extracted with _get_object and deep-merged into the
    result under that section.
    '''
    agent_status = contrail_collector_agent_status(vr_name)
    info = {}
    for section in filter_map:
        info[section] = {}
        for path in filter_map[section]:
            selected = _get_object(agent_status[section], path)
            info = _deepmerge(info, { section: selected })
    return info
def contrail_mesh_check(target='I@opencontrail:control', target_type='compound', ignore_dead=False, strict=False, **kwargs):
    ''' Check if contrail elements are connected to each other

    Runs health_checks.contrail_control_peer_status on the targeted minions
    and builds, for every peer name seen, the list of controllers reporting
    it. Each element is classified by the encoding of its first report:
    XMPP means COMPUTE; BGP means CONTROLLER when the name is one of the
    targeted minions and EDGE-ROUTER otherwise.

    An element fails when
      - a COMPUTE is reported by fewer than 2 controllers,
      - a CONTROLLER is reported by fewer than (number of minions - 1),
      - an EDGE-ROUTER is not reported by exactly all minions,
      - any of its peerings is not in the 'Established' state.

    Unless strict is set, minion and peer names are compared by their first
    dot-separated component only.

    Returns False (and sets the salt retcode to 2) when _minions_output
    returns a false value or any element failed, True otherwise. Passing
    debug=True logs the collected elements.
    '''
    agent = "contrail mesh check"
    out = __salt__['saltutil.cmd']( tgt=target,
                                    tgt_type=target_type,
                                    fun='health_checks.contrail_control_peer_status',
                                    timeout=3
                                  ) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    def _node_name(raw_name):
        # Names are compared without the domain part unless strict mode is on
        return raw_name if strict else raw_name.split('.')[0]

    minions = [_node_name(node) for node in out]

    # First pass: register every element once and classify it
    elements = {}
    for node in out:
        peer_elem = out[node]["ret"]
        for raw_name in peer_elem:
            elem = _node_name(raw_name)
            if elem in elements:
                continue
            # Look the report up by the name the minion used, not by the
            # shortened one, which is not a key of peer_elem.
            report = peer_elem[raw_name]
            element = {"peers": [], "my_address": report["peer_address"]}
            if report["encoding"] == "XMPP":
                element["type"] = "COMPUTE"
            elif report["encoding"] == "BGP":
                if elem in minions:
                    element["type"] = "CONTROLLER"
                else:
                    element["type"] = "EDGE-ROUTER"
            elements[elem] = element

    # Second pass: attach every report to its element, tagged with the
    # reporting controller. Work on copies so the minion output is left
    # untouched for the failure log below.
    for node in out:
        peer_name = _node_name(node)
        peer_elem = out[node]["ret"]
        for raw_name in peer_elem:
            report = dict(peer_elem[raw_name])
            report["peer"] = peer_name
            report.pop("peer_address", None)
            elements[_node_name(raw_name)]["peers"].append(report)

    failed_peers = []
    for elem_name in elements:
        elem = elements[elem_name]
        # Elements with an encoding other than XMPP/BGP have no type and are
        # only checked for the state of their peerings.
        elem_type = elem.get("type")
        peers_count = len(elem["peers"])
        failed = False
        if elem_type == "COMPUTE" and peers_count < 2:
            failed = True
        if elem_type == "CONTROLLER" and peers_count < len(minions)-1:
            failed = True
        if elem_type == "EDGE-ROUTER" and peers_count != len(minions):
            failed = True
        for peer in elem["peers"]:
            if peer["state"] != "Established":
                failed = True
        if failed and elem not in failed_peers:
            failed_peers.append(elem)

    if len(failed_peers) > 0:
        logger.error("%s check FAILED" % agent)
        if strict:
            logger.error("Strict mode is on. Check DNS names in output")
            logger.error("Minions output:")
            logger.error(json.dumps(out, indent=4))
        else:
            logger.error("Failed peers:")
            logger.error(json.dumps(failed_peers, indent=4))
        __context__['retcode'] = 2
        return False

    if kwargs.get("debug", False):
        logger.info(json.dumps(elements, indent=4))
    return True
def kafka_brokers_ids():
    ''' Retrieve kafka brokers ids

    Scans the output of the zookeeper 'dump' command for lines containing
    '/brokers/ids/' and returns the fourth '/'-separated field of each such
    line as an int.
    '''
    dump_lines = zookeeper_cmd('dump').split('\n')
    return [
        int(entry.split('/')[3])
        for entry in dump_lines
        if entry and '/brokers/ids/' in entry
    ]
def libvirt_capabilities():
    ''' JSON formatted libvirt capabilities list

    Runs `virsh capabilities` and returns a dict mapping each guest
    architecture name to the list of its machine types that carry no
    'canonical' attribute.

    Returns an error string instead when virsh cannot be started, when its
    output is not valid XML, or when the XML does not have the expected
    layout.
    '''
    import xml.etree.ElementTree as ET

    try:
        proc = subprocess.Popen(['virsh', 'capabilities'], stdout=subprocess.PIPE)
        stdout, stderr = proc.communicate()
    except OSError as exc:
        # virsh is missing or not executable: there is no output to report
        return "Could not run virsh capabilities: %s" % str(exc)

    try:
        xmletree = ET.fromstring(stdout)
    except Exception:
        return "Could not parse xml tree %s" % str(stdout)

    try:
        capabilities = {}
        for elem in xmletree:
            if elem.tag != "guest":
                continue
            for el in elem:
                if el.tag != 'arch':
                    continue
                _name = el.attrib['name']
                capabilities[_name] = [
                    arch.text for arch in el
                    if arch.tag == 'machine' and 'canonical' not in arch.attrib
                ]
        return capabilities
    except Exception:
        return "Unsupported xml tree for this function %s" % str(stdout)
def keystone_keys_attractor(keys_dir='/var/lib/keystone/fernet-keys', keys_ids=range(0,-4,-1)):
    ''' JSON formatted dict of keystone keys sha256 sums

    Lists keys_dir, orders the file names (numeric names by their numeric
    value, any other names after them alphabetically) and, for every index
    in keys_ids, maps the file name at that position to the SHA-256 of its
    content applied three times (each round hashes the hex digest of the
    previous one). Only these digests leave the host, never the key
    material itself.

    Processing stops quietly at the first index that does not exist or file
    that cannot be read; what was collected so far is returned.
    Raises OSError when keys_dir itself cannot be listed.
    '''
    # Key files are named by their index, so '10' must sort after '9'
    keys = sorted(
        os.listdir(keys_dir),
        key=lambda name: (0, int(name), name) if name.isdigit() else (1, 0, name)
    )

    keys_dict = {}
    try:
        for i in keys_ids:
            key_name = str(keys[i])
            # Binary mode: hashlib works on bytes
            with open("%s/%s" % (keys_dir, key_name), 'rb') as key_file:
                digest = hashlib.sha256(key_file.read()).hexdigest()
            for _ in range(2):
                digest = hashlib.sha256(digest.encode('ascii')).hexdigest()
            keys_dict[key_name] = digest
    except (IndexError, IOError, OSError):
        # Best effort: fewer keys than requested or an unreadable entry
        pass
    return keys_dict
def keystone_keys_check(target='I@keystone:server', target_type='compound', ignore_dead=False, **kwargs):
    ''' Check cluster keystone keys are in sync

    Optional keyword arguments:
      keys_type  'fernet' (default) or 'credential'
      keys_dir   directory with the keys, '/var/lib/keystone/<keys_type>-keys'
                 by default
      debug      log the verified minions

    Runs health_checks.keystone_keys_attractor on the targeted minions.
    A minion returning an empty dict is put on the failed list; when the
    minions returned more than one distinct result, all of them are.

    Returns False (and sets the salt retcode to 2) for an unsupported
    keys_type or when _minions_output or _failed_minions returns a false
    value, True otherwise.
    '''
    keys_type = kwargs.get("keys_type", 'fernet')
    supported_key_types = ['fernet', 'credential']
    if keys_type not in supported_key_types:
        logger.error("Unsupported keys type: %s" % str(keys_type))
        logger.error("Supported keys type are: %s" % str(supported_key_types))
        __context__['retcode'] = 2
        return False

    agent = "keystone %s keys sync" % keys_type
    keys_dir = kwargs.get("keys_dir", '/var/lib/keystone/%s-keys' % keys_type)

    cmd_kwargs = {
        'tgt': target,
        'tgt_type': target_type,
        'fun': 'health_checks.keystone_keys_attractor',
        'arg': ["keys_dir='%s'" % keys_dir],
        'timeout': 3,
    }
    out = __salt__['saltutil.cmd'](**cmd_kwargs) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    cluster_attractors = []
    failed_minions = []
    verified_minions = []
    for minion in out:
        verified_minions.append(minion)
        attractor = out[minion]['ret']
        if attractor == {}:
            failed_minions.append(minion)
        if attractor not in cluster_attractors:
            cluster_attractors.append(attractor)

    if not _failed_minions(out, agent, failed_minions):
        __context__['retcode'] = 2
        return False

    if len(cluster_attractors) > 1:
        # Results differ between minions: report every one of them
        failed_minions = list(out)
        if not _failed_minions(out, agent, failed_minions):
            __context__['retcode'] = 2
            return False

    if kwargs.get("debug", False):
        logger.info("%s check done." % agent)
        logger.info(verified_minions)
    return True
def list_namespaces(raw_output=False):
    ''' JSON formatted ip netns dict

    Runs `ip netns` and returns a list with one {'id': ..., 'uuid': ...}
    dict per non-empty output line:
      - with raw_output, 'uuid' is the whole line and 'id' is -2,
      - for a line of the form '<name> (id: <n>)', 'uuid' is <name> and
        'id' is <n> as an int,
      - for any other line 'id' is -1 and 'uuid' is the first token when
        the line has several, else the whole line.
    '''
    # universal_newlines makes communicate() return text on Python 3 as well
    proc = subprocess.Popen(['ip', 'netns'], stdout=subprocess.PIPE, universal_newlines=True)
    stdout, stderr = proc.communicate()

    namespaces = []
    for line in stdout.split('\n'):
        if not line:
            continue
        if raw_output:
            namespaces.append({'id': -2, 'uuid': line})
            continue
        match = re.match(r'^(\S+)\s+\(id:\s*(\d+)\)', line)
        if match:
            ns_uuid = match.group(1)
            ns_id = int(match.group(2))
        else:
            tokens = line.split()
            ns_uuid = tokens[0] if len(tokens) > 1 else line
            ns_id = -1
        namespaces.append({'id': ns_id, 'uuid': ns_uuid})
    return namespaces
def _load_mysql_module():
    ''' Import and return the MySQLdb module

    Returns the module object, or False (after logging an error and setting
    the salt retcode to 2) when it cannot be imported.
    '''
    # A plain import is enough: Python caches loaded modules, so repeated
    # calls are cheap. Probing a function-local name for an "already loaded"
    # module can never succeed.
    try:
        import MySQLdb
    except Exception:
        logger.error("Python library MySQLdb could not be loaded. Install it first")
        __context__['retcode'] = 2
        return False
    return MySQLdb
def get_keystone_db_versions(db_host, db_user, db_pass, db_name="keystone"):
    ''' Return dict of keystone DB versions

    Reads the 'keystone' and 'keystone_data_migrate' rows of the
    migrate_version table and returns
      'db'          schema version (0 when there is not exactly one row)
      'data_db'     data migration version (0 when unknown)
      'os_release'  release name from db_ver_map matching these versions;
                    the key is absent when no release matches.
    Returns False when the MySQLdb module is not available.
    '''
    MySQLdb = _load_mysql_module()
    if not MySQLdb:
        return MySQLdb

    keystone_versions = {}
    db = MySQLdb.connect(db_host, db_user, db_pass, db_name)
    try:
        cursor = db.cursor()
        cursor.execute("select `version` from `migrate_version` where `repository_id` like 'keystone'")
        if cursor.rowcount == 1:
            keystone_db_ver = cursor.fetchone()[0]
        else:
            keystone_db_ver = 0

        # Only trust rowcount of the second query when it was actually run
        keystone_data_ver = 0
        if keystone_db_ver > 0:
            cursor.execute("select `version` from `migrate_version` where `repository_id` like 'keystone_data_migrate'")
            if cursor.rowcount == 1:
                keystone_data_ver = cursor.fetchone()[0]
    finally:
        db.close()

    keystone_versions['db'] = keystone_db_ver
    keystone_versions['data_db'] = keystone_data_ver
    for release in db_ver_map:
        keystone_obj = db_ver_map[release]["keystone"]
        if isinstance(keystone_obj, dict):
            # Releases that track schema and data migrations separately
            keystone_db_map = int(keystone_obj["db"])
            keystone_data_map = int(keystone_obj["data"])
            if keystone_db_map == keystone_db_ver and keystone_data_map == keystone_data_ver:
                keystone_versions['os_release'] = release
        else:
            keystone_db_map = int(keystone_obj)
            if keystone_db_map == keystone_db_ver:
                keystone_versions['os_release'] = release
    return keystone_versions
def get_glance_db_versions(db_host, db_user, db_pass, db_name="glance"):
    ''' Return dict of glance DB versions

    Looks at both version markers a glance database may carry:
      - the 'Glance Migrations' row of migrate_version, whose number is
        matched against db_ver_map,
      - the single row of alembic_version, whose value is expected to
        contain a release name; it takes precedence when present.
    A missing table or a result that is not exactly one row is treated as
    "no marker".

    Returns {'db': ..., 'os_release': ...}; 'db' is 0 and 'os_release' is
    "unknown" when nothing was found. Returns False when the MySQLdb module
    is not available.
    '''
    MySQLdb = _load_mysql_module()
    if not MySQLdb:
        return MySQLdb

    glance_versions = {}
    glance_release = "unknown"
    glance_db_ver = 0

    db = MySQLdb.connect(db_host, db_user, db_pass, db_name)

    def _single_value(query):
        # Value of the only row returned by query, or None
        cursor = db.cursor()
        try:
            cursor.execute(query)
        except MySQLdb.Error:
            return None
        if cursor.rowcount != 1:
            return None
        return cursor.fetchone()[0]

    try:
        legacy_ver = _single_value("select `version` from `migrate_version` where `repository_id` like 'Glance Migrations'")
        alembic_ver = _single_value("select `version_num` from `alembic_version`")
    finally:
        db.close()

    if legacy_ver is not None:
        glance_db_ver = legacy_ver
        for release in db_ver_map:
            if int(db_ver_map[release]["glance"]) == glance_db_ver:
                glance_release = release

    if alembic_ver is not None:
        # Fall back to the raw marker when no known release name is in it
        glance_release = alembic_ver
        for release in db_ver_map:
            if release in alembic_ver:
                glance_db_ver = db_ver_map[release]["glance"]
                glance_release = "%s (alembic)" % release

    glance_versions['db'] = glance_db_ver
    glance_versions['os_release'] = glance_release
    return glance_versions
def get_cinder_db_versions(db_host, db_user, db_pass, db_name="cinder"):
    ''' Return dict of cinder DB versions

    Reads the 'cinder' row of the migrate_version table and returns
    {'db': <version or 0>, 'os_release': <matching db_ver_map release or ''>}.
    Returns False when the MySQLdb module is not available.
    '''
    MySQLdb = _load_mysql_module()
    if not MySQLdb:
        return MySQLdb

    connection = MySQLdb.connect(db_host, db_user, db_pass, db_name)
    cursor = connection.cursor()
    cursor.execute("select `version` from `migrate_version` where `repository_id` like 'cinder'")
    # Anything but exactly one row counts as "version unknown"
    cinder_db_ver = cursor.fetchone()[0] if cursor.rowcount == 1 else 0
    connection.close()

    cinder_release = ""
    for release in db_ver_map:
        if int(db_ver_map[release]["cinder"]) == cinder_db_ver:
            cinder_release = release

    return {'db': cinder_db_ver, 'os_release': cinder_release}
def get_heat_db_versions(db_host, db_user, db_pass, db_name="heat"):
    ''' Return dict of heat DB versions

    Reads the 'heat' row of the migrate_version table and returns
    {'db': <version or 0>, 'os_release': <matching db_ver_map release or ''>}.
    Returns False when the MySQLdb module is not available.
    '''
    MySQLdb = _load_mysql_module()
    if not MySQLdb:
        return MySQLdb

    connection = MySQLdb.connect(db_host, db_user, db_pass, db_name)
    cursor = connection.cursor()
    cursor.execute("select `version` from `migrate_version` where `repository_id` like 'heat'")
    # Anything but exactly one row counts as "version unknown"
    heat_db_ver = cursor.fetchone()[0] if cursor.rowcount == 1 else 0
    connection.close()

    heat_release = ""
    for release in db_ver_map:
        if int(db_ver_map[release]["heat"]) == heat_db_ver:
            heat_release = release

    return {'db': heat_db_ver, 'os_release': heat_release}
def get_neutron_db_versions(db_host, db_user, db_pass, db_name="neutron"):
    ''' Return dict of neutron DB versions

    Reads every revision id stored in the alembic_version table and returns
      'db_versions'  list of these revision ids (empty when the query fails)
      'os_release'   a release from db_ver_map whose neutron revision list
                     contains one of them, or "unknown (no marker found)".
    Returns False when the MySQLdb module is not available.
    '''
    MySQLdb = _load_mysql_module()
    if not MySQLdb:
        return MySQLdb

    neutron_versions = {}
    db = MySQLdb.connect(db_host, db_user, db_pass, db_name)
    try:
        cursor = db.cursor()
        try:
            cursor.execute("select `version_num` from `alembic_version`")
            neutron_db_versions = [row[0] for row in cursor.fetchall()]
        except Exception:
            # Best effort: an unreadable table means "no revisions known"
            neutron_db_versions = []
    finally:
        db.close()

    neutron_release = "unknown (no marker found)"
    for release in db_ver_map:
        for commit_id in neutron_db_versions:
            if commit_id in db_ver_map[release]["neutron"]:
                neutron_release = release

    neutron_versions['db_versions'] = neutron_db_versions
    neutron_versions['os_release'] = neutron_release
    return neutron_versions
def get_nova_db_versions(db_host, db_user, db_pass, db_name="nova", db_api_name="nova_api", db_api_pass=""):
    ''' Return dict of nova DB versions

    Reads the 'nova' row of migrate_version in the main database and, when
    that version is above 0, the 'nova_api' row in the API database.
    db_api_pass is the password for the API database; db_pass is used for
    it when db_api_pass is empty.

    Returns
      'db'          main schema version (0 when there is not exactly one row)
      'api_db'      API schema version (0 when unknown)
      'os_release'  release name from db_ver_map matching these versions;
                    the key is absent when no release matches.
    Returns False when the MySQLdb module is not available.
    '''
    MySQLdb = _load_mysql_module()
    if not MySQLdb:
        return MySQLdb

    if not db_api_pass:
        db_api_pass = db_pass

    nova_versions = {}

    db = MySQLdb.connect(db_host, db_user, db_pass, db_name)
    try:
        cursor = db.cursor()
        cursor.execute("select `version` from `migrate_version` where `repository_id` like 'nova'")
        if cursor.rowcount == 1:
            nova_db_ver = cursor.fetchone()[0]
        else:
            nova_db_ver = 0
    finally:
        db.close()

    # The API database has its own password, which defaults to db_pass above
    db = MySQLdb.connect(db_host, db_user, db_api_pass, db_api_name)
    try:
        nova_apidb_ver = 0
        if nova_db_ver > 0:
            cursor = db.cursor()
            cursor.execute("select `version` from `migrate_version` where `repository_id` like 'nova_api'")
            if cursor.rowcount == 1:
                nova_apidb_ver = cursor.fetchone()[0]
    finally:
        db.close()

    nova_versions['db'] = nova_db_ver
    nova_versions['api_db'] = nova_apidb_ver
    for release in db_ver_map:
        nova_obj = db_ver_map[release]["nova"]
        if isinstance(nova_obj, dict):
            nova_db_map = int(nova_obj["db"])
            nova_apidb_map = int(nova_obj["api_db"])
            if nova_db_map == nova_db_ver and nova_apidb_map == nova_apidb_ver:
                nova_versions['os_release'] = release
        else:
            nova_db_map = int(nova_obj)
            if nova_db_map == nova_db_ver:
                nova_versions['os_release'] = release
    return nova_versions
def list_db_versions():
    ''' Retrieve openstack DB release codenames

    Reads the database address and the per-service MySQL credentials from
    pillar (the user name defaults to the service name) and returns a dict
    mapping 'cinder', 'glance', 'heat', 'neutron', 'keystone' and 'nova' to
    the result of the matching get_<service>_db_versions function.
    '''
    pillar_get = __salt__['pillar.get']
    db_host = str(pillar_get('_param:openstack_database_address'))

    # (service, user pillar key, password pillar key, version getter)
    services = (
        ('cinder', '_param:mysql_cinder_username', '_param:mysql_cinder_password', get_cinder_db_versions),
        ('glance', '_param:mysql_glance_username', '_param:mysql_glance_password', get_glance_db_versions),
        ('heat', '_param:mysql_heat_username', '_param:mysql_heat_password', get_heat_db_versions),
        ('neutron', '_param:mysql_neutron_username', '_param:mysql_neutron_password', get_neutron_db_versions),
        ('keystone', '_param:mysql_keystone_username', '_param:mysql_keystone_password', get_keystone_db_versions),
        ('nova', '_param:mysql_nova_username', '_param:mysql_nova_password', get_nova_db_versions),
    )

    credentials = {}
    for service, user_key, pass_key, _getter in services:
        credentials[service] = (str(pillar_get(user_key, service)), str(pillar_get(pass_key)))

    os_db_releases = {}
    for service, _user_key, _pass_key, getter in services:
        db_user, db_pass = credentials[service]
        os_db_releases[service] = getter(db_host, db_user, db_pass)
    return os_db_releases