Network check HTML report

- uniform map generation
- POC for additional checks on the environment
Change-Id: I52edcc94f4d9826cbfb1159e5311514097a15f62
Related-PROD: PROD-32792
diff --git a/cfg_checker/common/const.py b/cfg_checker/common/const.py
index 966f3d3..5826f43 100644
--- a/cfg_checker/common/const.py
+++ b/cfg_checker/common/const.py
@@ -35,7 +35,7 @@
ACT_NA: ""
}
-all_statuses = {
+all_pkg_statuses = {
VERSION_OK: "ok",
VERSION_UP: "upgraded",
VERSION_DOWN: "downgraded",
@@ -43,6 +43,11 @@
VERSION_NA: "no status"
}
+node_status = {
+ NODE_UP: "up",
+ NODE_DOWN: "down"
+}
+
uknown_code = "unk"
all_roles_map = {
diff --git a/cfg_checker/helpers/errors.py b/cfg_checker/helpers/errors.py
index 83ae803..95e1495 100644
--- a/cfg_checker/helpers/errors.py
+++ b/cfg_checker/helpers/errors.py
@@ -102,12 +102,12 @@
# format message
_msg = "### {}:\n Description: {}\n{}".format(
_code,
- self._get_error_type_text(self._errors[index]['type']),
+ self.get_error_type_text(self._errors[index]['type']),
"\n".join(_data)
)
return _msg
- def _get_error_type_text(self, err_type):
+ def get_error_type_text(self, err_type):
if err_type not in self._types:
raise ErrorMappingException(
"type code {} not found".format(err_type)
diff --git a/cfg_checker/modules/network/__init__.py b/cfg_checker/modules/network/__init__.py
index 6b06022..6b9013a 100644
--- a/cfg_checker/modules/network/__init__.py
+++ b/cfg_checker/modules/network/__init__.py
@@ -66,15 +66,6 @@
return _parser
-def _prepare_map():
- _mapper = mapper.NetworkMapper()
- _mapper.map_network(_mapper.RECLASS)
- _mapper.map_network(_mapper.RUNTIME)
- _mapper.map_network(_mapper.CONFIG)
-
- return _mapper
-
-
def do_check(args):
# Net Checks
# should not print map, etc...
@@ -104,6 +95,10 @@
_filename = args_utils.get_arg(args, 'html')
netChecker = checker.NetworkChecker()
+ netChecker.check_networks(map=False)
+
+ # save what was collected
+ netChecker.errors.save_iteration_data()
netChecker.create_html_report(_filename)
return
@@ -114,7 +109,9 @@
# Should generate network map to console or HTML
logger_cli.info("# Network report")
- networkMap = _prepare_map()
+ networkMap = mapper.NetworkMapper()
+ networkMap.prepare_all_maps()
+ networkMap.create_map()
networkMap.print_map()
return
diff --git a/cfg_checker/modules/network/checker.py b/cfg_checker/modules/network/checker.py
index 89db6ba..acd3bb1 100644
--- a/cfg_checker/modules/network/checker.py
+++ b/cfg_checker/modules/network/checker.py
@@ -10,11 +10,13 @@
self.errors = NetworkErrors()
self.mapper = NetworkMapper(self.errors)
- def check_networks(self):
+ def check_networks(self, map=True):
self.mapper.map_network(self.mapper.RECLASS)
self.mapper.map_network(self.mapper.RUNTIME)
- self.mapper.print_map()
+ self.mapper.create_map()
+ if map:
+ self.mapper.print_map()
def print_summary(self):
logger_cli.info(self.errors.get_summary(print_zeros=False))
@@ -39,10 +41,10 @@
filename
)
_report({
- "nodes": self.nodes,
- "network": {},
- "mcp_release": self.mcp_release,
- "openstack_release": self.openstack_release
+ "nodes": self.mapper.nodes,
+ "map": self.mapper.map,
+ "mcp_release": self.mapper.cluster['mcp_release'],
+ "openstack_release": self.mapper.cluster['openstack_release']
})
logger_cli.info("-> Done")
diff --git a/cfg_checker/modules/network/mapper.py b/cfg_checker/modules/network/mapper.py
index c3e3b73..ba9a256 100644
--- a/cfg_checker/modules/network/mapper.py
+++ b/cfg_checker/modules/network/mapper.py
@@ -36,6 +36,7 @@
# init networks and nodes
self.networks = {}
self.nodes = salt_master.get_nodes()
+ self.cluster = salt_master.get_info()
# init and pre-populate interfaces
self.interfaces = {k: {} for k in self.nodes}
# Init errors class
@@ -45,6 +46,11 @@
logger_cli.debug("... init error logs folder")
self.errors = NetworkErrors()
+ def prepare_all_maps(self):
+ self.map_network(self.RECLASS)
+ self.map_network(self.RUNTIME)
+ self.map_network(self.CONFIG)
+
# adding net data to tree
def _add_data(self, _list, _n, _h, _d):
if _n not in _list:
@@ -253,32 +259,23 @@
self.networks[source] = _networks
return _networks
- def print_map(self):
- """
- Create text report for CLI
+ def create_map(self):
+ """Create all needed elements for map output
:return: none
"""
_runtime = self.networks[self.RUNTIME]
_reclass = self.networks[self.RECLASS]
- logger_cli.info("# Networks")
- logger_cli.info(
- " {0:8} {1:25} {2:25} {3:6} {4:10} {5:10} {6}/{7}".format(
- "Host",
- "IF",
- "IP",
- "Proto",
- "MTU",
- "State",
- "Gate",
- "Def.Gate"
- )
- )
+
+ # main networks, target vars
+ _map = {}
# No matter of proto, at least one IP will be present for the network
+ # we are interested in, since we need to make sure that the L3 level
+ # is configured according to the reclass model
for network in _reclass:
# shortcuts
_net = str(network)
- logger_cli.info("-> {}".format(_net))
+ _map[_net] = {}
if network not in _runtime:
# reclass has network that not found in runtime
self.errors.add_error(
@@ -290,6 +287,7 @@
# hostnames
names = sorted(_runtime[network].keys())
for hostname in names:
+ _notes = []
node = hostname.split('.')[0]
if not salt_master.is_node_available(hostname, log=False):
logger_cli.info(
@@ -300,9 +298,15 @@
self.errors.NET_NODE_NON_RESPONSIVE,
host=hostname
)
+ _notes.append(
+ self.errors.get_error_type_text(
+ self.errors.NET_NODE_NON_RESPONSIVE
+ )
+ )
continue
# lookup interface name on node using network CIDR
_if_name = _runtime[network][hostname][0]["name"]
+ _raw = self.interfaces[hostname][_if_name]['runtime']
# get proper reclass
_r = self.interfaces[hostname][_if_name]['reclass']
_if_name_suffix = ""
@@ -315,6 +319,11 @@
host=hostname,
if_name=_if_name
)
+ _notes.append(
+ self.errors.get_error_type_text(
+ self.errors.NET_NODE_UNEXPECTED_IF
+ )
+ )
_if_rc = "*"
if "proto" in _r:
@@ -379,6 +388,11 @@
ip=_ip_str,
gateway=_gate
)
+ _notes.append(
+ self.errors.get_error_type_text(
+ self.errors.NET_UNEXPECTED_GATEWAY
+ )
+ )
_gate_error = "*"
# IF status in reclass
@@ -389,6 +403,11 @@
host=hostname,
if_name=_if_name
)
+ _notes.append(
+ self.errors.get_error_type_text(
+ self.errors.NET_NO_RC_IF_STATUS
+ )
+ )
_up_error = "*"
_rc_mtu = _r['mtu'] if 'mtu' in _r else None
@@ -408,6 +427,11 @@
reclass_mtu=_rc_mtu,
runtime_mtu=_host['mtu']
)
+ _notes.append(
+ self.errors.get_error_type_text(
+ self.errors.NET_MTU_MISMATCH
+ )
+ )
_rc_mtu_s = "/" + _rc_mtu_s
_mtu_error = "*"
else:
@@ -424,48 +448,111 @@
if_cidr=_ip_str,
if_mtu=_host['mtu']
)
+ _notes.append(
+ self.errors.get_error_type_text(
+ self.errors.NET_MTU_EMPTY
+ )
+ )
_mtu_error = "*"
else:
# this is a VIP
_if_name = " "*7
_if_name_suffix = ""
_ip_str += " VIP"
- # Host IF IP Proto MTU State Gate Def.Gate
- _text = "{:7} {:17} {:25} {:6} {:10} " \
- "{:10} {} / {}".format(
- _if_name + _if_rc,
- _if_name_suffix,
- _ip_str,
- _proto,
- _host['mtu'] + _rc_mtu_s + _mtu_error,
- _host['state'] + _up_error,
- _gate + _gate_error,
- _d_gate_str
- )
- logger_cli.info(
- " {0:8} {1}".format(
- node,
- _text
+ # Save all data
+ _values = {
+ "interface": _if_name,
+ "interface_error": _if_rc,
+ "interface_note": _if_name_suffix,
+ "ip_address": _ip_str,
+ "address_type": _proto,
+ "rt_mtu": _host['mtu'],
+ "rc_mtu": _rc_mtu_s,
+ "mtu_error": _mtu_error,
+ "status": _host['state'],
+ "status_error": _up_error,
+ "subnet_gateway": _gate,
+ "subnet_gateway_error": _gate_error,
+ "default_gateway": _d_gate_str,
+ "raw_data": _raw,
+ "error_note": " and ".join(_notes)
+ }
+ if node in _map[_net]:
+ # add if to host
+ _map[_net][node].append(_values)
+ else:
+ _map[_net][node] = [_values]
+ _notes = []
+
+ # save map
+ self.map = _map
+ # other runtime networks found
+ # docker, etc
+
+ return
+
+ def print_map(self):
+ """
+ Create text report for CLI
+
+ :return: none
+ """
+ logger_cli.info("# Networks")
+ logger_cli.info(
+ " {0:8} {1:25} {2:25} {3:6} {4:10} {5:10} {6}/{7}".format(
+ "Host",
+ "IF",
+ "IP",
+ "Proto",
+ "MTU",
+ "State",
+ "Gate",
+ "Def.Gate"
+ )
+ )
+ for network in self.map.keys():
+ logger_cli.info("-> {}".format(network))
+ for hostname in self.map[network].keys():
+ node = hostname.split('.')[0]
+ _n = self.map[network][hostname]
+ for _i in _n:
+ # Host IF IP Proto MTU State Gate Def.Gate
+ _text = "{:7} {:17} {:25} {:6} {:10} " \
+ "{:10} {} / {}".format(
+ _i['interface'] + _i['interface_error'],
+ _i['interface_note'],
+ _i['ip_address'],
+ _i['address_type'],
+ _i['rt_mtu'] + _i['rc_mtu'] + _i['mtu_error'],
+ _i['status'] + _i['status_error'],
+ _i['subnet_gateway'] +
+ _i['subnet_gateway_error'],
+ _i['default_gateway']
)
- )
-
- logger_cli.info("\n# Other networks")
- _other = [n for n in _runtime if n not in _reclass]
- for network in _other:
- logger_cli.info("-> {}".format(str(network)))
- names = sorted(_runtime[network].keys())
-
- for hostname in names:
- for _n in _runtime[network][hostname]:
- _ifs = [str(ifs.ip) for ifs in _n['ifs']]
- _text = "{:25} {:25} {:6} {:10} {}".format(
- _n['name'],
- ", ".join(_ifs),
- "-",
- _n['mtu'],
- _n['state']
- )
logger_cli.info(
- " {0:8} {1}".format(hostname.split('.')[0], _text)
+ " {0:8} {1}".format(
+ node,
+ _text
+ )
)
- logger_cli.info("\n")
+
+ # logger_cli.info("\n# Other networks")
+ # _other = [n for n in _runtime if n not in _reclass]
+ # for network in _other:
+ # logger_cli.info("-> {}".format(str(network)))
+ # names = sorted(_runtime[network].keys())
+
+ # for hostname in names:
+ # for _n in _runtime[network][hostname]:
+ # _ifs = [str(ifs.ip) for ifs in _n['ifs']]
+ # _text = "{:25} {:25} {:6} {:10} {}".format(
+ # _n['name'],
+ # ", ".join(_ifs),
+ # "-",
+ # _n['mtu'],
+ # _n['state']
+ # )
+ # logger_cli.info(
+ # " {0:8} {1}".format(hostname.split('.')[0], _text)
+ # )
+ # logger_cli.info("\n")
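Note: create_map() above stores its result in self.map as a plain nested dict keyed by network CIDR, then by short hostname, with a list of per-interface dicts. An illustrative sketch only; keys match the diff, all values below are hypothetical examples:

    example_map = {
        "10.0.0.0/24": {                       # network from the reclass model
            "ctl01": [{                        # short hostname -> list of interfaces
                "interface": "ens3",
                "interface_error": "",
                "interface_note": "",
                "ip_address": "10.0.0.11/24",
                "address_type": "static",
                "rt_mtu": "1500",
                "rc_mtu": "/1500",
                "mtu_error": "",
                "status": "up",
                "status_error": "",
                "subnet_gateway": "10.0.0.1",
                "subnet_gateway_error": "",
                "default_gateway": "10.0.0.1",
                "raw_data": "<raw runtime interface data>",
                "error_note": ""
            }]
        }
    }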
diff --git a/cfg_checker/nodes.py b/cfg_checker/nodes.py
index 5e535b4..ca4e261 100644
--- a/cfg_checker/nodes.py
+++ b/cfg_checker/nodes.py
@@ -140,6 +140,53 @@
self.gather_node_info()
return self.nodes
+ def get_info(self):
+ _info = {
+ 'mcp_release': self.mcp_release,
+ 'openstack_release': self.openstack_release
+ }
+ return _info
+
+ def get_cmd_for_nodes(self, cmd, target_key, target_dict=None):
+ """Function runs cmd.run and parses the result into place
+ or into the dict structure provided
+
+ :return: no return value, data published internally
+ """
+ logger_cli.debug(
+ "... collecting results for '{}'".format(cmd)
+ )
+ if target_dict:
+ _nodes = target_dict
+ else:
+ _nodes = self.nodes
+ _result = self.execute_cmd_on_active_nodes(cmd)
+ for node, data in _nodes.iteritems():
+ if node in self.skip_list:
+ logger_cli.debug(
+ "... '{}' skipped while collecting '{}'".format(
+ node,
+ cmd
+ )
+ )
+ continue
+ # Prepare target key
+ if target_key not in data:
+ data[target_key] = None
+ # Save data
+ if data['status'] == const.NODE_DOWN:
+ data[target_key] = None
+ elif not _result[node]:
+ logger_cli.debug(
+ "... '{}' did not respond within '{}'".format(
+ node,
+ config.salt_timeout
+ )
+ )
+ data[target_key] = None
+ else:
+ data[target_key] = _result[node]
+
def get_specific_pillar_for_nodes(self, pillar_path):
"""Function gets pillars on given path for all nodes
@@ -322,6 +369,20 @@
self.not_responded = [_n for _n in _r.keys() if not _r[_n]]
return _r
+ def execute_cmd_on_active_nodes(self, cmd):
+ # execute cmd
+ self.not_responded = []
+ _r = self.salt.cmd(
+ self.active_nodes_compound,
+ 'cmd.run',
+ param=cmd,
+ expr_form="compound"
+ )
+
+ # all False returns mean that there was no response
+ self.not_responded = [_n for _n in _r.keys() if not _r[_n]]
+ return _r
+
def is_node_available(self, node, log=True):
if node in self.skip_list:
if log:
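Note: a minimal usage sketch for the new get_cmd_for_nodes() helper, mirroring how the reporter below calls it; the node names and command output are illustrative, not fixed values:

    from cfg_checker.nodes import salt_master

    nodes = salt_master.get_nodes()
    # run "uname -r" on all active nodes and store each node's output
    # under the "kernel" key of that node's data dict
    salt_master.get_cmd_for_nodes("uname -r", "kernel", target_dict=nodes)
    for name, data in nodes.iteritems():   # Python 2 codebase, hence iteritems
        print("{}: {}".format(name, data.get("kernel")))   # None for skipped or down nodes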
diff --git a/cfg_checker/reports/reporter.py b/cfg_checker/reports/reporter.py
index a624dd3..8059fab 100644
--- a/cfg_checker/reports/reporter.py
+++ b/cfg_checker/reports/reporter.py
@@ -4,6 +4,7 @@
from cfg_checker.common import const
from cfg_checker.common import logger_cli
+from cfg_checker.nodes import salt_master
import jinja2
@@ -13,6 +14,12 @@
pkg_dir = os.path.join(pkg_dir, os.pardir, os.pardir)
pkg_dir = os.path.normpath(pkg_dir)
+ # % threshold values
+_disk_warn = 80
+_disk_critical = 90
+_ram_warn = 5
+_ram_critical = 3
+
def line_breaks(text):
# replace python linebreaks with html breaks
@@ -41,7 +48,7 @@
return sorted(_list)[-1]
-def make_action_label(act):
+def make_pkg_action_label(act):
_act_labels = {
const.ACT_UPGRADE: "Upgrade possible",
const.ACT_NEED_UP: "Needs upgrade",
@@ -52,7 +59,7 @@
return _act_labels[act]
-def make_action_class(act):
+def make_pkg_action_class(act):
_act_classes = {
const.ACT_UPGRADE: "possible",
const.ACT_NEED_UP: "needs_up",
@@ -63,7 +70,7 @@
return _act_classes[act]
-def make_status_label(sts):
+def make_pkg_status_label(sts):
_status_labels = {
const.VERSION_OK: "OK",
const.VERSION_UP: "Upgraded",
@@ -74,8 +81,12 @@
return _status_labels[sts]
-def make_status_class(sts):
- return const.all_statuses[sts]
+def make_pkg_status_class(sts):
+ return const.all_pkg_statuses[sts]
+
+
+def make_node_status(sts):
+ return const.node_status[sts]
def make_repo_info(repos):
@@ -144,10 +155,11 @@
self.jinja2_env.filters['get_max'] = get_max
self.jinja2_env.filters['get_sorted_keys'] = get_sorted_keys
- self.jinja2_env.filters['make_status_label'] = make_status_label
- self.jinja2_env.filters['make_status_class'] = make_status_class
- self.jinja2_env.filters['make_action_label'] = make_action_label
- self.jinja2_env.filters['make_action_class'] = make_action_class
+ self.jinja2_env.filters['pkg_status_label'] = make_pkg_status_label
+ self.jinja2_env.filters['pkg_status_class'] = make_pkg_status_class
+ self.jinja2_env.filters['pkg_action_label'] = make_pkg_action_label
+ self.jinja2_env.filters['pkg_action_class'] = make_pkg_action_class
+ self.jinja2_env.filters['node_status_class'] = make_node_status
self.jinja2_env.filters['make_repo_info'] = make_repo_info
# render!
@@ -194,6 +206,130 @@
class HTMLNetworkReport(_TMPLBase):
tmpl = "network_check_tmpl.j2"
+ def _extend_data(self, data):
+ def get_bytes(value):
+ if value[-1] == 'G':
+ return int(float(value[:-1]) * 1024 * 1024 * 1024)
+ elif value[-1] == 'M':
+ return int(float(value[:-1]) * 1024 * 1024)
+ elif value[-1] == 'K':
+ return int(float(value[:-1]) * 1024)
+ else:
+ return int(value)
+
+ def _lscpu(field, key, _dict):
+ _f_cmd = salt_master.get_cmd_for_nodes
+ _cmd = "lscpu | grep -e \"^{}:\" | cut -d\":\" -f2 " \
+ "| sed -e 's/^[[:space:]]*//'"
+ _f_cmd(_cmd.format(field), key, target_dict=_dict)
+
+ def _free(field, key, _dict):
+ _f_cmd = salt_master.get_cmd_for_nodes
+ _cmd = "free -h | sed -n '/Mem/s/ \\+/ /gp' | cut -d\" \" -f {}"
+ _f_cmd(_cmd.format(field), key, target_dict=_dict)
+
+ def _services(_dict):
+ _key = "services"
+ _key_r = "services_raw"
+ _f_cmd = salt_master.get_cmd_for_nodes
+ _cmd = "service --status-all"
+ _f_cmd(_cmd, _key_r, target_dict=_dict)
+ for node, dt in _dict.iteritems():
+ dt[_key] = {}
+ lines = dt[_key_r].splitlines()
+ for line in lines:
+ li = line.split()
+ _status = li[1]
+ _name = li[3]
+ if _status == '-':
+ dt[_key][_name] = False
+ elif _status == '+':
+ dt[_key][_name] = True
+ else:
+ dt[_key][_name] = None
+ dt.pop(_key_r)
+
+ data["const"] = {
+ "ram_warn": _ram_warn,
+ "ram_critical": _ram_critical,
+ "disk_warn": _disk_warn,
+ "disk_critical": _disk_critical
+ }
+
+ # get kernel version
+ salt_master.get_cmd_for_nodes(
+ "uname -r",
+ "kernel",
+ target_dict=data["nodes"]
+ )
+ # cpu info
+ # Sample: VT-x, KVM, full
+ _lscpu("Virtualization", "virt_mode", data["nodes"])
+ _lscpu("Hypervisor vendor", "virt_vendor", data["nodes"])
+ _lscpu("Virtualization type", "virt_type", data["nodes"])
+ # sample: 4
+ _lscpu("CPU(s)", "cpus", data["nodes"])
+
+ # free ram
+ # sample: 16425392 14883144 220196
+ _free("2", "ram_total", data["nodes"])
+ _free("3", "ram_used", data["nodes"])
+ _free("4", "ram_free", data["nodes"])
+ _free("7", "ram_available", data["nodes"])
+ for _data in data["nodes"].itervalues():
+ _total = get_bytes(_data["ram_total"])
+ _avail = get_bytes(_data["ram_available"])
+ _m = _avail * 100.0 / _total
+ if _m < _ram_critical:
+ _data["ram_status"] = "fail"
+ elif _m < _ram_warn:
+ _data["ram_status"] = "warn"
+ else:
+ _data["ram_status"] = ""
+
+ # disk space
+ # sample: /dev/vda1 78G 33G 45G 43%
+ salt_master.get_cmd_for_nodes(
+ "df -h | sed -n '/^\\/dev/s/ \\+/ /gp' | cut -d\" \" -f 1-5",
+ "disk_raw",
+ target_dict=data["nodes"]
+ )
+ for _data in data["nodes"].itervalues():
+ _data["disk"] = {}
+ # show first device row by default
+ _data["disk_max_dev"] = None
+ _d = _data["disk"]
+ _r = _data["disk_raw"]
+ _r = _r.splitlines()
+ _max = -1
+ for idx in range(0, len(_r)):
+ _t = _r[idx].split()
+ _d[_t[0]] = {}
+ _d[_t[0]]['v'] = _t[1:]
+ _chk = int(_t[-1].split('%')[0])
+ if _chk > _max:
+ _data["disk_max_dev"] = _t[0]
+ _max = _chk
+ if _chk > _disk_critical:
+ _d[_t[0]]['f'] = "fail"
+ elif _chk > _disk_warn:
+ _d[_t[0]]['f'] = "warn"
+ else:
+ _d[_t[0]]['f'] = ""
+
+ # prepare network map data for the report
+ for net, net_v in data['map'].iteritems():
+ for node, ifs in net_v.iteritems():
+ for d in ifs:
+ _err = "fail"
+ d['interface_error'] = _err if d['interface_error'] else ""
+ d['mtu_error'] = _err if d['mtu_error'] else ""
+ d['status_error'] = _err if d['status_error'] else ""
+ d['subnet_gateway_error'] = \
+ _err if d['subnet_gateway_error'] else ""
+
+ _services(data["nodes"])
+
class ReportToFile(object):
def __init__(self, report, target):
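Note: a minimal sketch of the disk threshold logic added to _extend_data() above; the sample df row is hypothetical, while the warn/critical values mirror the module constants from the diff:

    _disk_warn = 80       # % used, warn threshold
    _disk_critical = 90   # % used, critical threshold

    sample_row = "/dev/vda1 78G 33G 45G 43%"    # one line of "df -h" output (example)
    fields = sample_row.split()
    used_pct = int(fields[-1].rstrip('%'))
    if used_pct > _disk_critical:
        css_class = "fail"
    elif used_pct > _disk_warn:
        css_class = "warn"
    else:
        css_class = ""
    print("{} {}% -> '{}'".format(fields[0], used_pct, css_class))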