Refactor working with Networks and Pinger class
- Mapper moved to separate module
- Other modules can use Mapper to get desired networks
- salt_master is now a separate single instance
- Updated file handling on salt
- ping.py, a scripted flexible interface to the ping command
multithreaded ping execution, 15 at once
- New commands in network: 'ping' and 'list'
- New error when runtime has no network listed in reclass
Fixes:
- Master node code handling
- Unknown node codes detection
- Proper node code search and handling
- File upload procedures updated
- Packages report fix
Change-Id: I5959210aed53b20b04b05ea880218e93239bb661
Related-PROD: PROD-28199
diff --git a/cfg_checker/clients/__init__.py b/cfg_checker/clients/__init__.py
index 88992f7..c827a38 100644
--- a/cfg_checker/clients/__init__.py
+++ b/cfg_checker/clients/__init__.py
@@ -20,10 +20,10 @@
logger.info("Creating salt remote instance")
# create it once
if salt is None:
- salt = SaltRemote(config)
+ salt = SaltRemote()
# do most expensive operation with no strict timeout possible
# all nodes that answer ping
- salt.nodes_active = salt.get_active_nodes()
+ # salt.nodes_active = salt.get_active_nodes()
# return once required
return salt
diff --git a/cfg_checker/common/const.py b/cfg_checker/common/const.py
index f1f69ae..1ae2bba 100644
--- a/cfg_checker/common/const.py
+++ b/cfg_checker/common/const.py
@@ -43,6 +43,8 @@
VERSION_NA: "no status"
}
+uknown_code = "unk"
+
all_roles_map = {
"apt": "repository",
"bmk": "validation",
@@ -60,5 +62,6 @@
"mtr": "stacklight_metering",
"osd": "storage_node",
"prx": "proxy",
- "rgw": "storage_rados"
+ "rgw": "storage_rados",
+ "unk": "uknown"
}
diff --git a/cfg_checker/common/other.py b/cfg_checker/common/other.py
index 1d34776..d9e434a 100644
--- a/cfg_checker/common/other.py
+++ b/cfg_checker/common/other.py
@@ -2,7 +2,7 @@
import re
import subprocess
-from cfg_checker.common.const import all_roles_map
+from cfg_checker.common.const import all_roles_map, uknown_code
from cfg_checker.common.exception import ConfigException
pkg_dir = os.path.dirname(__file__)
@@ -70,10 +70,27 @@
def get_node_code(self, fqdn):
# validate
_isvalid, _message = self.validate_name(fqdn, message=True)
- _code = re.findall("[a-zA-Z]+", fqdn.split('.')[0])
+ _code = re.findall("[a-zA-Z]+?(?=(?:[0-9]+$)|$)", fqdn.split('.')[0])
# check if it is valid and raise if not
if _isvalid:
- return _code[0]
+ # try to match it with ones in map
+ _c = _code[0]
+ match = any([r in _c for r in all_roles_map.keys()])
+ if match:
+ # no match, try to find it
+ match = False
+ for r in all_roles_map.keys():
+ _idx = _c.find(r)
+ if _idx > -1:
+ _c = _c[_idx:]
+ match = True
+ break
+ if match:
+ return _c
+ else:
+ return uknown_code
+ else:
+ return uknown_code
else:
raise ConfigException(_message)
diff --git a/cfg_checker/common/salt_utils.py b/cfg_checker/common/salt_utils.py
index 2927e28..4dcbd30 100644
--- a/cfg_checker/common/salt_utils.py
+++ b/cfg_checker/common/salt_utils.py
@@ -189,6 +189,8 @@
class SaltRemote(SaltRest):
+ master_node = ""
+
def __init__(self):
super(SaltRemote, self).__init__()
@@ -362,7 +364,7 @@
"makedirs": makedirs
}
salt_output = self.cmd(
- "cfg01*",
+ self.master_node,
"file.touch",
param=path,
kwarg=_kwarg
@@ -376,7 +378,7 @@
_args = [path]
_args.extend(strings_list)
salt_output = self.cmd(
- "cfg01*",
+ self.master_node,
"file.write",
param=_args,
kwarg=_kwarg
@@ -428,7 +430,7 @@
"makedirs": makedirs
}
salt_output = self.cmd(
- "cfg01*",
+ self.master_node,
"file.manage_file",
param=_arg,
kwarg=_kwarg
diff --git a/cfg_checker/helpers/args_utils.py b/cfg_checker/helpers/args_utils.py
index 1661c96..726c20e 100644
--- a/cfg_checker/helpers/args_utils.py
+++ b/cfg_checker/helpers/args_utils.py
@@ -19,7 +19,7 @@
_c = args.command if hasattr(args, 'command') else ''
_t = args.type if hasattr(args, 'type') else ''
raise ConfigException(
- "Argument '{}' not found executing: mcp_check {} {}".format(
+ "Argument '{}' not found executing: mcp-check {} {}".format(
str_arg,
_c,
_t
diff --git a/cfg_checker/modules/network/__init__.py b/cfg_checker/modules/network/__init__.py
index 075dccf..c81b63a 100644
--- a/cfg_checker/modules/network/__init__.py
+++ b/cfg_checker/modules/network/__init__.py
@@ -1,7 +1,7 @@
from cfg_checker.common import logger_cli
from cfg_checker.helpers import args_utils
+from cfg_checker.modules.network import checker, mapper, pinger
-import checker
command_help = "Network infrastructure checks and reports"
@@ -32,20 +32,47 @@
help="HTML filename to save report"
)
+ net_ping_parser = net_subparsers.add_parser(
+ 'ping',
+ help="Ping all nodes with each other using network CIDR"
+ )
+
+ net_ping_parser.add_argument(
+ '--cidr',
+ metavar='network_ping_cidr',
+ help="Subnet CIDR to ping nodes in"
+ )
+ net_ping_parser.add_argument(
+ '--mtu',
+ metavar='network_ping_mtu',
+ help="MTU size to use. Ping will be done for MTU - 20 - 8"
+ )
+ net_ping_parser.add_argument(
+ '--detailed',
+ action="store_true", default=False,
+ help="Print detailed report at the end"
+ )
+
+ net_subparsers.add_parser(
+ 'map',
+ help="Print network map"
+ )
+
+ net_subparsers.add_parser(
+ 'list',
+ help="List networks in reclass"
+ )
+
return _parser
-def _prepare_check():
- _checker_class = checker.NetworkChecker()
- _checker_class.collect_reclass_networks()
- _checker_class.collect_network_info()
- return _checker_class
-
-
def _prepare_map():
- _map_class = None
+ _mapper = mapper.NetworkMapper()
+ _mapper.map_network(_mapper.RECLASS)
+ _mapper.map_network(_mapper.RUNTIME)
+ _mapper.map_network(_mapper.CONFIG)
- return _map_class
+ return _mapper
def do_check(args):
@@ -53,8 +80,8 @@
# should not print map, etc...
# Just bare summary and errors
logger_cli.info("# Network check to console")
- netChecker = _prepare_check()
- netChecker.print_network_report()
+ netChecker = checker.NetworkChecker()
+ netChecker.check_networks()
# save what was collected
netChecker.errors.save_iteration_data()
@@ -76,7 +103,7 @@
_filename = args_utils.get_arg(args, 'html')
- netChecker = _prepare_check()
+ netChecker = checker.NetworkChecker()
netChecker.create_html_report(_filename)
return
@@ -88,9 +115,21 @@
logger_cli.info("# Network report")
_type, _filename = args_utils.get_network_map_type_and_filename(args)
- # networkMap = _prepare_map
+ networkMap = _prepare_map()
- # TODO: Create map class to generate network map
+ networkMap.print_map()
+
+ return
+
+
+def do_list(args):
+ # Network Map
+ # Should generate network map to console or HTML
+ _map = mapper.NetworkMapper()
+ reclass = _map.map_network(_map.RECLASS)
+ _s = [str(_n) for _n in reclass.keys()]
+ logger_cli.info("# Reclass networks list")
+ logger_cli.info("\n".join(_s))
return
@@ -99,8 +138,13 @@
# Network pinger
# Checks if selected nodes are pingable
# with a desireble parameters: MTU, Frame, etc
+ if not args.cidr:
+ logger_cli.error("\n# Use mcp-check network list to get list of CIDRs")
+ _cidr = args_utils.get_arg(args, "cidr")
+ _pinger = pinger.NetworkPinger(mtu=args.mtu, detailed=args.detailed)
# TODO: Simple ping based on parameters
+ _pinger.ping_nodes(_cidr)
return
@@ -108,6 +152,7 @@
def do_trace(args):
# Network packet tracer
# Check if packet is delivered to proper network host
+ logger_cli.info("# Packet Tracer not yet implemented")
# TODO: Packet tracer
diff --git a/cfg_checker/modules/network/checker.py b/cfg_checker/modules/network/checker.py
index 7e6a239..89db6ba 100644
--- a/cfg_checker/modules/network/checker.py
+++ b/cfg_checker/modules/network/checker.py
@@ -1,382 +1,20 @@
-import ipaddress
-import json
-
-
from cfg_checker.common import logger_cli
+from cfg_checker.modules.network.mapper import NetworkMapper
from cfg_checker.modules.network.network_errors import NetworkErrors
-from cfg_checker.nodes import SaltNodes
from cfg_checker.reports import reporter
-class NetworkChecker(SaltNodes):
+class NetworkChecker(object):
def __init__(self):
- logger_cli.info("# Gathering environment information")
- super(NetworkChecker, self).__init__()
- logger_cli.info("# Initializing error logs folder")
+ logger_cli.debug("... init error logs folder")
self.errors = NetworkErrors()
+ self.mapper = NetworkMapper(self.errors)
- # adding net data to tree
- def _add_data(self, _list, _n, _h, _d):
- if _n not in _list:
- _list[_n] = {}
- _list[_n][_h] = [_d]
- elif _h not in _list[_n]:
- # there is no such host, just create it
- _list[_n][_h] = [_d]
- else:
- # there is such host... this is an error
- self.errors.add_error(
- self.errors.NET_DUPLICATE_IF,
- host=_h,
- dup_if=_d['name']
- )
- _list[_n][_h].append(_d)
+ def check_networks(self):
+ self.mapper.map_network(self.mapper.RECLASS)
+ self.mapper.map_network(self.mapper.RUNTIME)
- # TODO: refactor map creation. Build one map instead of two separate
- def _map_network_for_host(self, host, if_class, net_list, data):
- # filter networks for this IF IP
- _nets = [n for n in net_list.keys() if if_class.ip in n]
- _masks = [n.netmask for n in _nets]
- if len(_nets) > 1:
- # There a multiple network found for this IP, Error
- self.errors.add_error(
- self.errors.NET_SUBNET_INTERSECT,
- host=host,
- networks="; ".join(_nets)
- )
- # check mask match
- if len(_nets) > 0 and if_class.netmask not in _masks:
- self.errors.add_error(
- self.errors.NET_MASK_MISMATCH,
- host=host,
- if_name=data['name'],
- if_cidr=if_class.exploded,
- if_mapped_networks=", ".join([str(_n) for _n in _nets])
- )
-
- if len(_nets) < 1:
- self._add_data(net_list, if_class.network, host, data)
- else:
- # add all data
- for net in _nets:
- self._add_data(net_list, net, host, data)
-
- return net_list
-
- def collect_network_info(self):
- """
- Collects info on the network using ifs_data.py script
-
- :return: none
- """
- logger_cli.info("# Mapping node runtime network data")
- _result = self.execute_script_on_active_nodes(
- "ifs_data.py",
- args=["json"]
- )
- self.stage = "Runtime"
- for key in self.nodes.keys():
- # check if we are to work with this node
- if not self.is_node_available(key):
- continue
- # due to much data to be passed from salt, it is happening in order
- if key in _result:
- _text = _result[key]
- _dict = json.loads(_text[_text.find('{'):])
- self.nodes[key]['routes'] = _dict.pop("routes")
- self.nodes[key]['networks'] = _dict
- else:
- self.nodes[key]['networks'] = {}
- self.nodes[key]['routes'] = {}
- logger_cli.debug("... {} has {} networks".format(
- key,
- len(self.nodes[key]['networks'].keys())
- ))
- logger_cli.info("-> done collecting networks data")
-
- # TODO: Mimic reclass structure for easy compare
- logger_cli.info("### Building network tree")
- # match interfaces by IP subnets
- _all_nets = {}
- for host, node_data in self.nodes.iteritems():
- if not self.is_node_available(host):
- continue
-
- for net_name, net_data in node_data['networks'].iteritems():
- # get ips and calculate subnets
- if net_name in ['lo']:
- # skip the localhost
- continue
- # get data and make sure that wide mask goes first
- _ip4s = sorted(
- net_data['ipv4'],
- key=lambda s: s[s.index('/'):]
- )
- for _ip_str in _ip4s:
- # create interface class
- _if = ipaddress.IPv4Interface(_ip_str)
- # check if this is a VIP
- # ...all those will have /32 mask
- net_data['vip'] = None
- if _if.network.prefixlen == 32:
- net_data['vip'] = str(_if.exploded)
- if 'name' not in net_data:
- net_data['name'] = net_name
- if 'ifs' not in net_data:
- net_data['ifs'] = [_if]
- # map it
- _all_nets = self._map_network_for_host(
- host,
- _if,
- _all_nets,
- net_data
- )
- else:
- # data is already there, just add VIP
- net_data['ifs'].append(_if)
-
- # save collected info
- self.all_nets = _all_nets
-
- def collect_reclass_networks(self):
- logger_cli.info("# Mapping reclass networks")
- self.stage = "Reclass"
- # Get networks from reclass and mark them
- _reclass_nets = {}
- # Get required pillars
- self.get_specific_pillar_for_nodes("linux:network")
- for node in self.nodes.keys():
- # check if this node
- if not self.is_node_available(node):
- continue
- # get the reclass value
- _pillar = self.nodes[node]['pillars']['linux']['network']
- # we should be ready if there is no interface in reclass for a node
- # for example on APT node
- if 'interface' in _pillar:
- _pillar = _pillar['interface']
- else:
- logger_cli.info(
- "... node '{}' skipped, no IF section in reclass".format(
- node
- )
- )
- continue
- for _if_name, _if_data in _pillar.iteritems():
- if 'address' in _if_data:
- _if = ipaddress.IPv4Interface(
- _if_data['address'] + '/' + _if_data['netmask']
- )
- _if_data['name'] = _if_name
- _if_data['ifs'] = [_if]
-
- _reclass_nets = self._map_network_for_host(
- node,
- _if,
- _reclass_nets,
- _if_data
- )
-
- self.reclass_nets = _reclass_nets
-
- def print_network_report(self):
- """
- Create text report for CLI
-
- :return: none
- """
- _all_nets = self.all_nets.keys()
- logger_cli.info("# Reclass networks")
- logger_cli.info(
- " {0:17} {1:25}: "
- "{2:19} {3:5}{4:10} {5}{6} {7} / {8} / {9}".format(
- "Hostname",
- "IF",
- "IP",
- "rtMTU",
- "rcMTU",
- "rtState",
- "rcState",
- "rtGate",
- "rtDef.Gate",
- "rcGate"
- )
- )
- # TODO: Move matching to separate function
- self.stage = "Matching"
- _reclass = [n for n in _all_nets if n in self.reclass_nets]
- for network in _reclass:
- # shortcuts
- _net = str(network)
- logger_cli.info("-> {}".format(_net))
- names = sorted(self.all_nets[network].keys())
- for hostname in names:
- if not self.is_node_available(hostname, log=False):
- logger_cli.info(
- " {0:17} {1}".format(
- hostname.split('.')[0],
- "... no data for the node"
- )
- )
- # add non-responsive node erorr
- self.errors.add_error(
- self.errors.NET_NODE_NON_RESPONSIVE,
- host=hostname
- )
-
- # print empty row
- _text = " # node non-responsive"
- logger_cli.info(
- " {0:17} {1}".format(
- hostname.split('.')[0],
- _text
- )
- )
- continue
-
- # get the gateway for current net
- _routes = self.nodes[hostname]['routes']
- _route = _routes[_net] if _net in _routes else None
- if not _route:
- _gate = "no route!"
- else:
- _gate = _route['gateway'] if _route['gateway'] else "-"
-
- # get the default gateway
- if 'default' in _routes:
- _d_gate = ipaddress.IPv4Address(
- _routes['default']['gateway']
- )
- else:
- _d_gate = None
- _d_gate_str = _d_gate if _d_gate else "No default gateway!"
-
- _a = self.all_nets[network][hostname]
- for _host in _a:
- for _if in _host['ifs']:
- # get proper reclass
- _ip_str = str(_if.exploded)
- _r = {}
- if hostname in self.reclass_nets[network]:
- for _item in self.reclass_nets[network][hostname]:
- for _item_ifs in _item['ifs']:
- if _ip_str == str(_item_ifs.exploded):
- _r = _item
- else:
- self.errors.add_error(
- self.errors.NET_NODE_UNEXPECTED_IF,
- host=hostname,
- if_name=_host['name'],
- if_ip=_ip_str
- )
-
- # check if node is UP
- if not self.is_node_available(hostname):
- _r_gate = "-"
- # get proper network from reclass
- else:
- # Lookup match for the ip
- _r_gate = "no IF in reclass!"
- # get all networks with this hostname
- _rn = self.reclass_nets
- _nets = filter(
- lambda n: hostname in _rn[n].keys(),
- self.reclass_nets
- )
- _rd = None
- for _item in _nets:
- # match ip
- _r_dat = self.reclass_nets[_item][hostname]
- for _r_ifs in _r_dat:
- for _r_if in _r_ifs['ifs']:
- if _if.ip == _r_if.ip:
- _rd = _r_ifs
- break
- if _rd:
- _gs = 'gateway'
- _e = "empty"
- _r_gate = _rd[_gs] if _gs in _rd else _e
- break
-
- # IF status in reclass
- if 'enabled' not in _r:
- _enabled = "(no record!)"
- else:
- _e = "enabled"
- _d = "disabled"
- _enabled = "("+_e+")" if _r[_e] else "("+_d+")"
-
- _name = _host['name']
- _rc_mtu = _r['mtu'] if 'mtu' in _r else None
- _rc_mtu_s = str(_rc_mtu) if _rc_mtu else '(-)'
- # check if this is a VIP address
- # no checks needed if yes.
- if _host['vip'] != _ip_str:
- if _rc_mtu:
- # if there is an MTU value, match it
- if _host['mtu'] != _rc_mtu_s:
- self.errors.add_error(
- self.errors.NET_MTU_MISMATCH,
- host=hostname,
- if_name=_name,
- if_cidr=_ip_str,
- reclass_mtu=_rc_mtu,
- runtime_mtu=_host['mtu']
- )
- elif _host['mtu'] != '1500':
- # there is no MTU value in reclass
- # and runtime value is not default
- self.errors.add_error(
- self.errors.NET_MTU_EMPTY,
- host=hostname,
- if_name=_name,
- if_cidr=_ip_str,
- if_mtu=_host['mtu']
- )
- else:
- # this is a VIP
- _name = " "*20
- _ip_str += " VIP"
- _enabled = "(-)"
- _r_gate = "-"
-
- _text = "{0:25} {1:19} {2:5}{3:10} {4:4}{5:10} " \
- "{6} / {7} / {8}".format(
- _name,
- _ip_str,
- _host['mtu'],
- _rc_mtu_s,
- _host['state'],
- _enabled,
- _gate,
- _d_gate_str,
- _r_gate
- )
- logger_cli.info(
- " {0:17} {1}".format(
- hostname.split('.')[0],
- _text
- )
- )
-
- logger_cli.info("\n# Other networks")
- _other = [n for n in _all_nets if n not in self.reclass_nets]
- for network in _other:
- logger_cli.info("-> {}".format(str(network)))
- names = sorted(self.all_nets[network].keys())
-
- for hostname in names:
- for _n in self.all_nets[network][hostname]:
- _ifs = [str(ifs.ip) for ifs in _n['ifs']]
- _text = "{0:25}: {1:19} {2:5} {3:4}".format(
- _n['name'],
- ", ".join(_ifs),
- _n['mtu'],
- _n['state']
- )
- logger_cli.info(
- " {0:17} {1}".format(hostname.split('.')[0], _text)
- )
+ self.mapper.print_map()
def print_summary(self):
logger_cli.info(self.errors.get_summary(print_zeros=False))
diff --git a/cfg_checker/modules/network/mapper.py b/cfg_checker/modules/network/mapper.py
new file mode 100644
index 0000000..c44775f
--- /dev/null
+++ b/cfg_checker/modules/network/mapper.py
@@ -0,0 +1,430 @@
+import ipaddress
+import json
+
+from cfg_checker.common import logger_cli
+from cfg_checker.common.exception import InvalidReturnException
+from cfg_checker.modules.network.network_errors import NetworkErrors
+from cfg_checker.nodes import salt_master
+
+# TODO: use templated approach
+# net interface structure should be the same
+_if_item = {
+ "name": "unnamed interface",
+ "mac": "",
+ "routes": {},
+ "ip": [],
+ "parameters": {}
+}
+
+# collection of configurations
+_network_item = {
+ "runtime": {},
+ "config": {},
+ "reclass": {}
+}
+
+
+class NetworkMapper(object):
+ RECLASS = "reclass"
+ CONFIG = "config"
+ RUNTIME = "runtime"
+
+ def __init__(self, errors_class=None):
+ logger_cli.info("# Initializing mapper")
+ self.networks = {}
+ self.nodes = salt_master.get_nodes()
+ if errors_class:
+ self.errors = errors_class
+ else:
+ logger_cli.debug("... init error logs folder")
+ self.errors = NetworkErrors()
+
+ # adding net data to tree
+ def _add_data(self, _list, _n, _h, _d):
+ if _n not in _list:
+ _list[_n] = {}
+ _list[_n][_h] = [_d]
+ elif _h not in _list[_n]:
+ # there is no such host, just create it
+ _list[_n][_h] = [_d]
+ else:
+ # there is such host... this is an error
+ self.errors.add_error(
+ self.errors.NET_DUPLICATE_IF,
+ host=_h,
+ dup_if=_d['name']
+ )
+ _list[_n][_h].append(_d)
+
+ # TODO: refactor map creation. Build one map instead of two separate
+ def _map_network_for_host(self, host, if_class, net_list, data):
+ # filter networks for this IF IP
+ _nets = [n for n in net_list.keys() if if_class.ip in n]
+ _masks = [n.netmask for n in _nets]
+ if len(_nets) > 1:
+ # There a multiple network found for this IP, Error
+ self.errors.add_error(
+ self.errors.NET_SUBNET_INTERSECT,
+ host=host,
+ ip=str(if_class.exploded),
+ networks="; ".join([str(_n) for _n in _nets])
+ )
+ # check mask match
+ if len(_nets) > 0 and if_class.netmask not in _masks:
+ self.errors.add_error(
+ self.errors.NET_MASK_MISMATCH,
+ host=host,
+ if_name=data['name'],
+ if_cidr=if_class.exploded,
+ if_mapped_networks=", ".join([str(_n) for _n in _nets])
+ )
+
+ if len(_nets) < 1:
+ self._add_data(net_list, if_class.network, host, data)
+ else:
+ # add all data
+ for net in _nets:
+ self._add_data(net_list, net, host, data)
+
+ return net_list
+
+ def _map_reclass_networks(self):
+ # class uses nodes from self.nodes dict
+ _reclass = {}
+ # Get required pillars
+ salt_master.get_specific_pillar_for_nodes("linux:network")
+ for node in salt_master.nodes.keys():
+ # check if this node
+ if not salt_master.is_node_available(node):
+ continue
+ # get the reclass value
+ _pillar = salt_master.nodes[node]['pillars']['linux']['network']
+ # we should be ready if there is no interface in reclass for a node
+            # for example on APT node
+ if 'interface' in _pillar:
+ _pillar = _pillar['interface']
+ else:
+ logger_cli.info(
+ "... node '{}' skipped, no IF section in reclass".format(
+ node
+ )
+ )
+ continue
+ for _if_name, _if_data in _pillar.iteritems():
+ if 'address' in _if_data:
+ _if = ipaddress.IPv4Interface(
+ _if_data['address'] + '/' + _if_data['netmask']
+ )
+ _if_data['name'] = _if_name
+ _if_data['ifs'] = [_if]
+
+ _reclass = self._map_network_for_host(
+ node,
+ _if,
+ _reclass,
+ _if_data
+ )
+
+ return _reclass
+
+ def _map_configured_networks(self):
+ # class uses nodes from self.nodes dict
+ _confs = {}
+
+ return _confs
+
+ def _map_runtime_networks(self):
+ # class uses nodes from self.nodes dict
+ _runtime = {}
+ logger_cli.info("# Mapping node runtime network data")
+ salt_master.prepare_script_on_active_nodes("ifs_data.py")
+ _result = salt_master.execute_script_on_active_nodes(
+ "ifs_data.py",
+ args=["json"]
+ )
+ for key in salt_master.nodes.keys():
+ # check if we are to work with this node
+ if not salt_master.is_node_available(key):
+ continue
+ # due to much data to be passed from salt_master,
+ # it is happening in order
+ if key in _result:
+ _text = _result[key]
+ if '{' in _text and '}' in _text:
+ _text = _text[_text.find('{'):]
+ else:
+ raise InvalidReturnException(
+ "Non-json object returned: '{}'".format(
+ _text
+ )
+ )
+ _dict = json.loads(_text[_text.find('{'):])
+ salt_master.nodes[key]['routes'] = _dict.pop("routes")
+ salt_master.nodes[key]['networks'] = _dict
+ else:
+ salt_master.nodes[key]['networks'] = {}
+ salt_master.nodes[key]['routes'] = {}
+ logger_cli.debug("... {} has {} networks".format(
+ key,
+ len(salt_master.nodes[key]['networks'].keys())
+ ))
+ logger_cli.info("-> done collecting networks data")
+
+ logger_cli.info("-> mapping IPs")
+ # match interfaces by IP subnets
+ for host, node_data in salt_master.nodes.iteritems():
+ if not salt_master.is_node_available(host):
+ continue
+
+ for net_name, net_data in node_data['networks'].iteritems():
+ # get ips and calculate subnets
+ if net_name in ['lo']:
+ # skip the localhost
+ continue
+ # get data and make sure that wide mask goes first
+ _ip4s = sorted(
+ net_data['ipv4'],
+ key=lambda s: s[s.index('/'):]
+ )
+ for _ip_str in _ip4s:
+ # create interface class
+ _if = ipaddress.IPv4Interface(_ip_str)
+ # check if this is a VIP
+ # ...all those will have /32 mask
+ net_data['vip'] = None
+ if _if.network.prefixlen == 32:
+ net_data['vip'] = str(_if.exploded)
+ if 'name' not in net_data:
+ net_data['name'] = net_name
+ if 'ifs' not in net_data:
+ net_data['ifs'] = [_if]
+ # map it
+ _runtime = self._map_network_for_host(
+ host,
+ _if,
+ _runtime,
+ net_data
+ )
+ else:
+ # data is already there, just add VIP
+ net_data['ifs'].append(_if)
+
+ return _runtime
+
+ def map_network(self, source):
+ # maps target network using given source
+ _networks = None
+
+ if source == self.RECLASS:
+ _networks = self._map_reclass_networks()
+ elif source == self.CONFIG:
+ _networks = self._map_configured_networks()
+ elif source == self.RUNTIME:
+ _networks = self._map_runtime_networks()
+
+ self.networks[source] = _networks
+ return _networks
+
+ def print_map(self):
+ """
+ Create text report for CLI
+
+ :return: none
+ """
+ _runtime = self.networks[self.RUNTIME]
+ _reclass = self.networks[self.RECLASS]
+ logger_cli.info("# Reclass networks")
+ logger_cli.info(
+ " {0:17} {1:25}: "
+ "{2:19} {3:5}{4:10} {5}{6} {7} / {8} / {9}".format(
+ "Hostname",
+ "IF",
+ "IP",
+ "rtMTU",
+ "rcMTU",
+ "rtState",
+ "rcState",
+ "rtGate",
+ "rtDef.Gate",
+ "rcGate"
+ )
+ )
+ for network in _reclass:
+ # shortcuts
+ _net = str(network)
+ logger_cli.info("-> {}".format(_net))
+ if network not in _runtime:
+ # reclass has network that not found in runtime
+ self.errors.add_error(
+ self.errors.NET_NO_RUNTIME_NETWORK,
+ reclass_net=str(network)
+ )
+ logger_cli.info(" {:-^50}".format(" No runtime network "))
+ continue
+ names = sorted(_runtime[network].keys())
+ for hostname in names:
+ if not salt_master.is_node_available(hostname, log=False):
+ logger_cli.info(
+ " {0:17} {1}".format(
+ hostname.split('.')[0],
+ "... no data for the node"
+ )
+ )
+                # add non-responsive node error
+ self.errors.add_error(
+ self.errors.NET_NODE_NON_RESPONSIVE,
+ host=hostname
+ )
+
+ # print empty row
+ _text = " # node non-responsive"
+ logger_cli.info(
+ " {0:17} {1}".format(
+ hostname.split('.')[0],
+ _text
+ )
+ )
+ continue
+
+ # get the gateway for current net
+ _routes = salt_master.nodes[hostname]['routes']
+ _route = _routes[_net] if _net in _routes else None
+ if not _route:
+ _gate = "no route!"
+ else:
+ _gate = _route['gateway'] if _route['gateway'] else "-"
+
+ # get the default gateway
+ if 'default' in _routes:
+ _d_gate = ipaddress.IPv4Address(
+ _routes['default']['gateway']
+ )
+ else:
+ _d_gate = None
+ _d_gate_str = _d_gate if _d_gate else "No default gateway!"
+
+ _a = _runtime[network][hostname]
+ for _host in _a:
+ for _if in _host['ifs']:
+ # get proper reclass
+ _ip_str = str(_if.exploded)
+ _r = {}
+ if hostname in _reclass[network]:
+ for _item in _reclass[network][hostname]:
+ for _item_ifs in _item['ifs']:
+ if _ip_str == str(_item_ifs.exploded):
+ _r = _item
+ else:
+ self.errors.add_error(
+ self.errors.NET_NODE_UNEXPECTED_IF,
+ host=hostname,
+ if_name=_host['name'],
+ if_ip=_ip_str
+ )
+
+ # check if node is UP
+ if not salt_master.is_node_available(hostname):
+ _r_gate = "-"
+ # get proper network from reclass
+ else:
+ # Lookup match for the ip
+ _r_gate = "no IF in reclass!"
+ # get all networks with this hostname
+ _nets = filter(
+ lambda n: hostname in _reclass[n].keys(),
+ _reclass
+ )
+ _rd = None
+ for _item in _nets:
+ # match ip
+ _r_dat = _reclass[_item][hostname]
+ for _r_ifs in _r_dat:
+ for _r_if in _r_ifs['ifs']:
+ if _if.ip == _r_if.ip:
+ _rd = _r_ifs
+ break
+ if _rd:
+ _gs = 'gateway'
+ _e = "empty"
+ _r_gate = _rd[_gs] if _gs in _rd else _e
+ break
+
+ # IF status in reclass
+ if 'enabled' not in _r:
+ _enabled = "(no record!)"
+ else:
+ _e = "enabled"
+ _d = "disabled"
+ _enabled = "("+_e+")" if _r[_e] else "("+_d+")"
+
+ _name = _host['name']
+ _rc_mtu = _r['mtu'] if 'mtu' in _r else None
+ _rc_mtu_s = str(_rc_mtu) if _rc_mtu else '(-)'
+ # check if this is a VIP address
+ # no checks needed if yes.
+ if _host['vip'] != _ip_str:
+ if _rc_mtu:
+ # if there is an MTU value, match it
+ if _host['mtu'] != _rc_mtu_s:
+ self.errors.add_error(
+ self.errors.NET_MTU_MISMATCH,
+ host=hostname,
+ if_name=_name,
+ if_cidr=_ip_str,
+ reclass_mtu=_rc_mtu,
+ runtime_mtu=_host['mtu']
+ )
+ elif _host['mtu'] != '1500':
+ # there is no MTU value in reclass
+ # and runtime value is not default
+ self.errors.add_error(
+ self.errors.NET_MTU_EMPTY,
+ host=hostname,
+ if_name=_name,
+ if_cidr=_ip_str,
+ if_mtu=_host['mtu']
+ )
+ else:
+ # this is a VIP
+ _name = " "*20
+ _ip_str += " VIP"
+ _enabled = "(-)"
+ _r_gate = "-"
+
+ _text = "{0:25} {1:19} {2:5}{3:10} {4:4}{5:10} " \
+ "{6} / {7} / {8}".format(
+ _name,
+ _ip_str,
+ _host['mtu'],
+ _rc_mtu_s,
+ _host['state'],
+ _enabled,
+ _gate,
+ _d_gate_str,
+ _r_gate
+ )
+ logger_cli.info(
+ " {0:17} {1}".format(
+ hostname.split('.')[0],
+ _text
+ )
+ )
+
+ logger_cli.info("\n# Other networks")
+ _other = [n for n in _runtime if n not in _reclass]
+ for network in _other:
+ logger_cli.info("-> {}".format(str(network)))
+ names = sorted(_runtime[network].keys())
+
+ for hostname in names:
+ for _n in _runtime[network][hostname]:
+ _ifs = [str(ifs.ip) for ifs in _n['ifs']]
+ _text = "{0:25}: {1:19} {2:5} {3:4}".format(
+ _n['name'],
+ ", ".join(_ifs),
+ _n['mtu'],
+ _n['state']
+ )
+ logger_cli.info(
+ " {0:17} {1}".format(hostname.split('.')[0], _text)
+ )
diff --git a/cfg_checker/modules/network/network_errors.py b/cfg_checker/modules/network/network_errors.py
index cb9dfef..6086be0 100644
--- a/cfg_checker/modules/network/network_errors.py
+++ b/cfg_checker/modules/network/network_errors.py
@@ -15,6 +15,7 @@
NET_MASK_MISMATCH = next(_c)
NET_NODE_NON_RESPONSIVE = next(_c)
NET_NODE_UNEXPECTED_IF = next(_c)
+ NET_NO_RUNTIME_NETWORK = next(_c)
def __init__(self):
super(NetworkErrors, self).__init__("NET")
@@ -47,6 +48,10 @@
self.NET_NODE_UNEXPECTED_IF,
"Node has unexpected IF with mapped IP"
)
+ self.add_error_type(
+ self.NET_NO_RUNTIME_NETWORK,
+ "Reclass network not found in Runtime"
+ )
del _c
diff --git a/cfg_checker/modules/network/pinger.py b/cfg_checker/modules/network/pinger.py
new file mode 100644
index 0000000..8eea315
--- /dev/null
+++ b/cfg_checker/modules/network/pinger.py
@@ -0,0 +1,180 @@
+import ipaddress
+import json
+
+from cfg_checker.common import logger, logger_cli
+from cfg_checker.helpers.console_utils import Progress
+from cfg_checker.modules.network.mapper import NetworkMapper
+from cfg_checker.nodes import salt_master
+
+
+# ping -s 9072 -M do -n -c 1 -w 1 -W 1 ctl01
+
+
+# This is independent class with a salt.nodes input
+class NetworkPinger(object):
+ def __init__(self, mtu=None, detailed=False):
+ logger_cli.info("# Initializing")
+ # all active nodes in the cloud
+ self.target_nodes = salt_master.get_nodes()
+ # default MTU value
+ self.target_mtu = mtu if mtu else 64
+ # only data
+ self.packet_size = int(self.target_mtu) - 20 - 8
+ self.detailed_summary = detailed
+
+ def _collect_node_addresses(self, target_net):
+ # use reclass model and standard methods
+ # to create list of nodes with target network
+ _mapper = NetworkMapper()
+ _reclass = _mapper.map_network(_mapper.RECLASS)
+ if target_net in _reclass:
+ return _reclass[target_net]
+ else:
+ logger_cli.info(
+ "# Target network of {} not found in reclass".format(
+ target_net.exploded
+ )
+ )
+ return None
+
+ def ping_nodes(self, network_cidr_str):
+ # Conduct actual ping using network CIDR
+ logger_cli.info("# Collecting node pairs")
+ _fake_if = ipaddress.IPv4Interface(unicode(network_cidr_str))
+ _net = _fake_if.network
+ # collect nodes and ips from reclass
+ nodes = self._collect_node_addresses(_net)
+ # build list of packets to be sent
+ # source -> target
+ _count = 0
+ _packets = {}
+ _nodes = sorted(nodes.keys())
+ _nodes_total = len(_nodes)
+ logger_cli.info("-> {} nodes found within subnet of '{}'".format(
+ _nodes_total,
+ network_cidr_str
+ ))
+ while len(_nodes) > 0:
+ src_host = _nodes.pop()
+ src_data = nodes[src_host]
+ src_if_name = src_data[0]['name']
+ src_ips = [str(_if.ip) for _if in src_data[0]['ifs']]
+ _packets[src_host] = {
+ "ip": src_ips[0],
+ "if_name": src_if_name,
+ "targets": {}
+ }
+
+ for tgt_host, tgt_data in nodes.iteritems():
+ for tgt_if in tgt_data:
+ tgt_if_name = tgt_if['name']
+ _ip_index = 0
+ for tgt_ip in tgt_if['ifs']:
+ _ip = str(tgt_ip.ip)
+ if _ip not in src_ips:
+ _packets[src_host]["targets"][tgt_host] = []
+ _tgt = {
+ "ip": _ip,
+ "tgt_host": tgt_host,
+ "ip_index": _ip_index,
+ "if_name": tgt_if_name,
+ "mtu": self.target_mtu,
+ "size": self.packet_size
+ }
+ _packets[src_host]["targets"][tgt_host].append(
+ _tgt
+ )
+ _count += 1
+ _ip_index += 1
+ else:
+ pass
+ logger_cli.info("-> {} packets to send".format(_count))
+
+ # do ping of packets
+ logger_cli.info("# Pinging nodes: MTU={}".format(self.target_mtu))
+ salt_master.prepare_script_on_active_nodes("ping.py")
+ _errors = []
+ _success = []
+ _progress = Progress(_count)
+ _progress_index = 0
+ _node_index = 0
+ for src, src_data in _packets.iteritems():
+ _targets = src_data["targets"]
+ _node_index += 1
+ # create 'targets.json' on source host
+ _path = salt_master.prepare_json_on_node(
+ src,
+ _targets,
+ "targets.json"
+ )
+ # execute ping.py
+ _results = salt_master.execute_script_on_node(
+ src,
+ "ping.py",
+ args=[_path]
+ )
+ _progress_index += len(_targets)
+ # print progress
+ _progress.write_progress(
+ _progress_index,
+ note='/ {}/{} nodes / current {}'.format(
+ _node_index,
+ _nodes_total,
+ src
+ )
+ )
+ # Parse salt output
+ _result = _results[src]
+ _result = json.loads(_result)
+ # Handle return codes
+ for tgt_node, _tgt_ips in _result.iteritems():
+ for _params in _tgt_ips:
+ _body = "{}({}) --{}--> {}({}@{})\n".format(
+ src,
+ src_data["if_name"],
+ _params["returncode"],
+ tgt_node,
+ _params["if_name"],
+ _params["ip"]
+ )
+ _stdout = ""
+ _stderr = ""
+ if len(_params["stdout"]) > 0:
+ _stdout = "stdout:\n{}\n".format(_params["stdout"])
+ if len(_params["stderr"]) > 0:
+ _stderr = "stderr:\n{}\n".format(_params["stderr"])
+
+ if _params["returncode"]:
+ _errors.append("FAIL: {}{}{}".format(
+ _body,
+ _stdout,
+ _stderr
+ ))
+ else:
+ _success.append("PASS: {}{}{}".format(
+ _body,
+ _stdout,
+ _stderr
+ ))
+
+ # Parse results back in place
+ src_data["targets"] = _result
+
+ _progress.newline()
+
+ if self.detailed_summary:
+ logger_cli.info("\n{:=^8s}".format("PASS"))
+ logger_cli.info("\n".join(_success))
+ else:
+ logger.info("\n{:=^8s}".format("PASS"))
+ logger.info("\n".join(_success))
+ if len(_errors) > 0:
+ logger_cli.info("\n{:=^8s}".format("FAIL"))
+ logger_cli.info("\n".join(_errors))
+
+ logger_cli.info(
+ "# {} failed, {} passed".format(
+ len(_errors),
+ len(_success)
+ )
+ )
diff --git a/cfg_checker/modules/packages/checker.py b/cfg_checker/modules/packages/checker.py
index 3225f70..0bcb1a6 100644
--- a/cfg_checker/modules/packages/checker.py
+++ b/cfg_checker/modules/packages/checker.py
@@ -3,13 +3,13 @@
from cfg_checker.common import const, logger_cli
from cfg_checker.common.exception import ConfigException
from cfg_checker.helpers.console_utils import Progress
-from cfg_checker.nodes import SaltNodes
+from cfg_checker.nodes import salt_master
from cfg_checker.reports import reporter
from versions import DebianVersion, PkgVersions, VersionCmpResult
-class CloudPackageChecker(SaltNodes):
+class CloudPackageChecker(object):
@staticmethod
def presort_packages(all_packages, full=None):
logger_cli.info("-> Presorting packages")
@@ -52,14 +52,14 @@
# sort packages
_pn, _val = all_packages.popitem()
_c = _val['desc']['component']
- if full:
+ if not full:
# Check if this packet has errors
# if all is ok -> just skip it
_max_status = max(_val['results'].keys())
if _max_status <= const.VERSION_OK:
_max_action = max(_val['results'][_max_status].keys())
if _max_action == const.ACT_NA:
- # this package do not ha any comments
+ # this package does not have any comments
# ...just skip it from report
continue
@@ -118,9 +118,12 @@
:return: none
"""
logger_cli.info("# Collecting installed packages")
- _result = self.execute_script_on_active_nodes("pkg_versions.py")
+ if not salt_master.nodes:
+ salt_master.nodes = salt_master.get_nodes()
+ salt_master.prepare_script_on_active_nodes("pkg_versions.py")
+ _result = salt_master.execute_script_on_active_nodes("pkg_versions.py")
- for key in self.nodes.keys():
+ for key in salt_master.nodes.keys():
# due to much data to be passed from salt, it is happening in order
if key in _result:
_text = _result[key]
@@ -135,12 +138,12 @@
)
_dict = {}
- self.nodes[key]['packages'] = _dict
+ salt_master.nodes[key]['packages'] = _dict
else:
- self.nodes[key]['packages'] = {}
+ salt_master.nodes[key]['packages'] = {}
logger_cli.debug("... {} has {} packages installed".format(
key,
- len(self.nodes[key]['packages'].keys())
+ len(salt_master.nodes[key]['packages'].keys())
))
logger_cli.info("-> Done")
@@ -156,12 +159,12 @@
logger_cli.info(
"# Cross-comparing: Installed vs Candidates vs Release"
)
- _progress = Progress(len(self.nodes.keys()))
+ _progress = Progress(len(salt_master.nodes.keys()))
_progress_index = 0
_total_processed = 0
# Collect packages from all of the nodes in flat dict
_all_packages = {}
- for node_name, node_value in self.nodes.iteritems():
+ for node_name, node_value in salt_master.nodes.iteritems():
_uniq_len = len(_all_packages.keys())
_progress_index += 1
# progress will jump from node to node
@@ -182,8 +185,8 @@
# All packages list with version and node list
if _name not in _all_packages:
# shortcuts for this cloud values
- _os = self.openstack_release
- _mcp = self.mcp_release
+ _os = salt_master.openstack_release
+ _mcp = salt_master.mcp_release
_pkg_desc = {}
if _desc[_name]:
# shortcut to version library
@@ -256,9 +259,9 @@
filename
)
payload = {
- "nodes": self.nodes,
- "mcp_release": self.mcp_release,
- "openstack_release": self.openstack_release
+ "nodes": salt_master.nodes,
+ "mcp_release": salt_master.mcp_release,
+ "openstack_release": salt_master.openstack_release
}
payload.update(self.presort_packages(self._packages, full))
_report(payload)
diff --git a/cfg_checker/nodes.py b/cfg_checker/nodes.py
index 5e47447..9a1fd48 100644
--- a/cfg_checker/nodes.py
+++ b/cfg_checker/nodes.py
@@ -1,9 +1,12 @@
+import json
import os
from copy import deepcopy
+from cfg_checker.clients import get_salt_remote, salt
from cfg_checker.common import config, const
from cfg_checker.common import logger, logger_cli
-from cfg_checker.common import salt_utils, utils
+from cfg_checker.common import utils
+from cfg_checker.common.exception import SaltException
from cfg_checker.common.settings import pkg_dir
node_tmpl = {
@@ -17,13 +20,18 @@
class SaltNodes(object):
def __init__(self):
- logger_cli.info("# Collecting nodes")
+ logger_cli.info("# Gathering environment information")
# simple salt rest client
- self.salt = salt_utils.SaltRemote()
+ self.salt = salt
+ self.nodes = None
+ def gather_node_info(self):
# Keys for all nodes
# this is not working in scope of 2016.8.3, will overide with list
logger_cli.debug("... collecting node names existing in the cloud")
+ if not self.salt:
+ self.salt = get_salt_remote(config)
+
try:
_keys = self.salt.list_keys()
_str = []
@@ -85,20 +93,26 @@
)
)
# get master node fqdn
- self.master_node = filter(
+ _filtered = filter(
lambda nd: self.nodes[nd]['role'] == const.all_roles_map['cfg'],
self.nodes
- )[0]
+ )
+ if len(_filtered) < 1:
+ raise SaltException(
+ "No master node detected! Check/Update node role map."
+ )
+ else:
+ self.salt.master_node = _filtered[0]
# OpenStack versions
self.mcp_release = self.salt.pillar_get(
- self.master_node,
+ self.salt.master_node,
"_param:apt_mk_version"
- )[self.master_node]
+ )[self.salt.master_node]
self.openstack_release = self.salt.pillar_get(
- self.master_node,
+ self.salt.master_node,
"_param:openstack_version"
- )[self.master_node]
+ )[self.salt.master_node]
def skip_node(self, node):
# Add node to skip list
@@ -113,6 +127,8 @@
return False
def get_nodes(self):
+ if not self.nodes:
+ self.gather_node_info()
return self.nodes
def get_specific_pillar_for_nodes(self, pillar_path):
@@ -156,7 +172,44 @@
else:
_data[_pillar_keys[-1]] = _result[node]
- def execute_script_on_active_nodes(self, script_filename, args=[]):
+ def prepare_json_on_node(self, node, _dict, filename):
+ # this function assumes that all folders are created
+ _dumps = json.dumps(_dict, indent=2).splitlines()
+ _storage_path = os.path.join(
+ config.salt_file_root, config.salt_scripts_folder
+ )
+ logger_cli.debug(
+ "... uploading data as '{}' "
+ "to master's file cache folder: '{}'".format(
+ filename,
+ _storage_path
+ )
+ )
+ _cache_path = os.path.join(_storage_path, filename)
+ _source_path = os.path.join(
+ 'salt://',
+ config.salt_scripts_folder,
+ filename
+ )
+ _target_path = os.path.join(
+ '/root',
+ config.salt_scripts_folder,
+ filename
+ )
+
+ logger_cli.debug("... creating file in cache '{}'".format(_cache_path))
+ self.salt.f_touch_master(_cache_path)
+ self.salt.f_append_master(_cache_path, _dumps)
+ logger.debug("... syncing file to '{}'".format(node))
+ self.salt.get_file(
+ node,
+ _source_path,
+ _target_path,
+ tgt_type="compound"
+ )
+ return _target_path
+
+ def prepare_script_on_active_nodes(self, script_filename):
# Prepare script
_p = os.path.join(pkg_dir, 'scripts', script_filename)
with open(_p, 'rt') as fd:
@@ -171,7 +224,7 @@
_storage_path
)
)
- self.salt.mkdir("cfg01*", _storage_path)
+ self.salt.mkdir(self.salt.master_node, _storage_path)
# Form cache, source and target path
_cache_path = os.path.join(_storage_path, script_filename)
_source_path = os.path.join(
@@ -202,7 +255,6 @@
),
tgt_type="compound"
)
- logger_cli.info("-> Running script to all active nodes")
logger.debug("... syncing file to nodes")
self.salt.get_file(
self.active_nodes_compound,
@@ -210,7 +262,42 @@
_target_path,
tgt_type="compound"
)
- # execute pkg collecting script
+ # return path on nodes, just in case
+ return _target_path
+
+ def execute_script_on_node(self, node, script_filename, args=[]):
+ # Prepare path
+ _target_path = os.path.join(
+ '/root',
+ config.salt_scripts_folder,
+ script_filename
+ )
+
+ # execute script
+ logger.debug("... running script on '{}'".format(node))
+ # handle results for each node
+ _script_arguments = " ".join(args) if args else ""
+ self.not_responded = []
+ _r = self.salt.cmd(
+ node,
+ 'cmd.run',
+ param='python {} {}'.format(_target_path, _script_arguments),
+ expr_form="compound"
+ )
+
+ # all false returns means that there is no response
+ self.not_responded = [_n for _n in _r.keys() if not _r[_n]]
+ return _r
+
+ def execute_script_on_active_nodes(self, script_filename, args=[]):
+ # Prepare path
+ _target_path = os.path.join(
+ '/root',
+ config.salt_scripts_folder,
+ script_filename
+ )
+
+ # execute script
logger.debug("... running script")
# handle results for each node
_script_arguments = " ".join(args) if args else ""
@@ -237,3 +324,6 @@
return False
else:
return True
+
+
+salt_master = SaltNodes()
diff --git a/scripts/ping.py b/scripts/ping.py
new file mode 100644
index 0000000..4e2778d
--- /dev/null
+++ b/scripts/ping.py
@@ -0,0 +1,159 @@
+import json
+import os
+import platform
+import sys
+from copy import deepcopy
+from multiprocessing.dummy import Pool
+from subprocess import PIPE, Popen
+
+_os = platform.system()
+_packets = {}
+
+_defaults = {
+ "ip": None,
+ "size": 0,
+ "fragmentation": False,
+ "count": 1,
+ "exit_timeout": 1,
+ "response_timeout": 1,
+ "numeric": True
+}
+
+_help_message = \
+ "Invalid parameters. Use: 'ping.py [PKT_SIZE] <IP>' or 'ping.py n.json'\n"
+
+_template = {
+ "returncode": -1,
+ "stdout": "",
+ "stderr": ""
+}
+
+
+def shell(command):
+ _ps = Popen(
+ " ".join(command),
+ shell=True,
+ stdout=PIPE,
+ stderr=PIPE
+ )
+ _out = _ps.communicate()
+ _err = _out[1]
+ _out = _out[0]
+ return _ps.returncode, _out, _err
+
+
+def write_help():
+ _t = deepcopy(_template)
+ _t["returncode"] = 1
+ _t["stderr"] = _help_message
+ write_outcome(_t)
+
+
+def write_outcome(_params):
+ sys.stdout.write(json.dumps(_params))
+
+
+def do_ping(_params):
+ # Load params and defaults
+ _d = deepcopy(_defaults)
+ for key in _params:
+ _d[key] = _params[key]
+
+ # Build cmd
+ _cmd = ["ping"]
+ if _os == "Darwin":
+ if not _d["fragmentation"]:
+ _cmd.append("-D")
+ if _d["exit_timeout"]:
+ _cmd += ["-t", str(_d["exit_timeout"])]
+ elif _os == "Linux":
+ if not _d["fragmentation"]:
+ _cmd += ["-M", "do"]
+ if _d["exit_timeout"]:
+ _cmd += ["-w", str(_d["exit_timeout"])]
+ else:
+ # Windows or other OS
+ _t = deepcopy(_template)
+ _t["returncode"] = 1
+ _t["stderr"] = \
+ "ping.py: '{}' support not implemented".format(_os)
+ write_outcome(_t)
+ sys.exit(1)
+
+ if _d["size"]:
+ _cmd += ["-s", str(_d["size"])]
+ if _d["count"]:
+ _cmd += ["-c", str(_d["count"])]
+ if _d["numeric"]:
+ _cmd.append("-n")
+ if _d["response_timeout"]:
+ _cmd += ["-W", str(_d["response_timeout"])]
+
+ _cmd.append(_d["ip"])
+ # sys.stdout.write("# {}\n".format(" ".join(_cmd)))
+ _r, _out, _err = shell(_cmd)
+
+ # TODO: parse results, latency, etc
+ _t = deepcopy(_template)
+ _t["returncode"] = _r
+ _t["stdout"] = _out
+ _t["stderr"] = _err
+ _params.update(_t)
+ return _params
+
+
+def load_targets(filename):
+ # load target ips from json
+ with open(filename, "r") as f:
+ j = json.load(f)
+
+ return j
+
+
+if len(sys.argv) < 2:
+    # no params given
+    write_help()
+elif len(sys.argv) < 3:
+    # one param: decide if it json file or IP
+    _arg = sys.argv[1]
+    if os.path.isfile(_arg):
+        _packets = load_targets(_arg)
+        # up to 15 packets at once
+        pool = Pool(15)
+        # prepare threaded map
+        _param_map = []
+        for _node, _data in _packets.iteritems():
+            if isinstance(_data, list):
+                for target in _data:
+                    _param_map.append(target)
+            elif isinstance(_data, dict):
+                _param_map.append(_data)
+            else:
+                _t = deepcopy(_template)
+                _t["returncode"] = 1
+                _t["stderr"] = \
+                    "TypeError: 'list' or 'dict' expected. " \
+                    "Got '{}': '{}'".format(
+                        type(_data).__name__,
+                        _data
+                    )
+                _packets[_node] = _t
+        _threaded_out = pool.map(do_ping, _param_map)
+        for _out in _threaded_out:
+            if isinstance(_packets[_out["tgt_host"]], dict):
+                _packets[_out["tgt_host"]] = _out
+            elif isinstance(_packets[_out["tgt_host"]], list):
+                _packets[_out["tgt_host"]][_out["ip_index"]] = _out
+        sys.stdout.write(json.dumps(_packets))
+    else:
+        # IP given; do_ping() expects a params dict, not a bare string
+        _ip = sys.argv[1]
+        write_outcome(do_ping({"ip": _ip}))
+elif len(sys.argv) < 4:
+    # two params: size and IP; do_ping() takes a single params dict
+    _s = sys.argv[1]
+    _ip = sys.argv[2]
+    write_outcome(do_ping({"ip": _ip, "size": _s}))
+else:
+    # too many params given
+    write_help()
diff --git a/scripts_ds/health_checks.py b/scripts_ds/health_checks.py
new file mode 100644
index 0000000..c321149
--- /dev/null
+++ b/scripts_ds/health_checks.py
@@ -0,0 +1,731 @@
+import subprocess
+import socket
+import salt.utils
+import logging
+import os
+import re
+import json
+
+__author__ = "Dzmitry Stremkouski"
+__copyright__ = "Copyright 2019, Mirantis Inc."
+__license__ = "Apache 2.0"
+
+logger = logging.getLogger(__name__)
+stream = logging.StreamHandler()
+logger.addHandler(stream)
+
+
+def _failed_minions(out, agent, failed_minions):
+
+ ''' Verify failed minions '''
+
+ if len(failed_minions) > 0:
+ logger.error("%s check FAILED" % agent)
+ logger.error("Some minions returned non-zero exit code or empty data")
+ logger.error("Failed minions:" + str(failed_minions))
+ for minion in failed_minions:
+ logger.error(minion)
+ logger.debug(str(out[minion]['ret']))
+ __context__['retcode'] = 2
+ return False
+
+ return True
+
+
+def _minions_output(out, agent, ignore_dead, ignore_empty=False):
+
+ ''' Verify minions output and exit code '''
+
+ if not out:
+ logger.error("%s check FAILED" % agent)
+ logger.error("No response from master cmd")
+ __context__['retcode'] = 2
+ return False
+
+ if not ignore_dead:
+ jid = out.itervalues().next()['jid']
+ job_stats = __salt__['saltutil.runner']( 'jobs.print_job', arg=[jid] ) or None
+ if not job_stats:
+ logger.error("%s check FAILED" % agent)
+ logger.error("No response from master runner")
+ __context__['retcode'] = 2
+ return False
+
+ job_result = job_stats[jid]['Result']
+ job_minions = job_stats[jid]['Minions']
+ if len(job_minions) != len(job_result):
+ logger.error("%s check FAILED" % agent)
+ logger.error("Some minions are offline")
+ logger.error(list(set(job_minions) - set(job_result.keys())))
+ __context__['retcode'] = 2
+ return False
+
+ failed_minions = []
+ for minion in out:
+ if 'retcode' in out[minion]:
+ if out[minion]['retcode'] == 0:
+ if not ignore_empty:
+ if isinstance(out[minion]['ret'], bool):
+ if minion not in failed_minions:
+ failed_minions.append(minion)
+ elif len(out[minion]['ret']) == 0:
+ if minion not in failed_minions:
+ failed_minions.append(minion)
+ else:
+ if minion not in failed_minions:
+ failed_minions.append(minion)
+ else:
+ if minion not in failed_minions:
+ failed_minions.append(minion)
+
+ if not _failed_minions(out, agent, failed_minions):
+ __context__['retcode'] = 2
+ return False
+
+ return True
+
+
+def minions_check(wait_timeout=1, gather_job_wait_timeout=1, target='*', target_type='glob', ignore_dead=False):
+
+ ''' Verify minions are online '''
+
+ agent = "Minions"
+ out = __salt__['saltutil.cmd']( tgt=target,
+ tgt_type=target_type,
+ fun='test.ping',
+ timeout=wait_timeout,
+ gather_job_timeout=gather_job_wait_timeout
+ ) or None
+
+ return _minions_output(out, agent, ignore_dead, ignore_empty=True)
+
+
+def time_diff_check(time_diff=1, target='*', target_type='glob', ignore_dead=False, **kwargs):
+
+ ''' Verify time diff on servers '''
+
+ agent = "Time diff"
+ out = __salt__['saltutil.cmd']( tgt=target,
+ tgt_type=target_type,
+ fun='status.time',
+ arg=['%s'],
+ timeout=3
+ ) or None
+
+ if not _minions_output(out, agent, ignore_dead):
+ __context__['retcode'] = 2
+ return False
+
+ minions_times = {}
+ env_times = []
+ verified_minions = []
+
+ for minion in out:
+ verified_minions.append(minion)
+ if out[minion]['retcode'] == 0:
+ minion_time = int(out[minion]['ret'])
+ if str(minion_time) not in minions_times:
+ minions_times[str(minion_time)] = []
+ minions_times[str(minion_time)].append(minion)
+ env_times.append(minion_time)
+
+ env_times.sort()
+ diff = env_times[-1] - env_times[0]
+
+ if diff > time_diff:
+ __context__['retcode'] = 2
+ if kwargs.get("debug", False):
+ return False, minions_times
+ else:
+ return False
+
+ if kwargs.get("debug", False):
+ logger.info(verified_minions)
+ return True
+
+
+def contrail_check(target='I@contrail:control or I@contrail:collector or I@opencontrail:compute or I@opencontrail:client', target_type='compound', ignore_dead=False, **kwargs):
+
+ ''' Verify contrail status returns nothing critical '''
+
+ agent = "Contrail status"
+ out = __salt__['saltutil.cmd']( tgt=target,
+ tgt_type=target_type,
+ fun='cmd.run',
+ arg=['contrail-status'],
+ timeout=5
+ ) or None
+
+ if not _minions_output(out, agent, ignore_dead):
+ __context__['retcode'] = 2
+ return False
+
+ failed_minions = []
+ pattern = '^(==|$|\S+\s+(active|backup|inactive\s\(disabled\son\sboot\)))'
+ prog = re.compile(pattern)
+
+ validated = []
+ for minion in out:
+ for line in out[minion]['ret'].split('\n'):
+ if not prog.match(line) and minion not in failed_minions:
+ failed_minions.append(minion)
+ validated.append(minion)
+
+ if not _failed_minions(out, agent, failed_minions):
+ __context__['retcode'] = 2
+ return False
+
+ if kwargs.get("debug", False):
+ logger.info(validated)
+ return True
+
+
+def galera_check(cluster_size=3, target='I@galera:master or I@galera:slave', target_type='compound', ignore_dead=False, **kwargs):
+
+ ''' Verify galera cluster size and state '''
+
+ agent = "Galera status"
+ out = __salt__['saltutil.cmd']( tgt=target,
+ tgt_type=target_type,
+ fun='mysql.status',
+ timeout=3
+ ) or None
+
+ if not _minions_output(out, agent, ignore_dead):
+ __context__['retcode'] = 2
+ return False
+
+ failed_minions = []
+
+ validated = []
+ for minion in out:
+ if int(out[minion]['ret']['wsrep_cluster_size']) != int(cluster_size) and minion not in failed_minions:
+ failed_minions.append(minion)
+ if out[minion]['ret']['wsrep_evs_state'] != 'OPERATIONAL' and minion not in failed_minions:
+ failed_minions.append(minion)
+ validated.append(minion)
+
+ if not _failed_minions(out, agent, failed_minions):
+ __context__['retcode'] = 2
+ return False
+
+ if kwargs.get("debug", False):
+ logger.info(validated)
+ logger.info("Cluster size: " + str(out[validated[0]]['ret']['wsrep_cluster_size']))
+ logger.info("Cluster state: " + str(out[validated[0]]['ret']['wsrep_evs_state']))
+ return True
+
+
+def _quote_str(s, l=False, r=False):
+
+    ''' Quoting rabbitmq erl objects for json import '''
+
+ if len(s) > 0:
+ if l:
+ s = s.lstrip()
+ if r:
+ s = s.rstrip()
+ if (s[0] == "'") and (s[-1] != "'") and r and not l:
+ s += "'"
+ if (s[0] == '"') and (s[-1] != '"') and r and not l:
+ s += '"'
+ if (s[-1] == "'") and (s[0] != "'") and l and not r:
+ s = "'" + s
+ if (s[-1] == '"') and (s[0] != '"') and l and not r:
+ s = '"' + s
+ if (s[-1] != "'") and (s[-1] != '"') and (s[0] != "'") and (s[0] != '"'):
+ s = '"' + s.replace('"', '\\\"') + '"'
+ else:
+ if (not l) and (not r) and s[0] != '"' and not s[-1] != '"':
+ s= s.replace('"', '\\\"')
+ return s.replace("'", '"')
+ else:
+ return s
+
+
+def _sanitize_rmqctl_output(string):
+
+ ''' Sanitizing rabbitmq erl objects for json import '''
+
+ rabbitctl_json = ""
+ for line in string.split(','):
+ copy = line
+ left = ""
+ right = ""
+ mid = copy
+ lpar = False
+ rpar = False
+ if re.search('([\[\{\s]+)(.*)', copy):
+ mid = re.sub('^([\[\{\s]+)','', copy)
+ left = copy[:-len(mid)]
+ copy = mid
+ lpar = True
+ if re.search('(.*)([\]\}\s]+)$', copy):
+ mid = re.sub('([\]\}\s]+)$','', copy)
+ right = copy[len(mid):]
+ copy = mid
+ rpar = True
+ result = left + _quote_str(mid, l=lpar, r=rpar) + right
+ if (not rpar) and lpar and (len(left.strip()) > 0) and (left.strip()[-1] == '{'):
+ result += ":"
+ else:
+ result += ","
+ rabbitctl_json += result
+
+ rabbitctl_json = rabbitctl_json[:-1]
+ new_rabbitctl_json = rabbitctl_json
+ for s in re.findall('"[^:\[{\]}]+"\s*:\s*("[^\[{\]}]+")', rabbitctl_json):
+ if '"' in s[1:][:-1]:
+ orig = s
+ changed = '"' + s.replace('\\', '\\\\').replace('"', '\\\"') + '"'
+ new_rabbitctl_json = new_rabbitctl_json.replace(orig, changed)
+ return new_rabbitctl_json
+
+
+def rabbitmq_cmd(cmd):
+
+ ''' JSON formatted RabbitMQ command output '''
+
+ supported_commands = ['status', 'cluster_status', 'list_hashes', 'list_ciphers']
+ if cmd not in supported_commands:
+ logger.error("Command is not supported yet, sorry")
+ logger.error("Supported commands are: " + str(supported_commands))
+ __context__['retcode'] = 2
+ return False
+
+ proc = subprocess.Popen(['rabbitmqctl', cmd], stdout=subprocess.PIPE)
+ stdout, stderr = proc.communicate()
+
+ rabbitmqctl_cutoff = stdout[int(stdout.find('[')):int(stdout.rfind(']'))+1].replace('\n','')
+ return json.loads(_sanitize_rmqctl_output(rabbitmqctl_cutoff))
+
+
+def rabbitmq_check(target='I@rabbitmq:server', target_type='compound', ignore_dead=False, **kwargs):
+
+    ''' Verify rabbit cluster and its alarms '''
+
+    agent = "RabbitMQ status"
+    out = __salt__['saltutil.cmd']( tgt=target,
+                                    tgt_type=target_type,
+                                    fun='health_checks.rabbitmq_cmd',
+                                    arg=['cluster_status'],
+                                    timeout=3
+                                  ) or None
+
+    if not _minions_output(out, agent, ignore_dead):
+        __context__['retcode'] = 2
+        return False
+
+    failed_minions = []
+
+    for minion in out:
+        rabbitmqctl_json = out[minion]['ret']
+        running_nodes = []
+        available_nodes = []
+        alarms = []
+        for el in rabbitmqctl_json:
+            if 'alarms' in el:
+                alarms = el['alarms']
+            if 'nodes' in el:
+                available_nodes = el['nodes'][0]['disc']
+            if 'running_nodes' in el:
+                running_nodes = el['running_nodes']
+        # compare sorted copies: list.sort() returns None (always "equal")
+        if sorted(running_nodes) == sorted(available_nodes):
+            nodes_alarms = []
+            for node in running_nodes:
+                for el in alarms:
+                    if node in el:
+                        if len(el[node]) > 0:
+                            nodes_alarms.append(el[node])
+            if len(nodes_alarms) > 0:
+                failed_minions.append(minion)
+        else:
+            failed_minions.append(minion)
+
+    if not _failed_minions(out, agent, failed_minions):
+        __context__['retcode'] = 2
+        return False
+
+    if kwargs.get("debug", False):
+        logger.info(running_nodes)
+    return True
+
+
+def haproxy_status(socket_path='/run/haproxy/admin.sock', buff_size = 8192, encoding = 'UTF-8', stats_filter=[]):
+
+ ''' JSON formatted haproxy status '''
+
+ stat_cmd = 'show stat\n'
+
+ if not os.path.exists(socket_path):
+ logger.error('Socket %s does not exist or haproxy not running' % socket_path)
+ __context__['retcode'] = 2
+ return False
+
+ client = socket.socket( socket.AF_UNIX, socket.SOCK_STREAM)
+ client.connect(socket_path)
+ stat_cmd = 'show stat\n'
+
+ client.send(bytearray(stat_cmd, encoding))
+ output = client.recv(buff_size)
+
+ res = ""
+ while output:
+ res += output.decode(encoding)
+ output = client.recv(buff_size)
+ client.close()
+
+ haproxy_stats = {}
+ res_list = res.split('\n')
+ fields = res_list[0][2:].split(',')
+ stats_list = []
+ for line in res_list[1:]:
+ if len(line.strip()) > 0:
+ stats_list.append(line)
+
+ for i in range(len(stats_list)):
+ element = {}
+ for n in fields:
+ element[n] = stats_list[i].split(',')[fields.index(n)]
+ server_name = element.pop('pxname')
+ server_type = element.pop('svname')
+ if stats_filter:
+ filtered_element = element.copy()
+ for el in element:
+ if el not in stats_filter:
+ filtered_element.pop(el)
+ element = filtered_element
+ if server_name not in haproxy_stats:
+ haproxy_stats[server_name] = {}
+ if server_type == "FRONTEND" or server_type == "BACKEND":
+ haproxy_stats[server_name][server_type] = element
+ else:
+ if 'UPSTREAM' not in haproxy_stats[server_name]:
+ haproxy_stats[server_name]['UPSTREAM'] = {}
+ haproxy_stats[server_name]['UPSTREAM'][server_type] = element
+
+ return haproxy_stats
+
+
+def haproxy_check(target='I@haproxy:proxy', target_type='compound', ignore_dead=False, ignore_services=[], ignore_upstreams=[], ignore_no_upstream=False, **kwargs):
+
+ ''' Verify haproxy backends status '''
+
+ agent = "haproxy status"
+ out = __salt__['saltutil.cmd']( tgt=target,
+ tgt_type=target_type,
+ fun='health_checks.haproxy_status',
+ arg=["stats_filter=['status']"],
+ timeout=3
+ ) or None
+
+ if not _minions_output(out, agent, ignore_dead):
+ __context__['retcode'] = 2
+ return False
+
+ failed_minions = []
+ verified_minions = []
+ for minion in out:
+ verified_minions.append(minion)
+ haproxy_json = out[minion]['ret']
+ for service in haproxy_json:
+ if service not in ignore_services:
+ if haproxy_json[service]['FRONTEND']['status'] != 'OPEN':
+ if minion not in failed_minions:
+ failed_minions.append(minion)
+ if haproxy_json[service]['BACKEND']['status'] != 'UP':
+ if minion not in failed_minions:
+ failed_minions.append(minion)
+ if 'UPSTREAM' in haproxy_json[service]:
+ for upstream in haproxy_json[service]['UPSTREAM']:
+ if upstream not in ignore_upstreams:
+ if haproxy_json[service]['UPSTREAM'][upstream]['status'] != 'UP':
+ if minion not in failed_minions:
+ failed_minions.append(minion)
+ else:
+ if not ignore_no_upstream:
+ if minion not in failed_minions:
+ failed_minions.append(minion)
+
+ if not _failed_minions(out, agent, failed_minions):
+ __context__['retcode'] = 2
+ return False
+
+ if kwargs.get("debug", False):
+ logger.info(verified_minions)
+ return True
+
+
+def df_check(target='*', target_type='glob', verify='space', space_limit=80, inode_limit=80, ignore_dead=False, ignore_partitions=[], **kwargs):
+
+ ''' Verify storage space/inodes status '''
+
+ supported_options = ['space', 'inodes']
+ if verify not in supported_options:
+ logger.error('Unsupported "verify" option.')
+ logger.error('Supported options are: %s' % str(supported_options))
+ __context__['retcode'] = 2
+ return False
+
+ if verify == 'space':
+ fun_cmd = 'disk.usage'
+ json_arg = 'capacity'
+ limit = space_limit
+ elif verify == 'inodes':
+ fun_cmd = 'disk.inodeusage'
+ json_arg = 'use'
+ limit = inode_limit
+
+ agent = "df status"
+ out = __salt__['saltutil.cmd']( tgt=target,
+ tgt_type=target_type,
+ fun=fun_cmd,
+ timeout=3
+ ) or None
+
+ if not _minions_output(out, agent, ignore_dead):
+ __context__['retcode'] = 2
+ return False
+
+ failed_minions = []
+ verified_minions = []
+ for minion in out:
+ verified_minions.append(minion)
+ df_json = out[minion]['ret']
+ for disk in df_json:
+ if disk not in ignore_partitions:
+ if int(df_json[disk][json_arg][:-1]) > int(limit):
+ if minion not in failed_minions:
+ failed_minions.append(minion)
+
+ if not _failed_minions(out, agent, failed_minions):
+ __context__['retcode'] = 2
+ return False
+
+ if kwargs.get("debug", False):
+ logger.info(verified_minions)
+ return True
+
+
+def load_check(target='*', target_type='glob', la1=3, la5=3, la15=3, ignore_dead=False, **kwargs):
+
+ ''' Verify load average status '''
+
+ agent = "load average status"
+ out = __salt__['saltutil.cmd']( tgt=target,
+ tgt_type=target_type,
+ fun='status.loadavg',
+ timeout=3
+ ) or None
+
+ if not _minions_output(out, agent, ignore_dead):
+ __context__['retcode'] = 2
+ return False
+
+ failed_minions = []
+ verified_minions = []
+ for minion in out:
+ verified_minions.append(minion)
+ la_json = out[minion]['ret']
+ if float(la_json['1-min']) > float(la1):
+ if minion not in failed_minions:
+ failed_minions.append(minion)
+ if float(la_json['5-min']) > float(la5):
+ if minion not in failed_minions:
+ failed_minions.append(minion)
+ if float(la_json['15-min']) > float(la15):
+ if minion not in failed_minions:
+ failed_minions.append(minion)
+
+ if not _failed_minions(out, agent, failed_minions):
+ __context__['retcode'] = 2
+ return False
+
+ if kwargs.get("debug", False):
+ logger.info(verified_minions)
+ return True
+
+
+def netdev_check(target='*', target_type='glob', rx_drop_limit=0, tx_drop_limit=0, ignore_devices=[], ignore_dead=False, **kwargs):
+
+ ''' Verify netdev rx/tx drop status '''
+
+ agent = "netdev rx/tx status"
+ out = __salt__['saltutil.cmd']( tgt=target,
+ tgt_type=target_type,
+ fun='status.netdev',
+ timeout=3
+ ) or None
+
+ if not _minions_output(out, agent, ignore_dead):
+ __context__['retcode'] = 2
+ return False
+
+ failed_minions = []
+ verified_minions = []
+ for minion in out:
+ verified_minions.append(minion)
+ dev_json = out[minion]['ret']
+ for netdev in dev_json:
+ if netdev not in ignore_devices:
+ if int(dev_json[netdev]['rx_drop']) > int(rx_drop_limit):
+ if minion not in failed_minions:
+ failed_minions.append(minion)
+ if int(dev_json[netdev]['tx_drop']) > int(tx_drop_limit):
+ if minion not in failed_minions:
+ failed_minions.append(minion)
+
+ if not _failed_minions(out, agent, failed_minions):
+ __context__['retcode'] = 2
+ return False
+
+ if kwargs.get("debug", False):
+ logger.info(verified_minions)
+ return True
+
+
+def mem_check(target='*', target_type='glob', used_limit=80, ignore_dead=False, **kwargs):
+
+    ''' Verify available memory status '''
+
+    agent = "available memory status"
+    out = __salt__['saltutil.cmd']( tgt=target,
+                                    tgt_type=target_type,
+                                    fun='status.meminfo',
+                                    timeout=3
+                                  ) or None
+
+    if not _minions_output(out, agent, ignore_dead):
+        __context__['retcode'] = 2
+        return False
+
+    failed_minions = []
+    verified_minions = []
+    for minion in out:
+        mem_avail = int(out[minion]['ret']['MemAvailable']['value'])
+        mem_total = int(out[minion]['ret']['MemTotal']['value'])
+        used_pct = float(mem_total - mem_avail) * 100 / mem_total  # cast before divide: py2 int division truncates
+        if used_pct > float(used_limit):
+            if minion not in failed_minions:
+                failed_minions.append(minion)
+        else:
+            verified_minions.append( { minion : str(used_pct) + '%' } )
+
+    if not _failed_minions(out, agent, failed_minions):
+        __context__['retcode'] = 2
+        return False
+
+    if kwargs.get("debug", False):
+        logger.info(verified_minions)
+    return True
+
+
+def ntp_status(params = ['-4', '-p', '-n']):
+
+ ''' JSON formatted ntpq command output '''
+
+ ntp_states = [
+ { 'indicator': '#', 'comment': 'source selected, distance exceeds maximum value' },
+ { 'indicator': 'o', 'comment': 'source selected, Pulse Per Second (PPS) used' },
+ { 'indicator': '+', 'comment': 'source selected, included in final set' },
+ { 'indicator': 'x', 'comment': 'source false ticker' },
+ { 'indicator': '.', 'comment': 'source selected from end of candidate list' },
+ { 'indicator': '-', 'comment': 'source discarded by cluster algorithm' },
+ { 'indicator': '*', 'comment': 'current time source' },
+ { 'indicator': ' ', 'comment': 'source discarded high stratum, failed sanity' }
+ ]
+ ntp_state_indicators = []
+ for state in ntp_states:
+ ntp_state_indicators.append(state['indicator'])
+ source_types = {}
+ source_types['l'] = "local (such as a GPS, WWVB)"
+ source_types['u'] = "unicast (most common)"
+ source_types['m'] = "multicast"
+ source_types['b'] = "broadcast"
+ source_types['-'] = "netaddr"
+
+ proc = subprocess.Popen(['ntpq'] + params, stdout=subprocess.PIPE)
+ stdout, stderr = proc.communicate()
+
+ ntp_lines = stdout.split('\n')
+ fields = re.sub("\s+", " ", ntp_lines[0]).split()
+ fields[fields.index('st')] = 'stratum'
+ fields[fields.index('t')] = 'source_type'
+
+ ntp_peers = {}
+ for line in ntp_lines[2:]:
+ if len(line.strip()) > 0:
+ element = {}
+ values = re.sub("\s+", " ", line).split()
+ for i in range(len(values)):
+ if fields[i] == 'source_type':
+ element[fields[i]] = { 'indicator': values[i], 'comment': source_types[values[i]] }
+ elif fields[i] in ['stratum', 'when', 'poll', 'reach']:
+ if values[i] == '-':
+ element[fields[i]] = int(-1)
+ else:
+ element[fields[i]] = int(values[i])
+ elif fields[i] in ['delay', 'offset', 'jitter']:
+ element[fields[i]] = float(values[i])
+ else:
+ element[fields[i]] = values[i]
+ peer = element.pop('remote')
+ peer_state = peer[0]
+ if peer_state in ntp_state_indicators:
+ peer = peer[1:]
+ else:
+ peer_state = 'f'
+ element['current'] = False
+ if peer_state == '*':
+ element['current'] = True
+ for state in ntp_states:
+ if state['indicator'] == peer_state:
+ element['state'] = state.copy()
+ if peer_state == 'f' and state['indicator'] == ' ':
+ fail_state = state.copy()
+ fail_state.pop('indicator')
+ fail_state['indicator'] = 'f'
+ element['state'] = fail_state
+ ntp_peers[peer] = element
+
+ return ntp_peers
+
+
+def ntp_check(min_peers=1, max_stratum=3, target='*', target_type='glob', ignore_dead=False, **kwargs):
+
+ ''' Verify NTP peers status '''
+
+ agent = "ntpd peers status"
+ out = __salt__['saltutil.cmd']( tgt=target,
+ tgt_type=target_type,
+ fun='health_checks.ntp_status',
+ timeout=3
+ ) or None
+
+ if not _minions_output(out, agent, ignore_dead):
+ __context__['retcode'] = 2
+ return False
+
+ failed_minions = []
+ verified_minions = []
+ for minion in out:
+ ntp_json = out[minion]['ret']
+ good_peers = []
+ for peer in ntp_json:
+ if ntp_json[peer]['stratum'] < int(max_stratum) + 1:
+ good_peers.append(peer)
+ if len(good_peers) > int(min_peers) - 1:
+ if minion not in verified_minions:
+ verified_minions.append(minion)
+ else:
+ if minion not in failed_minions:
+ failed_minions.append(minion)
+
+ if not _failed_minions(out, agent, failed_minions):
+ __context__['retcode'] = 2
+ return False
+
+ if kwargs.get("debug", False):
+ logger.info(verified_minions)
+ return True
diff --git a/scripts_ds/net_checks.py b/scripts_ds/net_checks.py
new file mode 100644
index 0000000..982ee89
--- /dev/null
+++ b/scripts_ds/net_checks.py
@@ -0,0 +1,303 @@
+import fcntl
+import logging
+import socket
+import struct
+from os import listdir, path
+from re import search as research
+from subprocess import PIPE, Popen
+
+logger = logging.getLogger(__name__)
+stream = logging.StreamHandler()
+logger.addHandler(stream)
+
+
def get_ip(iface='ens2'):

    ''' Get ip address from an interface if applicable

    :param iface: Interface name. Type: str
    :return: dotted-quad address string, or None when the interface
             is missing or has no IPv4 address assigned

    '''

    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        SIOCGIFADDR = 0x8915  # linux ioctl: fetch interface IPv4 address
        # struct ifreq layout: 16s name, H family, 14s padding.
        # Pack the name as bytes so this works on both py2 and py3.
        name = iface if isinstance(iface, bytes) else iface.encode()
        ifreq = struct.pack('16sH14s', name, socket.AF_INET, b'\x00' * 14)
        try:
            res = fcntl.ioctl(sock.fileno(), SIOCGIFADDR, ifreq)
        except (IOError, OSError):
            # no such device or no address configured
            logging.getLogger(__name__).debug(
                "No ip addresses assigned to %s" % iface
            )
            return None
        ip = struct.unpack('16sH2x4s8x', res)[2]
        return socket.inet_ntoa(ip)
    finally:
        # fix: the original leaked the socket on every call
        sock.close()
+
+
def get_nics():

    ''' List nics

    :return: sorted list of [name, ip, mac, carrier, mtu] for every
             interface matching the platform naming conventions

    '''

    nics = []
    for nic_name in listdir('/sys/class/net/'):
        if not research('(br|bond|ens|enp|eth|one|ten|fourty)[0-9]+', nic_name):
            continue

        # Interface should be in "up" state in order to get carrier status
        # (nic_name is regex-constrained above, so the shell call is safe)
        Popen("ip li set dev " + nic_name + " up", shell=True, stdout=PIPE)

        try:
            with open("/sys/class/net/" + nic_name + "/carrier", 'r') as f:
                carrier = int(f.read())
        except (IOError, OSError, ValueError):
            # carrier is unreadable while the link is still settling
            carrier = 0

        # fix: macaddr must be initialized here — the bond parse below can
        # finish without a match, leaving it unbound (NameError) or stale
        # from the previous loop iteration
        macaddr = ""
        bond = ""
        if path.isfile("/sys/class/net/" + nic_name + "/master/uevent"):
            with open(
                "/sys/class/net/" + nic_name + "/master/uevent", 'r'
            ) as f:
                for line in f:
                    sline = line.strip()
                    if 'INTERFACE=bond' in sline:
                        bond = sline.split('=')[1]
        if len(bond) == 0:
            with open("/sys/class/net/" + nic_name + "/address", 'r') as f:
                macaddr = f.read().strip()
        else:
            # bond slaves report the bond's address; take the permanent
            # hardware address from /proc/net/bonding instead
            with open("/proc/net/bonding/" + bond, 'r') as f:
                if_struct = False
                for line in f:
                    sline = line.strip()
                    if 'Slave Interface: ' + nic_name in sline and not if_struct:
                        if_struct = True
                    if 'Permanent HW addr: ' in sline and if_struct:
                        macaddr = sline.split()[3]
                        break

        with open("/sys/class/net/" + nic_name + "/mtu", 'r') as f:
            mtu = f.read()

        nics.append([nic_name, str(get_ip(nic_name)), macaddr, carrier, mtu])

    return sorted(nics)
+
+
def get_ten_pci():

    ''' List ten nics pci addresses

    :return: sorted list of [nic_name, pci_slot] pairs

    '''

    pci_map = []
    for nic_name in listdir('/sys/class/net/'):
        if not research('ten[0-9]+', nic_name):
            continue
        uevent_path = "/sys/class/net/" + nic_name + "/device/uevent"
        with open(uevent_path, 'r') as f:
            for raw in f:
                entry = raw.strip()
                if "PCI_SLOT_NAME=" in entry:
                    pci_map.append([nic_name, entry.split("=")[1]])

    return sorted(pci_map)
+
+
def mesh_ping(mesh):

    ''' One to many ICMP check

    :param mesh: mesh structure from net_checks.get_mesh:
                 [[host, [[mesh_name, addr, targets], ...]], ...]
    :return: list of "mesh: src -> dst: Failed" strings for
             unreachable targets (empty list when all answered)

    '''

    failures = []
    minion_id = __salt__['config.get']('id')

    for host, hostobj in mesh:
        if host != minion_id:
            # only ping from the addresses that belong to this minion
            continue
        for mesh_net, addr, targets in hostobj:
            if addr in targets:
                targets.remove(addr)
            # fix: spawn every ping first, then collect — the original
            # called communicate() right after each Popen, which
            # serialized the "parallel" pings
            workers = []
            for tgt in targets:
                workers.append((tgt, Popen(
                    "ping -c 1 -w 1 -W 1 " + str(tgt),
                    shell=True, stdout=PIPE, stderr=PIPE
                )))
            for tgt, worker in workers:
                worker.communicate()
                if worker.returncode != 0:
                    failures.append(
                        mesh_net + ': ' + addr + ' -> ' + tgt + ': Failed'
                    )

    return failures
+
+
def minion_list():

    ''' List registered minions (accepted keys on the salt master) '''

    minions_dir = '/etc/salt/pki/master/minions/'
    return listdir(minions_dir)
+
+
def verify_addresses():

    ''' Verify addresses taken from pillars

    :return: True when every address is unique, False when any
             address is claimed by more than one node

    '''

    seen = {}
    duplicates = []

    for node, ifaces in nodes_addresses():
        for entry in ifaces:
            addr = entry[1]
            owner = seen.get(addr)
            if owner is not None:
                duplicates.append([node, owner, addr])
            else:
                seen[addr] = node

    if duplicates:
        logger.error("FAILED. Duplicates found")
        logger.error(duplicates)
        return False
    logger.setLevel(logging.INFO)
    logger.info(["PASSED"])
    return True
+
+
def nodes_addresses():

    ''' List servers addresses

    :return: [[minion_id, [[mesh_name, address], ...]], ...] for every
             registered minion; minions that did not answer the pillar
             query get an empty address list

    '''

    compound = 'linux:network:interface'
    # fix: default to {} instead of None — with 'or None' an empty salt
    # reply made the 'minion in out' membership test below raise TypeError
    out = __salt__['saltutil.cmd'](
        tgt='I@' + compound,
        tgt_type='compound',
        fun='pillar.get',
        arg=[compound],
        timeout=10
    ) or {}

    servers = []
    for minion in minion_list():
        addresses = []
        if minion in out:
            ifaces = out[minion]['ret']
            for iface in ifaces:
                ifobj = ifaces[iface]
                if ifobj['enabled'] and 'address' in ifobj:
                    # interfaces may be tagged with a named mesh;
                    # untagged ones fall into 'default'
                    addresses.append([ifobj.get('mesh', 'default'),
                                      ifobj['address']])
        servers.append([minion, addresses])

    return servers
+
+
def get_mesh():

    ''' Build addresses mesh

    :return: nodes_addresses() structure where every interface entry is
             extended with the full target list of its mesh

    '''

    nodes = nodes_addresses()

    # first pass: collect every address per mesh name
    full_mesh = {}
    for _node, ifaces in nodes:
        for entry in ifaces:
            full_mesh.setdefault(entry[0], []).append(entry[1])

    # second pass: attach the shared target list to each entry
    for _node, ifaces in nodes:
        for entry in ifaces:
            entry.append(full_mesh[entry[0]])

    return nodes
+
+
def ping_check():

    ''' Ping addresses in a mesh

    :return: True when every minion reached all its mesh targets,
             False otherwise (failures are logged)

    '''

    out = __salt__['saltutil.cmd'](
        tgt='*',
        tgt_type='glob',
        fun='net_checks.mesh_ping',
        arg=[get_mesh()],
        timeout=10
    ) or None

    if out:
        # keep only minions that reported at least one unreachable target
        failed = [out[m]['ret'] for m in out if out[m]['ret']]
    else:
        failed = ["No response from minions"]

    if failed:
        logger.error("FAILED")
        logger.error('\n'.join(str(x) for x in failed))
        return False
    logger.setLevel(logging.INFO)
    logger.info(["PASSED"])
    return True
+
+
def get_nics_csv(delim=","):

    ''' List nics in csv format

    :param delim: Delimiter char. Type: str
    :return: csv text, one row per nic, lldp columns appended for
             one/ten/fourty interfaces

    '''

    # fix: data rows carry 10 fields (mtu included) but the header
    # listed only 9, shifting every lldp column in the report
    header = "server,nic_name,ip_addr,mac_addr,link,mtu," \
        "chassis_id,chassis_name,port_mac,port_descr\n"
    io = ""

    # Try to reuse lldp output if possible
    try:
        lldp_info = Popen(
            "lldpcli -f keyvalue s n s",
            shell=True,
            stdout=PIPE
        ).communicate()[0]
    except OSError:
        lldp_info = ""

    # loop-invariant: minion id is the same for every row
    node_id = __salt__['config.get']('id')

    for nic in get_nics():
        nic_name = nic[0]
        lldp = ""
        if research('(one|ten|fourty)[0-9]+', nic_name):
            # Check if we can fetch lldp data for that nic
            chassis = 'lldp.' + nic_name + '.chassis'
            port = 'lldp.' + nic_name + '.port'
            for line in lldp_info.splitlines():
                if chassis in line or port in line:
                    lldp += delim + line.split('=')[1]
        if not lldp:
            # pad so every row keeps the same column count
            lldp = delim * 4

        io += node_id + \
            delim + nic_name + \
            delim + str(nic[1]).strip() + \
            delim + str(nic[2]).strip() + \
            delim + str(nic[3]).strip() + \
            delim + str(nic[4]).strip() + \
            lldp + "\n"

    return header + io
diff --git a/setup.py b/setup.py
index 3db2008..2f93c71 100644
--- a/setup.py
+++ b/setup.py
@@ -9,7 +9,7 @@
DATA = [
('etc', [f for f in glob.glob(os.path.join('etc', '*'))]),
('templates', [f for f in glob.glob(os.path.join('templates', '*'))]),
- # ('res', [f for f in glob.glob(os.path.join('res', '*'))])
+ ('scripts', [f for f in glob.glob(os.path.join('scripts', '*'))])
]
dependencies = [