Blame - cfg_checker/modules/reclass/comparer.py - mcp/cfg-checker

blob: 0dec9d1078b12e8873062b65fcc3bb781521c5d1 [file] [log] [blame]

Alex Savatieiev	d48994d	2018-12-13 12:13:00 +0100	[diff] [blame]	1	"""Model Comparer:
				2	- yaml parser
				3	- class tree comparison
				4	"""
				5	import itertools
Alex Savatieiev	d48994d	2018-12-13 12:13:00 +0100	[diff] [blame]	6	import os
				7	import yaml
				8
Alex Savatieiev	f526dc0	2019-03-06 10:11:32 -0600	[diff] [blame^]	9	from cfg_checker.reports import reporter
Alex Savatieiev	5118de0	2019-02-20 15:50:42 -0600	[diff] [blame]	10	from cfg_checker.common import logger, logger_cli
Alex Savatieiev	d48994d	2018-12-13 12:13:00 +0100	[diff] [blame]	11
				12
def get_element(element_path, input_data):
    """Return the value found at a ':'-separated path in nested containers.

    Arguments:
        element_path {string} -- keys joined with ':', e.g. "classes:system"
        input_data {dict} -- nested structure indexed with each key in turn

    Returns:
        The object reached after indexing with every key of the path.
        A missing key raises whatever the container raises on lookup
        (KeyError for dicts).
    """
    node = input_data
    # descend one level per path segment
    for key in element_path.split(":"):
        node = node[key]
    return node
				19
				20
				21	def pop_element(element_path, input_data):
				22	paths = element_path.split(":")
				23	data = input_data
				24	# Search for last dict
				25	for i in range(0, len(paths)-1):
				26	data = data[paths[i]]
				27	# pop the actual element
				28	return data.pop(paths[-1])
				29
				30
class ModelComparer(object):
    """Collection of functions to compare model data.

    Two model trees are loaded from disk (see load_model_tree), split into
    the parts listed in _model_parts, compared part by part (find_changes)
    and the result is handed to the HTML reporter (compare_models).
    """
    # Maps a sortable part id to the ':'-separated path of that part inside
    # the loaded tree. Parts are popped from the tree in sorted key order,
    # so "classes:system" and "classes:cluster" are taken out before the
    # rest of "classes" is stored as "other".
    _model_parts = {
        "01_nodes": "nodes",
        "02_system": "classes:system",
        "03_cluster": "classes:cluster",
        "04_other": "classes"
    }

    # NOTE: class-level dict, shared by every instance that does not
    # assign its own 'models' attribute
    models = {}
    models_path = "/srv/salt/reclass"
    model_name_1 = "source"
    model_path_1 = os.path.join(models_path, model_name_1)
    model_name_2 = "target"
    # was built from model_name_1, which made both models load the same path
    model_path_2 = os.path.join(models_path, model_name_2)

    @staticmethod
    def load_yaml_class(fname):
        """Loads a yaml from the file and forms a tree item

        Arguments:
            fname {string} -- full path to the yaml file

        Returns:
            Parsed YAML data; an empty dict when the file holds no data;
            None when the file is not valid YAML (the error is logged).

        Raises:
            Exception -- when the file can not be opened or read
        """
        try:
            with open(fname, 'r') as f:
                # NOTE(review): this used to be yaml.load() without a Loader,
                # which can construct arbitrary python objects and is
                # rejected by recent PyYAML. safe_load() handles plain YAML
                # only; confirm no model file relies on python-specific tags.
                _yaml = yaml.safe_load(f)
                _size = f.tell()
        except yaml.YAMLError as exc:
            logger_cli.error(exc)
            # caller detects the non-dict value and reports the file
            return None
        except IOError as e:
            # str(e) works on both python 2 and 3; 'e.message' does not
            logger_cli.error(
                "Error loading file '{}': {}".format(fname, e)
            )
            raise Exception(
                "CRITICAL: Failed to load YAML data: {}".format(e)
            )
        # TODO: do smth with the data
        if not _yaml:
            logger_cli.warning("WARN: empty file '{}'".format(fname))
            return {}
        logger.debug("...loaded YAML '{}' ({}b)".format(fname, _size))
        return _yaml

    def load_model_tree(self, name, root_path="/srv/salt/reclass"):
        """Walks supplied path for the YAML files and loads the tree

        The tree is stored in self.models[name], split into the parts
        listed in _model_parts; whatever is left after the split is kept
        under the "all_diffs" key.

        Arguments:
            name {string} -- key to store the loaded model under
            root_path {string} -- Path to Model's root folder. Optional

        Returns:
            True

        Raises:
            KeyError -- when one of the _model_parts paths is not present
                        in the loaded tree
        """
        # 'reduce' is a builtin on python 2 only; functools has it on both
        from functools import reduce

        logger_cli.info("Loading reclass tree from '{}'".format(root_path))
        # prepare the file tree to walk
        raw_tree = {}
        # Credits to Andrew Clark@MIT. Original code is here:
        # http://code.activestate.com/recipes/577879-create-a-nested-dictionary-from-oswalk/
        root_path = root_path.rstrip(os.sep)
        start = root_path.rfind(os.sep) + 1
        # last path component; also valid when there is no separator at all
        root_key = root_path[start:]
        _exts = ('.yml', '.yaml')
        # Look Ma! I am walking the file tree with no recursion!
        for path, dirs, files in os.walk(root_path):
            # cut absolute part of the path and split folder names
            folders = path[start:].split(os.sep)
            # if this is (or is inside of) a hidden folder, ignore it
            if any(item.startswith(".") for item in folders):
                continue
            subdir = {}
            for _file in files:
                # only YAML files that are not hidden
                if _file.startswith('.') or not _file.endswith(_exts):
                    continue
                # load all YAML class data into the tree
                _class = self.load_yaml_class(os.path.join(path, _file))
                if isinstance(_class, dict):
                    # Save original filepath, just in case
                    _class["_source"] = os.path.join(path[start:], _file)
                else:
                    # broken YAML (None) or a top-level list/scalar
                    logger.warning(
                        "Non-yaml file detected: {}".format(_file)
                    )
                subdir[_file] = _class
            # os.walk is top-down, so the parent dict is already in place:
            # follow folder names from the root to reach it
            parent = reduce(dict.get, folders[:-1], raw_tree)
            parent[folders[-1]] = subdir

        self.models[name] = {}
        # Break the tree apart according to the part paths; sorted order
        # makes sure nested parts are popped before their parent
        for _part in sorted(self._model_parts):
            self.models[name][_part] = pop_element(
                self._model_parts[_part],
                raw_tree[root_key]
            )

        # save the remainder as a single data object
        self.models[name]["all_diffs"] = raw_tree[root_key]
        return True

    def find_changes(self, dict1, dict2, path=""):
        """Recursively collects differences of dict1 against dict2

        Only keys of dict1 are walked: keys that exist in dict2 alone are
        not reported. The "_source" key is always skipped. A key equal to
        model_name_1 in dict1 is compared against the model_name_2 key of
        dict2 when that one is present.

        Arguments:
            dict1 {dict} -- data to walk
            dict2 {dict} -- data to compare against; None or any non-dict
                            value is treated as having no keys at all
            path {string} -- ':'-separated path of dict1, used as a prefix
                             for the report keys

        Returns:
            dict -- report entries keyed by path; each entry has "type"
                    ("value" or "list"), "raw_values" and "str_values"
        """
        # A non-dict here means the models disagree on the type of the
        # parent value. It has no keys to look up, so every key of dict1
        # gets reported as missing instead of failing on lookup.
        if not isinstance(dict2, dict):
            dict2 = None
        _report = {}
        for k in dict1.keys():
            # ignore _source key
            if k == "_source":
                continue
            # yamls might load keys as non-str types
            _key_str = k if isinstance(k, str) else str(k)
            _new_path = path + ":" + _key_str
            # check if this is an env name cluster entry
            if dict2 is not None and \
                    k == self.model_name_1 and \
                    self.model_name_2 in dict2:
                k1 = self.model_name_1
                k2 = self.model_name_2
                if isinstance(dict1[k1], dict):
                    if path == "":
                        _new_path = k1
                    _report.update(
                        self.find_changes(dict1[k1], dict2[k2], _new_path)
                    )
            elif dict2 is None or k not in dict2:
                # no key in dict2
                _report[_new_path] = {
                    "type": "value",
                    "raw_values": [dict1[k], "N/A"],
                    "str_values": [
                        "{}".format(dict1[k]),
                        "n/a"
                    ]
                }
                logger.info(
                    "{}: {}, {}".format(_new_path, dict1[k], "N/A")
                )
            else:
                val1 = dict1[k]
                val2 = dict2[k]
                if isinstance(val1, dict):
                    if path == "":
                        # top level keys go without the leading ':';
                        # string form keeps the path usable for non-str keys
                        _new_path = _key_str
                    _report.update(
                        self.find_changes(val1, val2, _new_path)
                    )
                elif isinstance(val1, list) and isinstance(val2, list):
                    # membership test works for lists of dicts as well
                    try:
                        _removed = [x for x in val1 if x not in val2]
                        _added = [x for x in val2 if x not in val1]
                    except TypeError as e:
                        # debug routine,
                        # should not happen, due to list check above
                        logger.error(
                            "Caught lambda type mismatch: {}".format(e)
                        )
                        logger_cli.warning(
                            "Types mismatch for correct compare: "
                            "{}, {}".format(type(val1), type(val2))
                        )
                        _removed = []
                        _added = []
                    if _removed or _added:
                        _original = ["= {}".format(item) for item in val1]
                        _removed_str_lst = ["- {}".format(item)
                                            for item in _removed]
                        _added_str_lst = ["+ {}".format(item)
                                          for item in _added]
                        _report[_new_path] = {
                            "type": "list",
                            "raw_values": [
                                val1,
                                _removed_str_lst + _added_str_lst
                            ],
                            "str_values": [
                                "{}".format('\n'.join(_original)),
                                "{}\n{}".format(
                                    '\n'.join(_removed_str_lst),
                                    '\n'.join(_added_str_lst)
                                )
                            ]
                        }
                        logger.info(
                            "{}:\n"
                            "{} original items total".format(
                                _new_path,
                                len(val1)
                            )
                        )
                        if _removed:
                            logger.info(
                                "{}".format('\n'.join(_removed_str_lst))
                            )
                        if _added:
                            logger.info(
                                "{}".format('\n'.join(_added_str_lst))
                            )
                else:
                    # plain values or a type mismatch;
                    # values that can not be compared are not equal
                    try:
                        match = val1 == val2
                    except TypeError:
                        logger.warning(
                            "One of the values is not a dict: "
                            "{}, {}".format(
                                str(dict1),
                                str(dict2)
                            ))
                        match = False
                    if not match:
                        _report[_new_path] = {
                            "type": "value",
                            "raw_values": [val1, val2],
                            "str_values": [
                                "{}".format(val1),
                                "{}".format(val2)
                            ]
                        }
                        logger.info("{}: {}, {}".format(
                            _new_path,
                            val1,
                            val2
                        ))
        return _report

    def generate_model_report_tree(self):
        """Use two loaded models to generate comparison table with
        values grouped by YAML files

        Both models must be present in self.models under model_name_1 and
        model_name_2 (see load_model_tree).

        Returns:
            dict -- one entry per model part (part id without its "NN_"
                    prefix) holding "path" and "diffs", plus "diff_names"
                    with the names of the compared models
        """
        # We are to cut both models into logical pieces
        # nodes, will not be equal most of the time
        # system, must be pretty much the same or we in trouble
        # cluster, will be the most curious part for comparison
        # other, all of the rest

        _diff_report = {}
        for _key in self._model_parts.keys():
            # tmp report for keys
            _tmp_diffs = self.find_changes(
                self.models[self.model_name_1][_key],
                self.models[self.model_name_2][_key]
            )
            # prettify the report
            for key in _tmp_diffs.keys():
                # break the key in two parts at the first file extension:
                # class file on the left, parameter path on the right
                _ext = ".yaml" if ".yaml" in key else ".yml"
                _file_path, _found, _rest = key.partition(_ext)
                _tmp_diffs[key].update({
                    "class_file": _file_path + _ext,
                    "param": _rest if _found else "none",
                })
            # drop the "NN_" ordering prefix from the part id
            _diff_report[_key[3:]] = {
                "path": self._model_parts[_key],
                "diffs": _tmp_diffs
            }

        _diff_report["diff_names"] = [self.model_name_1, self.model_name_2]
        return _diff_report

    def compare_models(self):
        """Loads both models, compares them and writes the HTML report

        Model names and paths are taken from the model_name_N and
        model_path_N attributes. The report is written to the
        "<model_name_1>-vs-<model_name_2>.html" file.
        """
        # Do actual compare using model names from the class
        self.load_model_tree(
            self.model_name_1,
            self.model_path_1
        )
        self.load_model_tree(
            self.model_name_2,
            self.model_path_2
        )
        # Models should have similar structure to be compared
        # classes/system
        # classes/cluster
        # nodes

        diffs = self.generate_model_report_tree()

        report_file = \
            self.model_name_1 + "-vs-" + self.model_name_2 + ".html"
        # HTML report class is post-callable
        report = reporter.ReportToFile(
            reporter.HTMLModelCompare(),
            report_file
        )
        logger_cli.info("...generating report to {}".format(report_file))
        # report will have tabs for each of the comparable entities in diffs
        report({
            "nodes": {},
            "all_diffs": diffs,
        })

        return
				361	# with open("./gen_tree.json", "w+") as _out:
				362	# _out.write(json.dumps(mComparer.generate_model_report_tree))
Alex Savatieiev	06ab17d	2019-02-26 18:40:48 -0600	[diff] [blame]	363
Alex Savatieiev	c905571	2019-03-01 14:43:56 -0600	[diff] [blame]	364	return