Alex | 0989ecf | 2022-03-29 13:43:21 -0500 | [diff] [blame] | 1 | # Author: Alex Savatieiev (osavatieiev@mirantis.com; a.savex@gmail.com) |
| 2 | # Copyright 2019-2022 Mirantis, Inc. |
Alex Savatieiev | d48994d | 2018-12-13 12:13:00 +0100 | [diff] [blame] | 3 | """Model Comparer: |
| 4 | - yaml parser |
| 5 | - class tree comparison |
| 6 | """ |
| 7 | import itertools |
Alex Savatieiev | d48994d | 2018-12-13 12:13:00 +0100 | [diff] [blame] | 8 | import os |
Alex | 3ebc563 | 2019-04-18 16:47:18 -0500 | [diff] [blame] | 9 | |
Alex | 3bc95f6 | 2020-03-05 17:00:04 -0600 | [diff] [blame] | 10 | from functools import reduce |
| 11 | |
Alex | 3ebc563 | 2019-04-18 16:47:18 -0500 | [diff] [blame] | 12 | from cfg_checker.common import logger, logger_cli |
Alex | 3ebc563 | 2019-04-18 16:47:18 -0500 | [diff] [blame] | 13 | |
Alex Savatieiev | d48994d | 2018-12-13 12:13:00 +0100 | [diff] [blame] | 14 | import yaml |
| 15 | |
Alex Savatieiev | d48994d | 2018-12-13 12:13:00 +0100 | [diff] [blame] | 16 | |
def get_element(element_path, input_data):
    """Return the value stored under a colon-separated key path.

    Arguments:
        element_path {string} -- keys joined with ':', e.g. "classes:system"
        input_data {dict} -- nested container to descend into

    Returns:
        The object reached after indexing ``input_data`` with every key of
        the path, in order.

    Raises:
        KeyError -- when a dict on the way does not have the requested key
    """
    data = input_data
    # iterate over the keys directly, one nesting level per key
    for key in element_path.split(":"):
        data = data[key]
    return data
| 23 | |
| 24 | |
def pop_element(element_path, input_data):
    """Remove and return the value stored under a colon-separated key path.

    The container holding the last key of the path is modified in place,
    so ``input_data`` no longer has the element after the call.

    Arguments:
        element_path {string} -- keys joined with ':', e.g. "classes:system"
        input_data {dict} -- nested container to descend into

    Returns:
        The object that was stored under the last key of the path.

    Raises:
        KeyError -- when a dict on the way does not have the requested key
    """
    paths = element_path.split(":")
    data = input_data
    # descend to the container that holds the last key
    for key in paths[:-1]:
        data = data[key]
    # pop the actual element
    return data.pop(paths[-1])
| 33 | |
| 34 | |
class ModelComparer(object):
    """Collection of functions to compare model data.

    Typical flow: call ``load_model_tree`` once per model, using
    ``model_name_1`` and ``model_name_2`` as names, then call
    ``generate_model_report_tree`` to get the differences.
    """
    # key order is important: keys are processed in sorted order and each
    # part is popped out of the loaded tree, so "04_other" receives what is
    # left of "classes" after "system" and "cluster" were taken out
    _model_parts = {
        "01_nodes": "nodes",
        "02_system": "classes:system",
        "03_cluster": "classes:cluster",
        "04_other": "classes"
    }

    # NOTE(review): class-level dict, load_model_tree() stores into it, so
    # loaded models are shared by all instances unless 'models' is rebound
    models = {}
    models_path = "/srv/salt/reclass"
    model_name_1 = "source"
    model_path_1 = os.path.join(models_path, model_name_1)
    model_name_2 = "target"
    # second path is built from the second name (was model_name_1)
    model_path_2 = os.path.join(models_path, model_name_2)

    @staticmethod
    def load_yaml_class(fname):
        """Loads a yaml from the file and forms a tree item

        Arguments:
            fname {string} -- full path to the yaml file

        Returns:
            Parsed YAML content; an empty dict when the file yields no data;
            None when the file is not valid YAML (the error is logged).

        Raises:
            Exception -- when the file can not be opened or read
        """
        _yaml = {}
        try:
            _size = 0
            with open(fname, 'r') as f:
                # NOTE(review): FullLoader is not intended for untrusted
                # input; consider yaml.safe_load if model files may come
                # from an untrusted source
                _yaml = yaml.load(f, Loader=yaml.FullLoader)
                _size = f.tell()
            # TODO: do smth with the data
            if not _yaml:
                # logger.warning("WARN: empty file '{}'".format(fname))
                _yaml = {}
            else:
                logger.debug("... loaded YAML '{}' ({}b)".format(fname, _size))
            return _yaml
        except yaml.YAMLError as exc:
            # parse errors are not fatal: log and fall through (returns None)
            logger_cli.error(exc)
        except IOError as e:
            # python3 exceptions have no '.message', format the exception
            logger_cli.error(
                "Error loading file '{}': {}".format(fname, e)
            )
            raise Exception(
                "CRITICAL: Failed to load YAML data: {}".format(e)
            ) from e

    def load_model_tree(self, name, root_path="/srv/salt/reclass"):
        """Walks supplied path for the YAML files and loads the tree

        The loaded tree is split according to '_model_parts' and stored in
        'self.models[name]'; what is left of the tree after the split is
        stored under the "rc_diffs" key.

        Arguments:
            name {string} -- key to store the loaded model under
            root_path {string} -- Path to Model's root folder. Optional

        Returns:
            True once the model is stored

        Raises:
            KeyError -- when nothing was loaded for the root folder or one
                        of the '_model_parts' paths is absent in the tree
        """
        logger_cli.info("# Loading reclass tree from '{}'".format(root_path))
        # prepare the file tree to walk
        raw_tree = {}
        # Credits to Andrew Clark@MIT. Original code is here:
        # http://code.activestate.com/recipes/577879-create-a-nested-dictionary-from-oswalk/
        root_path = root_path.rstrip(os.sep)
        start = root_path.rfind(os.sep) + 1
        # last path component; also valid when the path has no separator
        root_key = root_path[start:]
        _exts = ('.yml', '.yaml')
        # Look Ma! I am walking the file tree with no recursion!
        for path, _dirs, files in os.walk(root_path):
            # cut absolute part of the path and split folder names
            folders = path[start:].split(os.sep)
            # if this is (or is inside of) a hidden folder, ignore it
            if any(item.startswith(".") for item in folders):
                continue
            subdir = {}
            # create generator of yaml files that are not hidden
            _subfiles = (_fl for _fl in files
                         if _fl.endswith(_exts) and not _fl.startswith('.'))
            for _file in _subfiles:
                # file name, including extension, is used as a key
                _data = self.load_yaml_class(os.path.join(path, _file))
                # load all YAML class data into the tree
                subdir[_file] = _data
                if isinstance(_data, dict):
                    # Save original filepath, just in case
                    _data["_source"] = os.path.join(path[start:], _file)
                else:
                    # failed to parse or top level item is not a mapping
                    logger.warning(
                        "Non-yaml file detected: {}".format(_file)
                    )
            # creating dict structure out of folder list. Pure python magic
            parent = reduce(dict.get, folders[:-1], raw_tree)
            parent[folders[-1]] = subdir

        self.models[name] = {}
        # Break the tree into parts according to the paths
        for _part in sorted(self._model_parts):
            self.models[name][_part] = pop_element(
                self._model_parts[_part],
                raw_tree[root_key]
            )

        # save the rest of the tree as a single data object
        self.models[name]["rc_diffs"] = raw_tree[root_key]
        return True

    def find_changes(self, dict1, dict2, path=""):
        """Recursively collect differences of dict2 against dict1

        Only keys of dict1 are walked: keys that exist only in dict2 are
        not reported. Key "_source", key "secrets.yml" and keys holding
        "_password", "_key" or "_token" past the first char are skipped.

        Arguments:
            dict1 {dict} -- data of the first model
            dict2 {dict} -- data of the second model; None or any other
                            non-dict is handled as "nothing to compare to"
            path {string} -- ':'-joined path of dict1 in the tree. Optional

        Returns:
            dict, path of the changed item mapped to a dict with the keys
            "type" ("value" or "list"), "raw_values" and "str_values"
        """
        # keys can be looked up in a dict only; without this check a scalar
        # or a list met in place of a dict breaks the lookups below
        if not isinstance(dict2, dict):
            dict2 = None
        _secret_marks = ("_password", "_key", "_token")
        _report = {}
        for k in dict1.keys():
            # yamls might load keys as non-str types
            _new_path = path + ":" + str(k)
            # ignore _source key
            if k == "_source":
                continue
            # ignore secrets and other secure stuff
            if isinstance(k, str):
                if k == "secrets.yml":
                    continue
                # NOTE: '> 0' is kept as is, so a key that starts with
                # the mark is not skipped
                if any(k.find(_mark) > 0 for _mark in _secret_marks):
                    continue
            # check if this is an env name cluster entry
            if dict2 is not None and \
                    k == self.model_name_1 and \
                    self.model_name_2 in dict2:
                k1 = self.model_name_1
                k2 = self.model_name_2
                # differently named folders are compared to each other
                if isinstance(dict1[k1], dict):
                    if path == "":
                        _new_path = k1
                    _report.update(
                        self.find_changes(dict1[k1], dict2[k2], _new_path)
                    )
            elif dict2 is None or k not in dict2:
                # no key in dict2
                _report[_new_path] = {
                    "type": "value",
                    "raw_values": [dict1[k], "N/A"],
                    "str_values": [
                        "{}".format(dict1[k]),
                        "n/a"
                    ]
                }
                logger.info(
                    "{}: {}, {}".format(_new_path, dict1[k], "N/A")
                )
            elif isinstance(dict1[k], dict):
                if path == "":
                    _new_path = k
                _report.update(
                    self.find_changes(dict1[k], dict2[k], _new_path)
                )
            elif isinstance(dict1[k], list) and isinstance(dict2[k], list):
                # membership test works for lists of dicts too
                try:
                    _removed = [x for x in dict1[k] if x not in dict2[k]]
                    _added = [x for x in dict2[k] if x not in dict1[k]]
                except TypeError as e:
                    # debug routine,
                    # should not happen, due to list check above
                    logger.error(
                        "Caught lambda type mismatch: {}".format(e)
                    )
                    logger_cli.warning(
                        "Types mismatch for correct compare: "
                        "{}, {}".format(
                            type(dict1[k]),
                            type(dict2[k])
                        )
                    )
                    _removed = None
                    _added = None
                if _removed or _added:
                    _original = ["= {}".format(item) for item in dict1[k]]
                    _removed_str_lst = ["- {}".format(item)
                                        for item in _removed]
                    _added_str_lst = ["+ {}".format(i) for i in _added]
                    _report[_new_path] = {
                        "type": "list",
                        "raw_values": [
                            dict1[k],
                            _removed_str_lst + _added_str_lst
                        ],
                        "str_values": [
                            "{}".format('\n'.join(_original)),
                            "{}\n{}".format(
                                '\n'.join(_removed_str_lst),
                                '\n'.join(_added_str_lst)
                            )
                        ]
                    }
                    logger.info(
                        "{}:\n"
                        "{} original items total".format(
                            _new_path,
                            len(dict1[k])
                        )
                    )
                    if _removed:
                        logger.info(
                            "{}".format('\n'.join(_removed_str_lst))
                        )
                    if _added:
                        logger.info(
                            "{}".format('\n'.join(_added_str_lst))
                        )
            else:
                # plain values or values of different types;
                # in case of type mismatch considering it as not equal
                val1 = dict1[k]
                val2 = dict2[k]
                try:
                    match = val1 == val2
                except TypeError as e:
                    logger.warning(
                        "One of the values is not a dict: "
                        "{}, {}; {}".format(
                            str(dict1),
                            str(dict2),
                            e
                        ))
                    match = False
                if not match:
                    _report[_new_path] = {
                        "type": "value",
                        "raw_values": [val1, val2],
                        "str_values": [
                            "{}".format(val1),
                            "{}".format(val2)
                        ]
                    }
                    logger.info("{}: {}, {}".format(
                        _new_path,
                        val1,
                        val2
                    ))
        return _report

    def generate_model_report_tree(self):
        """Use two loaded models to generate comparison table with
        values grouped by YAML files

        Models are taken from 'self.models' by the names 'model_name_1'
        and 'model_name_2'.

        Returns:
            dict with one item per model part (key of '_model_parts' with
            the first three chars cut), each holding the "path" of the part
            and its "diffs", plus "diff_names" with both model names
        """
        # We are to cut both models into logical pieces
        # nodes, will not be equal most of the time
        # system, must be pretty much the same or we in trouble
        # cluster, will be the most curious part for comparison
        # other, all of the rest

        _diff_report = {}
        for _key in self._model_parts.keys():
            # tmp report for keys
            _tmp_diffs = self.find_changes(
                self.models[self.model_name_1][_key],
                self.models[self.model_name_2][_key]
            )
            # prettify the report
            for key in _tmp_diffs.keys():
                # break the key in two parts at the first file extension
                # found in it, so an extension-like text inside of the
                # param part does not move or cut the split point
                _hits = [(key.find(_e), _e)
                         for _e in (".yml", ".yaml") if _e in key]
                if _hits:
                    _pos, _ext = min(_hits)
                    _file_path = key[:_pos]
                    _param_path = key[_pos + len(_ext):]
                else:
                    _ext = ".yml"
                    _file_path = key
                    _param_path = "none"
                _tmp_diffs[key].update({
                    "class_file": _file_path + _ext,
                    "param": _param_path,
                })
            # cut the ordering prefix ("01_", ...) out of the part name
            _diff_report[_key[3:]] = {
                "path": self._model_parts[_key],
                "diffs": _tmp_diffs
            }

        _diff_report["diff_names"] = [self.model_name_1, self.model_name_2]
        return _diff_report