import json
import os
import yaml
from copy import deepcopy
from multiprocessing.dummy import Pool
from time import sleep

from cfg_checker.clients import get_salt_remote, get_kube_remote
from cfg_checker.common.const import all_salt_roles_map, all_kube_roles_map
from cfg_checker.common.const import NODE_UP, NODE_DOWN, NODE_SKIP
from cfg_checker.common.const import ubuntu_versions, nova_openstack_versions
from cfg_checker.common import logger, logger_cli
from cfg_checker.common import utils
from cfg_checker.common.file_utils import create_temp_file_with_content
from cfg_checker.common.exception import SaltException, KubeException
from cfg_checker.common.ssh_utils import PortForward, SshShell
from cfg_checker.common.settings import pkg_dir, ENV_TYPE_KUBE, ENV_TYPE_SALT
from cfg_checker.helpers.console_utils import Progress


node_tmpl = {
    'role': '',
    'node_group': '',
    'status': NODE_DOWN,
    'pillars': {},
    'grains': {},
    'raw': {}
}


def _prepare_skipped_nodes(_names, skip_list, skip_list_file):
    _skipped_minions = []
    # skip list file
    if skip_list_file:
        _valid, _invalid = utils.get_nodes_list(skip_list_file)
        _skipped_minions.extend(_valid)
        if len(_invalid) > 0:
            logger_cli.info(
                "\n# WARNING: Detected invalid entries "
                "in nodes skip list:\n{}\n".format(
                    "\n".join(_invalid)
                )
            )

    # process wildcards, create node list out of masks
    if skip_list:
        _list = []
        _invalid = []
        for _item in skip_list:
            if '*' in _item:
                _str = _item[:_item.index('*')]
                _nodes = [_m for _m in _names if _m.startswith(_str)]
                if not _nodes:
                    logger_cli.warn(
                        "# WARNING: No nodes found for {}".format(_item)
                    )
                _list.extend(_nodes)
            else:
                if _item in _names:
                    _list.append(_item)
                else:
                    logger_cli.warn(
                        "# WARNING: No node found for {}".format(_item)
                    )
        # removing duplicates
        _list = list(set(_list))
        _skipped_minions.extend(_list)

    return _skipped_minions
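
# Example (hypothetical names): with _names == ["cmp001.local",
# "cmp002.local", "ctl01.local"], a skip_list of ["cmp*"] expands the mask
# to both "cmp" nodes, while an exact entry "ctl01.local" matches only one.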


class Nodes(object):
    def __init__(self, config):
        self.nodes = None
        self.env_config = config

    def skip_node(self, node):
        # Add node to the skip list,
        # for example when it fails to comply with the rules

        # check if we know such node
        if node in self.nodes.keys() and node not in self.skip_list:
            # yes, add it
            self.skip_list.append(node)
            return True
        else:
            return False

    def get_nodes(self, skip_list=None, skip_list_file=None):
        if not self.nodes:
            if not skip_list and self.env_config.skip_nodes:
                self.gather_node_info(
                    self.env_config.skip_nodes,
                    skip_list_file
                )
            else:
                self.gather_node_info(skip_list, skip_list_file)
        return self.nodes

    def get_info(self):
        _info = {
            'mcp_release': self.mcp_release,
            'openstack_release': self.openstack_release
        }
        return _info

    def is_node_available(self, node, log=True):
        if node in self.skip_list:
            if log:
                logger_cli.info("-> node '{}' not active".format(node))
            return False
        elif node in self.not_responded:
            if log:
                logger_cli.info("-> node '{}' not responded".format(node))
            return False
        else:
            return True


class SaltNodes(Nodes):
    def __init__(self, config):
        super(SaltNodes, self).__init__(config)
        logger_cli.info("# Gathering environment information")
        # simple salt rest client
        self.salt = None
        self.env_type = ENV_TYPE_SALT

    def gather_node_info(self, skip_list, skip_list_file):
        # Keys for all nodes
        # this is not working in scope of 2016.8.3, will override with list
        logger_cli.debug("... collecting node names existing in the cloud")
        if not self.salt:
            self.salt = get_salt_remote(self.env_config)

        try:
            _keys = self.salt.list_keys()
            _str = []
            for _k, _v in _keys.items():
                _str.append("{}: {}".format(_k, len(_v)))
            logger_cli.info("-> keys collected: {}".format(", ".join(_str)))

            self.node_keys = {
                'minions': _keys['minions']
            }
        except Exception:
            _keys = None
            self.node_keys = None

        # List of minions with grains
        _minions = self.salt.list_minions()
        if _minions:
            logger_cli.info(
                "-> api reported {} active minions".format(len(_minions))
            )
        elif not self.node_keys:
            # this is the last resort
            _minions = self.env_config.load_nodes_list()
            logger_cli.info(
                "-> {} nodes loaded from list file".format(len(_minions))
            )
        else:
            _minions = self.node_keys['minions']

        # Skip nodes if needed
        _skipped_minions = \
            _prepare_skipped_nodes(_minions, skip_list, skip_list_file)

        # in case the API did not list minions, we need all that answer ping
        _active = self.salt.get_active_nodes()
        logger_cli.info("-> nodes responded: {}".format(len(_active)))
        # iterate through all accepted nodes and create a dict for each
        self.nodes = {}
        self.skip_list = []
        _domains = set()
        for _name in _minions:
            _nc = utils.get_node_code(_name)
            _rmap = all_salt_roles_map
            _role = _rmap[_nc] if _nc in _rmap else 'unknown'
            if _name in _skipped_minions:
                _status = NODE_SKIP
                self.skip_list.append(_name)
            else:
                _status = NODE_UP if _name in _active else NODE_DOWN
                if _status == NODE_DOWN:
                    self.skip_list.append(_name)
                    logger_cli.info(
                        "-> '{}' is down, "
                        "added to skip list".format(
                            _name
                        )
                    )
            self.nodes[_name] = deepcopy(node_tmpl)
            self.nodes[_name]['shortname'] = _name.split(".", 1)[0]
            _domains.add(_name.split(".", 1)[1])
            self.nodes[_name]['node_group'] = _nc
            self.nodes[_name]['role'] = _role
            self.nodes[_name]['status'] = _status
        _domains = list(_domains)
        if len(_domains) > 1:
            logger_cli.warning(
                "Multiple domains detected: {}".format(",".join(_domains))
            )
        # TODO: Use domain with biggest node count by default
        # or force it via config option
        else:
            self.domain = _domains[0]
        logger_cli.info("-> {} nodes inactive".format(len(self.skip_list)))
        logger_cli.info("-> {} nodes collected".format(len(self.nodes)))

        # form an all-nodes compound string to use in salt
        self.active_nodes_compound = self.salt.compound_string_from_list(
            filter(
                lambda nd: self.nodes[nd]['status'] == NODE_UP,
                self.nodes
            )
        )
        # get master node fqdn
        # _filtered = filter(
        #     lambda nd: self.nodes[nd]['role'] == const.all_roles_map['cfg'],
        #     self.nodes
        # )
        _role = all_salt_roles_map['cfg']
        _filtered = [n for n, v in self.nodes.items() if v['role'] == _role]
        if len(_filtered) < 1:
            raise SaltException(
                "No master node detected! Check/Update node role map."
            )
        else:
            self.salt.master_node = _filtered[0]

        # OpenStack versions
        self.mcp_release = self.salt.pillar_get(
            self.salt.master_node,
            "_param:apt_mk_version"
        )[self.salt.master_node]
        self.openstack_release = self.salt.pillar_get(
            self.salt.master_node,
            "_param:openstack_version"
        )[self.salt.master_node]
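        # (pillar_get returns a {minion_fqdn: value} mapping,
        # hence the indexing by self.salt.master_node right above)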
        # Preload codenames
        # do additional queries to get linux codename and arch for each node
        self.get_specific_pillar_for_nodes("_param:linux_system_codename")
        self.get_specific_pillar_for_nodes("_param:linux_system_architecture")
        for _name in self.nodes.keys():
            _n = self.nodes[_name]
            if _name not in self.skip_list:
                _p = _n['pillars']['_param']
                _n['linux_codename'] = _p['linux_system_codename']
                _n['linux_arch'] = _p['linux_system_architecture']

    def get_cmd_for_nodes(self, cmd, target_key, target_dict=None, nodes=None):
        """Runs cmd.run and parses the result into place
        or into the dict structure provided

        :return: no return value, data published internally
        """
        logger_cli.debug(
            "... collecting results for '{}'".format(cmd)
        )
        if target_dict:
            _nodes = target_dict
        else:
            _nodes = self.nodes
        _result = self.execute_cmd_on_active_nodes(cmd, nodes=nodes)
        for node, data in _nodes.items():
            if node in self.skip_list:
                logger_cli.debug(
                    "... '{}' skipped while collecting '{}'".format(
                        node,
                        cmd
                    )
                )
                continue
            # Prepare target key
            if target_key not in data:
                data[target_key] = None
            # Save data
            if data['status'] in [NODE_DOWN, NODE_SKIP]:
                data[target_key] = None
            elif node not in _result:
                continue
            elif not _result[node]:
                logger_cli.debug(
                    "... '{}' not responded after '{}'".format(
                        node,
                        self.env_config.salt_timeout
                    )
                )
                data[target_key] = None
            else:
                data[target_key] = _result[node]
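
    # Usage sketch (hypothetical cmd and key): get_cmd_for_nodes("uname -r",
    # "kernel") runs the command on all active nodes and stores each node's
    # output at self.nodes[<name>]["kernel"], or None for skipped/down nodes.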

    def get_specific_pillar_for_nodes(self, pillar_path):
        """Gets pillars on the given path for all nodes

        :return: no return value, data published internally
        """
        logger_cli.debug(
            "... collecting node pillars for '{}'".format(pillar_path)
        )
        _result = self.salt.pillar_get(self.active_nodes_compound, pillar_path)
        self.not_responded = []
        for node, data in self.nodes.items():
            if node in self.skip_list:
                logger_cli.debug(
                    "... '{}' skipped while collecting '{}'".format(
                        node,
                        pillar_path
                    )
                )
                continue
            _pillar_keys = pillar_path.split(':')
            _data = data['pillars']
            # pre-create nested dict
            for idx in range(0, len(_pillar_keys)-1):
                _key = _pillar_keys[idx]
                if _key not in _data:
                    _data[_key] = {}
                _data = _data[_key]
            if data['status'] in [NODE_DOWN, NODE_SKIP]:
                _data[_pillar_keys[-1]] = None
            elif not _result[node]:
                logger_cli.debug(
                    "... '{}' not responded after '{}'".format(
                        node,
                        self.env_config.salt_timeout
                    )
                )
                _data[_pillar_keys[-1]] = None
                self.not_responded.append(node)
            else:
                _data[_pillar_keys[-1]] = _result[node]
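
    # e.g. get_specific_pillar_for_nodes("_param:linux_system_codename")
    # fills self.nodes[<name>]['pillars']['_param']['linux_system_codename']
    # for every node that responded, pre-creating the nested dicts on the way.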

    def prepare_json_on_node(self, node, _dict, filename):
        if node in self.skip_list:
            logger_cli.debug(
                "... '{}' skipped while preparing json file of '{}'".format(
                    node,
                    filename
                )
            )
            # nothing to prepare for a skipped node
            return None

        # this function assumes that all folders are created
        _dumps = json.dumps(_dict, indent=2).splitlines()
        _storage_path = os.path.join(
            self.env_config.salt_file_root, self.env_config.salt_scripts_folder
        )
        logger_cli.debug(
            "... uploading data as '{}' "
            "to master's file cache folder: '{}'".format(
                filename,
                _storage_path
            )
        )
        _cache_path = os.path.join(_storage_path, filename)
        _source_path = os.path.join(
            'salt://',
            self.env_config.salt_scripts_folder,
            filename
        )
        _target_path = os.path.join(
            '/root',
            self.env_config.salt_scripts_folder,
            filename
        )

        logger_cli.debug("... creating file in cache '{}'".format(_cache_path))
        self.salt.f_touch_master(_cache_path)
        self.salt.f_append_master(_cache_path, _dumps)
        logger.debug("... syncing file to '{}'".format(node))
        self.salt.get_file(
            node,
            _source_path,
            _target_path,
            tgt_type="compound"
        )
        return _target_path

    def prepare_script_on_active_nodes(self, script_filename):
        # Prepare script
        _p = os.path.join(pkg_dir, 'scripts', script_filename)
        with open(_p, 'rt') as fd:
            _script = fd.read().splitlines()
        _storage_path = os.path.join(
            self.env_config.salt_file_root, self.env_config.salt_scripts_folder
        )
        logger_cli.debug(
            "... uploading script {} "
            "to master's file cache folder: '{}'".format(
                script_filename,
                _storage_path
            )
        )
        self.salt.mkdir(self.salt.master_node, _storage_path)
        # Form cache, source and target paths
        _cache_path = os.path.join(_storage_path, script_filename)
        _source_path = os.path.join(
            'salt://',
            self.env_config.salt_scripts_folder,
            script_filename
        )
        _target_path = os.path.join(
            '/root',
            self.env_config.salt_scripts_folder,
            script_filename
        )

        logger_cli.debug("... creating file in cache '{}'".format(_cache_path))
        self.salt.f_touch_master(_cache_path)
        self.salt.f_append_master(_cache_path, _script)
        # command salt to copy file to minions
        logger_cli.debug(
            "... creating script target folder '{}'".format(
                _cache_path
            )
        )
        self.salt.mkdir(
            self.active_nodes_compound,
            os.path.join(
                '/root',
                self.env_config.salt_scripts_folder
            ),
            tgt_type="compound"
        )
        logger.debug("... syncing file to nodes")
        self.salt.get_file(
            self.active_nodes_compound,
            _source_path,
            _target_path,
            tgt_type="compound"
        )
        # return path on nodes, just in case
        return _target_path

    def execute_script_on_node(self, node, script_filename, args=[]):
        # Prepare path
        _target_path = os.path.join(
            '/root',
            self.env_config.salt_scripts_folder,
            script_filename
        )

        # execute script
        logger.debug("... running script on '{}'".format(node))
        # handle results for each node
        _script_arguments = " ".join(args) if args else ""
        self.not_responded = []
        _r = self.salt.cmd(
            node,
            'cmd.run',
            param='python {} {}'.format(_target_path, _script_arguments),
            expr_form="compound"
        )

        # all-false returns mean that there was no response
        self.not_responded = [_n for _n in _r.keys() if not _r[_n]]
        return _r

    def execute_script_on_active_nodes(self, script_filename, args=None):
        # Prepare path
        _target_path = os.path.join(
            '/root',
            self.env_config.salt_scripts_folder,
            script_filename
        )

        # execute script
        logger_cli.debug("... running script")
        # handle results for each node
        _script_arguments = args if args else ""
        self.not_responded = []
        _r = self.salt.cmd(
            self.active_nodes_compound,
            'cmd.run',
            param='python {} {}'.format(_target_path, _script_arguments),
            expr_form="compound"
        )

        # all-false returns mean that there was no response
        self.not_responded = [_n for _n in _r.keys() if not _r[_n]]
        return _r
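
    # Both execute_script_* helpers return the raw salt result: a mapping of
    # {minion_fqdn: command_output}; minions that returned a falsy value are
    # recorded in self.not_responded.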

    def execute_cmd_on_active_nodes(self, cmd, nodes=None):
        # execute cmd
        self.not_responded = []
        _r = self.salt.cmd(
            nodes if nodes else self.active_nodes_compound,
            'cmd.run',
            param=cmd,
            expr_form="compound"
        )

        # all-false returns mean that there was no response
        self.not_responded = [_n for _n in _r.keys() if not _r[_n]]
        return _r


class KubeNodes(Nodes):
    def __init__(self, config):
        super(KubeNodes, self).__init__(config)
        logger_cli.info("# Gathering environment information")
        # simple kube api client
        self.kube = get_kube_remote(self.env_config)
        self.env_type = ENV_TYPE_KUBE
        self._namespace = "qa-space"
        self._configmap_name = self.env_config.kube_scripts_folder

        # prepare needed resources
        self._check_namespace()
        self._scripts = self._check_config_map()

    def _check_namespace(self):
        # ensure namespace
        logger_cli.debug(
            "... checking namespace '{}'".format(self._namespace)
        )
        if not self.kube.ensure_namespace(self._namespace):
            raise KubeException(
                "Failed to manage namespace '{}'".format(self._namespace)
            )

    def _check_config_map(self):
        # ensure config map exists
        logger_cli.debug(
            "... checking config map '{}'".format(self._configmap_name)
        )
        _source = os.path.join(pkg_dir, 'scripts')
        return self.kube.create_config_map(
            self._namespace,
            self._configmap_name,
            _source
        )

    def gather_node_info(self, skip_list, skip_list_file):
        # Gather nodes info and query pod lists for each node
        logger_cli.debug("... collecting node names existing in the cloud")

        # Gather node names and info
        _nodes = self.kube.get_node_info()
        _node_names = list(_nodes.keys())
        # Skip nodes if needed
        _skipped_nodes = \
            _prepare_skipped_nodes(_node_names, skip_list, skip_list_file)

        # Count how many nodes are active
        self._active = [n for n, v in _nodes.items()
                        if v['conditions']['ready']['status']]

        # iterate through all accepted nodes and create a dict for each
        self.nodes = {}
        self.skip_list = []
        for _name in _node_names:
            if _name in _skipped_nodes:
                _status = NODE_SKIP
                self.skip_list.append(_name)
            else:
                _status = NODE_UP if _name in self._active else NODE_DOWN
                if _status == NODE_DOWN:
                    self.skip_list.append(_name)
                    logger_cli.info(
                        "-> '{}' shows 'Ready' as 'False', "
                        "added to skip list".format(
                            _name
                        )
                    )
            _roles = {}
            _labels = {}
            for _label, _value in _nodes[_name]['labels'].items():
                if _label in all_kube_roles_map:
                    _roles[all_kube_roles_map[_label]] = _value
                else:
                    _labels[_label] = _value

            self.nodes[_name] = deepcopy(node_tmpl)
            self.nodes[_name].pop("grains")
            self.nodes[_name].pop("pillars")

            # hostname
            self.nodes[_name]['shortname'] = \
                _nodes[_name]['addresses']['hostname']['address']
            self.nodes[_name]['internalip'] = \
                _nodes[_name]['addresses']['internalip']['address']
            self.nodes[_name]['node_group'] = None
            self.nodes[_name]['labels'] = _labels
            self.nodes[_name]['roles'] = _roles
            self.nodes[_name]['status'] = _status
            # Backward compatibility
            _info = _nodes[_name]['status']['node_info']
            self.nodes[_name]['linux_image'] = _info['os_image']
            self.nodes[_name]['linux_arch'] = _info['architecture']

            _codename = "unknown"
            _n, _v, _c = _info['os_image'].split()
            if _n.lower() == 'ubuntu':
                _v, _, _ = _v.rpartition('.') if '.' in _v else (_v, "", "")
                if _v in ubuntu_versions:
                    _codename = ubuntu_versions[_v].split()[0].lower()
            self.nodes[_name]['linux_codename'] = _codename
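            # e.g. an os_image of "Ubuntu 18.04.5 LTS" splits into
            # ("Ubuntu", "18.04.5", "LTS"); _v becomes "18.04", which
            # ubuntu_versions is assumed to map to a "Bionic ..." string,
            # yielding _codename == "bionic"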

            # Consider per-data type transfer
            self.nodes[_name]["raw"] = _nodes[_name]
        # TODO: Investigate how to handle domains in Kube, probably - skip
        # _domains = list(_domains)
        # if len(_domains) > 1:
        #     logger_cli.warning(
        #         "Multiple domains detected: {}".format(",".join(_domains))
        #     )
        # else:
        self.domain = "no.domain.in.kube.yet"
        logger_cli.info(
            "-> {} nodes collected: {} - active, {} - not active".format(
                len(self.nodes),
                len(self._active),
                len(self.skip_list)
            )
        )

        _role = "k8s-master"
        _filtered = [n for n, v in self.nodes.items() if _role in v['roles']]
        if len(_filtered) < 1:
            raise KubeException(
                "No k8s-master nodes detected! Check/Update node role map."
            )
        else:
            _r = [n for n, v in self.nodes.items()
                  if v['status'] != NODE_UP and _role in v['roles']]
            if len(_r) > 0:
                logger_cli.warn(
                    "Master nodes are reporting 'NotReady':\n{}".format(
                        "\n".join(_r)
                    )
                )
            self.kube.master_node = _filtered[0]

        # get specific data upfront
        # OpenStack versions
        self.mcp_release = ""
        # Quick and dirty way to detect OS release
        try:
            _nova_version = self.kube.exec_on_target_pod(
                "nova-manage --version",
                "nova-api-osapi",
                "openstack"
            )
            _nmajor = _nova_version.partition('.')[0]
            self.openstack_release = nova_openstack_versions[_nmajor]
        except KubeException as e:
            logger_cli.warn("Openstack not detected: {}".format(e.message))
            self.openstack_release = nova_openstack_versions["00"]

        return

    @staticmethod
    def _get_ssh_shell(_h, _u, _k, _p, _q, _pipe, timeout=15):
        _ssh = SshShell(
            _h,
            user=_u,
            keypath=_k,
            port=_p,
            silent=_q,
            piped=_pipe,
            timeout=timeout
        )
        return _ssh.connect()

    @staticmethod
    def _do_ssh_cmd(_cmd, _h, _u, _k, _p, _q, _pipe, timeout=None):
        with SshShell(
            _h,
            user=_u,
            keypath=_k,
            port=_p,
            silent=_q,
            piped=_pipe
        ) as ssh:
            if timeout is None:
                _r = ssh.do(_cmd)
            else:
                _r = ssh.do(_cmd, timeout=timeout)
            logger_cli.debug("'{}'".format(_r))
            return _r

    def node_shell(
        self,
        node,
        silent=True,
        piped=True,
        use_sudo=True,
        fport=None
    ):
        _u = self.env_config.kube_node_user
        _k = self.env_config.kube_node_keypath
        _h = self.nodes[node]['internalip']
        _p = 22
        if self.kube.is_local or self.kube.config.ssh_direct:
            return None, \
                self._get_ssh_shell(
                    _h, _u, _k, _p, silent, piped,
                    timeout=self.kube.config.ssh_connect_timeout
                )
        else:
            _fh = "localhost"
            _p = 10022 if not fport else fport
            _pfwd = PortForward(
                self.env_config.ssh_host,
                _h,
                user=_u,
                keypath=self.env_config.ssh_key,
                loc_port=_p,
                timeout=self.kube.config.ssh_connect_timeout
            )
            _pfwd.connect()
            _ssh = self._get_ssh_shell(
                _fh,
                _u,
                _k,
                _p,
                silent,
                piped,
                timeout=self.kube.config.ssh_connect_timeout
            )
            return _pfwd, _ssh
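
    # node_shell returns a (port_forward, shell) pair; port_forward is None
    # when the cluster is reached directly, and callers are expected to
    # kill() both once done.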

    def execute_script_on_node(self, node, script_filename, args=[]):
        # Prepare path
        _target_path = os.path.join(
            self.env_config.kube_node_homepath,
            self.env_config.kube_scripts_folder,
            script_filename
        )

        # execute script
        logger_cli.debug("... running script on '{}'".format(node))
        # handle results for each node
        _script_arguments = " ".join(args) if args else ""
        self.not_responded = []
        # open a shell to the node and run the script there
        _fwd_sh, _sh = self.node_shell(node)
        if not _sh:
            self.not_responded.append(node)
            return {}
        _nr = _sh.do(
            "python {} {}".format(
                _target_path,
                _script_arguments
            )
        )
        if _fwd_sh:
            _fwd_sh.kill()
        _sh.kill()
        if not _nr:
            self.not_responded.append(node)
            return {}
        else:
            return {node: _nr}

    def execute_cmd_on_active_nodes(self, cmd, nodes=None):
        # execute cmd
        logger_cli.debug("...running '{}' on active nodes".format(cmd))
        # handle results for each node
        self.not_responded = []
        _r = {}
        # TODO: Use threading and pool
        for node in self._active:
            _fwd_sh, _sh = self.node_shell(node)
            if not _sh:
                self.not_responded.append(node)
                continue
            _nr = _sh.do(cmd)
            if _fwd_sh:
                _fwd_sh.kill()
            _sh.kill()
            if not _nr:
                self.not_responded.append(node)
            else:
                _r[node] = _nr

        return _r

    def _ssh_exec_script(self, params):
        """
        Threadsafe method to get a shell to a node,
        check/copy the script and get results
        [
            node_name,
            src_path,
            tgt_path,
            conf,
            args,
            port
        ]
        """
        _timeout = self.kube.config.script_execution_timeout
        _name = params[0]
        _src = params[1]
        _tgt = params[2]
        _conf = params[3]
        _args = params[4]
        _port = params[5]
        _log_name = "["+_name+"]:"
        # shell test: prints True if the path exists and is not empty
        _check = "echo $(if [[ -s '{}' ]]; then echo True; " \
                 "else echo False; fi)"
        _fwd_sh, _sh = self.node_shell(
            _name,
            use_sudo=False,
            fport=_port
        )
        # check python3
        _python = _sh.do("which python3")
        _python = utils.to_bool(
            _sh.do(_check.format(_python))
        )
        if not _python:
            _sh.do("apt install python3", sudo=True, timeout=_timeout)
        # check if the script is already there
        _folder = os.path.join(
            self.env_config.kube_node_homepath,
            _conf.kube_scripts_folder
        )
        # check if folder exists
        _folder_exists = utils.to_bool(
            _sh.do(_check.format(_folder))
        )
        if not _folder_exists:
            _sh.do("mkdir " + _folder)
        logger.info("{} Syncing file".format(_log_name))
        _code, _r, _e = _sh.scp(
            _src,
            _sh.get_host_path(_tgt),
        )
        # handle error code
        if _code:
            logger_cli.warn(
                "{} Error in scp:\n"
                "\tstdout:'{}'\n"
                "\tstderr:'{}'".format(_log_name, _r, _e)
            )

        # execute script
        logger.debug("{} Running script".format(_log_name))
        _out = _sh.do(
            "python3 {}{}".format(
                _tgt,
                _args
            ),
            sudo=True,
            timeout=_timeout
        )

        if _fwd_sh:
            _fwd_sh.kill()
        _sh.kill()

        return [_name, _out]

    def execute_script_on_active_nodes(self, script_filename, args=None):
        # Prepare script
        _source_path = os.path.join(pkg_dir, 'scripts', script_filename)
        _target_path = os.path.join(
            self.env_config.kube_node_homepath,
            self.env_config.kube_scripts_folder,
            script_filename
        )
        # handle results for each node
        _script_arguments = " ".join(args) if args else ""
        if _script_arguments:
            _script_arguments = " " + _script_arguments
        self.not_responded = []
        _results = {}
        logger_cli.debug(
            "... running '{}' on active nodes, {} worker threads".format(
                script_filename,
                self.env_config.threads
            )
        )
        # Workers pool
        pool = Pool(self.env_config.threads)

        # init the parameters
        # node_name,
        # src_path,
        # tgt_path,
        # conf,
        # args,
        # port
        _params = []
        _port = 10022
        for node in self._active:
            # build parameter blocks
            _p_list = [
                node,
                _source_path,
                _target_path,
                self.env_config,
                _script_arguments,
                _port
            ]
            _params.append(_p_list)
            # use a dedicated forward port per node
            _port += 1

        _progress = Progress(len(_params))
        results = pool.imap_unordered(self._ssh_exec_script, _params)

        for ii in enumerate(results, start=1):
            if not ii[1][1]:
                self.not_responded.append(ii[1][0])
            else:
                _results[ii[1][0]] = ii[1][1]
            _progress.write_progress(ii[0])

        _progress.end()
        pool.close()
        pool.join()

        # return collected results
        return _results

    def prepare_json_on_node(self, node, _dict, filename):
        # this function assumes that all folders are created
        _dumps = json.dumps(_dict, indent=2).splitlines()

        _source_path = create_temp_file_with_content(_dumps)
        _target_path = os.path.join(
            self.env_config.kube_node_homepath,
            self.env_config.kube_scripts_folder,
            filename
        )
        _folder = os.path.join(
            self.env_config.kube_node_homepath,
            self.env_config.kube_scripts_folder
        )
        _check = "echo $(if [[ -s '{}' ]]; then echo True; " \
                 "else echo False; fi)"
        _fwd_sh, _sh = self.node_shell(
            node,
            use_sudo=False
        )

        # check if folder exists
        _folder_exists = utils.to_bool(
            _sh.do(_check.format(_folder))
        )
        if not _folder_exists:
            _sh.do("mkdir " + _folder)
        logger_cli.debug(
            "... create data on node '{}':'{}'".format(node, _target_path)
        )
        _code, _r, _e = _sh.scp(
            _source_path,
            _sh.get_host_path(_target_path),
        )
        # handle error code
        if _code:
            logger_cli.warn(
                "Error in scp:\n"
                "\tstdout:'{}'\n"
                "\tstderr:'{}'".format(_r, _e)
            )

        if _fwd_sh:
            _fwd_sh.kill()
        _sh.kill()
        return _target_path

    def prepare_daemonset(self, template_filename, config_map=None):
        # load template
        _yaml_file = os.path.join(pkg_dir, 'templates', template_filename)
        logger_cli.debug("... loading template '{}'".format(_yaml_file))
        _ds = {}
        with open(_yaml_file) as dsFile:
            _ds = yaml.load(dsFile, Loader=yaml.SafeLoader)

        # Add scripts to the pod template as volumeMounts
        _tspec = _ds['spec']['template']['spec']
        _tspec['containers'][0]['volumeMounts'] = [
            {
                "name": "scripts",
                "mountPath": os.path.join(
                    "/",
                    self.env_config.kube_scripts_folder
                )
            }
        ]

        _tspec['volumes'] = [
            {
                "name": "scripts",
                "configMap": {
                    "name": self._configmap_name
                }
            }
        ]

        # create daemonset
        logger_cli.debug("... preparing daemonset")
        return self.kube.prepare_daemonset_from_yaml(self._namespace, _ds)
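
    # The template is assumed to carry a standard DaemonSet skeleton, roughly:
    #   spec:
    #     template:
    #       spec:
    #         containers:
    #           - name: <checker container>
    # volumeMounts/volumes are injected above so that every pod sees the
    # scripts config map under /<kube_scripts_folder>.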

    def wait_for_daemonset(self, ds, timeout=120):
        # iteration timeout
        _sleep_time = 5
        _timeout = timeout

        # query daemonset and check that desired=scheduled=ready
        _ds = self.kube.get_daemon_set_by_name(
            ds.metadata.namespace,
            ds.metadata.name
        )

        _total = len(self.nodes)
        # _scheduled = _ds.status.scheduled
        # _ready = _ds.status.ready

        # Init Progress bar to show daemonset readiness
        _progress = Progress(_total)
        while _timeout > 0:
            # get new status
            _ds = self.kube.get_daemon_set_by_name(
                ds.metadata.namespace,
                ds.metadata.name
            )
            _desired = _ds.status.desired_number_scheduled
            _scheduled = _ds.status.current_number_scheduled
            _ready = _ds.status.number_ready
            _updated = _ds.status.updated_number_scheduled
            # print it
            _progress.write_progress(
                _ready,
                note="desired: {}, scheduled: {}, ready: {},"
                " up-to-date: {}".format(
                    _desired,
                    _scheduled,
                    _ready,
                    _updated
                )
            )

            # check values and return
            # In case of an update, also check the _updated value
            if _ready == _updated and _ready == _total:
                # close progress bar class
                _progress.end()
                logger_cli.debug("... daemonset is ready")
                return True
            # iterate
            _timeout -= _sleep_time
            # wait
            sleep(_sleep_time)

        # timed out
        _progress.end()
        # log it
        logger_cli.error("Timed out waiting for Daemonset to be ready")
        return False
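
    # Typical flow (sketch): ds = prepare_daemonset("<template>.yaml"), then
    # wait_for_daemonset(ds), execute_script_on_daemon_set(ds, "<script>.py")
    # and finally delete_daemonset(ds) to clean up.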

    def execute_script_on_daemon_set(self, ds, script_filename, args=None):
        """
        Query the daemonset for pods and execute the script on all of them
        """
        def _kube_exec_on_pod(plist):
            return [
                plist[1],  # node
                plist[3],  # pod name
                plist[0].kube.exec_on_target_pod(  # pointer to function
                    plist[4],  # cmd
                    plist[3],  # pod name
                    plist[2],  # namespace
                    strict=True,
                    _request_timeout=120,
                )
            ]

        # get all pod names
        logger_cli.debug("... extracting pod names from daemonset")
        _pods = self.kube.CoreV1.list_namespaced_pod(
            namespace=ds.metadata.namespace,
            label_selector='name={}'.format(ds.metadata.name)
        )
        # Create map for threads: [[self, node_name, ns, pod_name, cmd]...]
        logger_cli.debug(
            "... running script on {} pods using {} threads at a time".format(
                len(_pods.items),
                self.env_config.threads
            )
        )
        _plist = []
        _arguments = args if args else ""
        _cmd = [
            "python3",
            os.path.join(
                "/",
                self.env_config.kube_scripts_folder,
                script_filename
            ),
            _arguments
        ]
        _cmd = " ".join(_cmd)
        for item in _pods.items:
            _plist.append(
                [
                    self,
                    item.spec.node_name,
                    item.metadata.namespace,
                    item.metadata.name,
                    _cmd
                ]
            )

        # map func and cmd
        pool = Pool(self.env_config.threads)
        _results = {}
        self.not_responded = []
        # create result list
        _progress = Progress(len(_plist))
        ret = pool.imap_unordered(_kube_exec_on_pod, _plist)

        for ii in enumerate(ret, start=1):
            if not ii[1][1]:
                self.not_responded.append(ii[1][0])
            else:
                _results[ii[1][0]] = ii[1][2]
            _progress.write_progress(ii[0])

        _progress.end()
        pool.close()
        pool.join()
        logger_cli.debug(
            "... done, {} total outputs; {} not responded".format(
                len(_results),
                len(self.not_responded)
            )
        )
        return _results

    def delete_daemonset(self, ds):
        # Try to delete the daemonset
        try:
            _r = self.kube.delete_daemon_set_by_name(
                ds.metadata.namespace,
                ds.metadata.name
            )
        except Exception as e:
            logger_cli.warning("Failed to delete daemonset '{}': {}".format(
                ds.metadata.name,
                e.reason
            ))
            _r = None
        return _r