# Author: Alex Savatieiev (osavatieiev@mirantis.com; a.savex@gmail.com)
# Copyright 2019-2022 Mirantis, Inc.
import json
import os
import yaml
from copy import deepcopy
from multiprocessing.dummy import Pool
from time import sleep

from cfg_checker.clients import get_salt_remote, get_kube_remote
from cfg_checker.common.const import all_salt_roles_map, all_kube_roles_map
from cfg_checker.common.const import NODE_UP, NODE_DOWN, NODE_SKIP
from cfg_checker.common.const import ubuntu_versions, nova_openstack_versions
from cfg_checker.common import logger, logger_cli
from cfg_checker.common import utils
from cfg_checker.common.file_utils import create_temp_file_with_content
from cfg_checker.common.exception import SaltException, KubeException
from cfg_checker.common.ssh_utils import PortForward, SshShell
from cfg_checker.common.settings import pkg_dir, ENV_TYPE_KUBE, ENV_TYPE_SALT
from cfg_checker.helpers.console_utils import Progress

node_tmpl = {
    'role': '',
    'node_group': '',
    'status': NODE_DOWN,
    'pillars': {},
    'grains': {},
    'raw': {}
}

def _prepare_skipped_nodes(_names, skip_list, skip_list_file):
    _skipped_minions = []
    # skip list file
    if skip_list_file:
        _valid, _invalid = utils.get_nodes_list(skip_list_file)
        _skipped_minions.extend(_valid)
        if len(_invalid) > 0:
            logger_cli.info(
                "\n# WARNING: Detected invalid entries "
                "in nodes skip list:\n{}\n".format(
                    "\n".join(_invalid)
                )
            )

    # process wildcard, create node list out of mask
    if skip_list:
        _list = []
        _invalid = []
        for _item in skip_list:
            if '*' in _item:
                _str = _item[:_item.index('*')]
                _nodes = [_m for _m in _names if _m.startswith(_str)]
                if not _nodes:
                    logger_cli.warn(
                        "# WARNING: No nodes found for {}".format(_item)
                    )
                _list.extend(_nodes)
            else:
                if _item in _names:
                    _list.append(_item)
                else:
                    logger_cli.warn(
                        "# WARNING: No node found for {}".format(_item)
                    )
        # removing duplicates
        _list = list(set(_list))
        _skipped_minions.extend(_list)

    return _skipped_minions

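# Example (illustrative; node names are hypothetical): given
#   _names = ["ctl01.cloud.local", "ctl02.cloud.local", "cmp01.cloud.local"]
# and skip_list = ["ctl*"], the wildcard is cut at '*' and matched via
# startswith(), so _prepare_skipped_nodes() returns both "ctl*" nodes as a
# deduplicated skip list.
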
class Nodes(object):
    def __init__(self, config):
        self.nodes = None
        self.env_config = config

    def skip_node(self, node):
        # Add node to skip list,
        # for example, if it fails to comply with the rules

        # check if we know such node
        if node in self.nodes.keys() and node not in self.skip_list:
            # yes, add it
            self.skip_list.append(node)
            return True
        else:
            return False

    def get_nodes(self, skip_list=None, skip_list_file=None):
        if not self.nodes:
            if not skip_list and self.env_config.skip_nodes:
                self.gather_node_info(
                    self.env_config.skip_nodes,
                    skip_list_file
                )
            else:
                self.gather_node_info(skip_list, skip_list_file)
        return self.nodes

    def get_info(self):
        _info = {
            'mcp_release': self.mcp_release,
            'openstack_release': self.openstack_release,
            'k0rdent_release': self.kube.get_k0rdent_release()
        }
        return _info

    def is_node_available(self, node, log=True):
        if node in self.skip_list:
            if log:
                logger_cli.info("-> node '{}' not active".format(node))
            return False
        elif node in self.not_responded:
            if log:
                logger_cli.info("-> node '{}' did not respond".format(node))
            return False
        else:
            return True


class SaltNodes(Nodes):
    def __init__(self, config):
        super(SaltNodes, self).__init__(config)
        logger_cli.info("# Gathering environment information")
        # simple salt rest client
        self.salt = None
        self.env_type = ENV_TYPE_SALT

    def gather_node_info(self, skip_list, skip_list_file):
        # Keys for all nodes
        # this is not working in scope of 2016.8.3, will override with list
        logger_cli.debug("... collecting node names existing in the cloud")
        if not self.salt:
            self.salt = get_salt_remote(self.env_config)

        try:
            _keys = self.salt.list_keys()
            _str = []
            for _k, _v in _keys.items():
                _str.append("{}: {}".format(_k, len(_v)))
            logger_cli.info("-> keys collected: {}".format(", ".join(_str)))

            self.node_keys = {
                'minions': _keys['minions']
            }
        except Exception:
            _keys = None
            self.node_keys = None

        # List of minions with grains
        _minions = self.salt.list_minions()
        if _minions:
            logger_cli.info(
                "-> api reported {} active minions".format(len(_minions))
            )
        elif not self.node_keys:
            # this is the last resort
            _minions = self.env_config.load_nodes_list()
            logger_cli.info(
                "-> {} nodes loaded from list file".format(len(_minions))
            )
        else:
            _minions = self.node_keys['minions']

        # Skip nodes if needed
        _skipped_minions = \
            _prepare_skipped_nodes(_minions, skip_list, skip_list_file)

        # in case the API did not list minions, we need all that answer ping
        _active = self.salt.get_active_nodes()
        logger_cli.info("-> nodes responded: {}".format(len(_active)))
        # iterate through all accepted nodes and create a dict for it
        self.nodes = {}
        self.skip_list = []
        _domains = set()
        for _name in _minions:
            _nc = utils.get_node_code(_name)
            _rmap = all_salt_roles_map
            _role = _rmap[_nc] if _nc in _rmap else 'unknown'
            if _name in _skipped_minions:
                _status = NODE_SKIP
                self.skip_list.append(_name)
            else:
                _status = NODE_UP if _name in _active else NODE_DOWN
                if _status == NODE_DOWN:
                    self.skip_list.append(_name)
                    logger_cli.info(
                        "-> '{}' is down, "
                        "added to skip list".format(
                            _name
                        )
                    )
            self.nodes[_name] = deepcopy(node_tmpl)
            self.nodes[_name]['shortname'] = _name.split(".", 1)[0]
            _domains.add(_name.split(".", 1)[1])
            self.nodes[_name]['node_group'] = _nc
            self.nodes[_name]['role'] = _role
            self.nodes[_name]['status'] = _status
        _domains = list(_domains)
        if len(_domains) > 1:
            logger_cli.warning(
                "Multiple domains detected: {}".format(",".join(_domains))
            )
        # TODO: Use domain with biggest node count by default
        # or force it via config option
        else:
            self.domain = _domains[0]
        logger_cli.info("-> {} nodes inactive".format(len(self.skip_list)))
        logger_cli.info("-> {} nodes collected".format(len(self.nodes)))

        # form an all nodes compound string to use in salt
        self.active_nodes_compound = self.salt.compound_string_from_list(
            filter(
                lambda nd: self.nodes[nd]['status'] == NODE_UP,
                self.nodes
            )
        )
        # get master node fqdn
        # _filtered = filter(
        #     lambda nd: self.nodes[nd]['role'] == const.all_roles_map['cfg'],
        #     self.nodes
        # )
        _role = all_salt_roles_map['cfg']
        _filtered = [n for n, v in self.nodes.items() if v['role'] == _role]
        if len(_filtered) < 1:
            raise SaltException(
                "No master node detected! Check/Update node role map."
            )
        else:
            self.salt.master_node = _filtered[0]

        # OpenStack versions
        self.mcp_release = self.salt.pillar_get(
            self.salt.master_node,
            "_param:apt_mk_version"
        )[self.salt.master_node]
        self.openstack_release = self.salt.pillar_get(
            self.salt.master_node,
            "_param:openstack_version"
        )[self.salt.master_node]
        # Preload codenames
        # do additional queries to get linux codename and arch for each node
        self.get_specific_pillar_for_nodes("_param:linux_system_codename")
        self.get_specific_pillar_for_nodes("_param:linux_system_architecture")
        for _name in self.nodes.keys():
            _n = self.nodes[_name]
            if _name not in self.skip_list:
                _p = _n['pillars']['_param']
                _n['linux_codename'] = _p['linux_system_codename']
                _n['linux_arch'] = _p['linux_system_architecture']

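    # Usage sketch (illustrative; assumes a reachable salt-api endpoint
    # configured in env_config):
    #   salt_nodes = SaltNodes(config)
    #   salt_nodes.get_nodes(skip_list=["cmp*"])
    #   # -> salt_nodes.nodes maps each minion FQDN to its role, status,
    #   #    linux_codename and linux_arch
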
    def get_cmd_for_nodes(self, cmd, target_key, target_dict=None, nodes=None):
        """Function runs cmd.run and parses the result into place
        or into the dict structure provided

        :return: no return value, data published internally
        """
        logger_cli.debug(
            "... collecting results for '{}'".format(cmd)
        )
        if target_dict:
            _nodes = target_dict
        else:
            _nodes = self.nodes
        _result = self.execute_cmd_on_active_nodes(cmd, nodes=nodes)
        for node, data in _nodes.items():
            if node in self.skip_list:
                logger_cli.debug(
                    "... '{}' skipped while collecting '{}'".format(
                        node,
                        cmd
                    )
                )
                continue
            # Prepare target key
            if target_key not in data:
                data[target_key] = None
            # Save data
            if data['status'] in [NODE_DOWN, NODE_SKIP]:
                data[target_key] = None
            elif node not in _result:
                continue
            elif not _result[node]:
                logger_cli.debug(
                    "... '{}' did not respond after '{}'".format(
                        node,
                        self.env_config.salt_timeout
                    )
                )
                data[target_key] = None
            else:
                data[target_key] = _result[node]

    def get_specific_pillar_for_nodes(self, pillar_path):
        """Function gets pillars on the given path for all nodes

        :return: no return value, data published internally
        """
        logger_cli.debug(
            "... collecting node pillars for '{}'".format(pillar_path)
        )
        _result = self.salt.pillar_get(self.active_nodes_compound, pillar_path)
        self.not_responded = []
        for node, data in self.nodes.items():
            if node in self.skip_list:
                logger_cli.debug(
                    "... '{}' skipped while collecting '{}'".format(
                        node,
                        pillar_path
                    )
                )
                continue
            _pillar_keys = pillar_path.split(':')
            _data = data['pillars']
            # pre-create nested dict
            for idx in range(0, len(_pillar_keys)-1):
                _key = _pillar_keys[idx]
                if _key not in _data:
                    _data[_key] = {}
                _data = _data[_key]
            if data['status'] in [NODE_DOWN, NODE_SKIP]:
                _data[_pillar_keys[-1]] = None
            elif not _result[node]:
                logger_cli.debug(
                    "... '{}' did not respond after '{}'".format(
                        node,
                        self.env_config.salt_timeout
                    )
                )
                _data[_pillar_keys[-1]] = None
                self.not_responded.append(node)
            else:
                _data[_pillar_keys[-1]] = _result[node]

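    # Example (illustrative): for pillar_path "_param:linux_system_codename"
    # the loop above pre-creates node['pillars']['_param'] and stores each
    # node's value at node['pillars']['_param']['linux_system_codename'].
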
    def prepare_json_on_node(self, node, _dict, filename):
        if node in self.skip_list:
            logger_cli.debug(
                "... '{}' skipped while preparing json file of '{}'".format(
                    node,
                    filename
                )
            )
            return None

        # this function assumes that all folders are created
        _dumps = json.dumps(_dict, indent=2).splitlines()
        _storage_path = os.path.join(
            self.env_config.salt_file_root, self.env_config.salt_scripts_folder
        )
        logger_cli.debug(
            "... uploading data as '{}' "
            "to master's file cache folder: '{}'".format(
                filename,
                _storage_path
            )
        )
        _cache_path = os.path.join(_storage_path, filename)
        _source_path = os.path.join(
            'salt://',
            self.env_config.salt_scripts_folder,
            filename
        )
        _target_path = os.path.join(
            '/root',
            self.env_config.salt_scripts_folder,
            filename
        )

        logger_cli.debug("... creating file in cache '{}'".format(_cache_path))
        self.salt.f_touch_master(_cache_path)
        self.salt.f_append_master(_cache_path, _dumps)
        logger.debug("... syncing file to '{}'".format(node))
        self.salt.get_file(
            node,
            _source_path,
            _target_path,
            tgt_type="compound"
        )
        return _target_path

    def prepare_script_on_active_nodes(self, script_filename):
        # Prepare script
        _p = os.path.join(pkg_dir, 'scripts', script_filename)
        with open(_p, 'rt') as fd:
            _script = fd.read().splitlines()
        _storage_path = os.path.join(
            self.env_config.salt_file_root, self.env_config.salt_scripts_folder
        )
        logger_cli.debug(
            "... uploading script {} "
            "to master's file cache folder: '{}'".format(
                script_filename,
                _storage_path
            )
        )
        self.salt.mkdir(self.salt.master_node, _storage_path)
        # Form cache, source and target path
        _cache_path = os.path.join(_storage_path, script_filename)
        _source_path = os.path.join(
            'salt://',
            self.env_config.salt_scripts_folder,
            script_filename
        )
        _target_path = os.path.join(
            '/root',
            self.env_config.salt_scripts_folder,
            script_filename
        )

        logger_cli.debug("... creating file in cache '{}'".format(_cache_path))
        self.salt.f_touch_master(_cache_path)
        self.salt.f_append_master(_cache_path, _script)
        # command salt to copy file to minions
        logger_cli.debug(
            "... creating script target folder '{}'".format(
                os.path.join('/root', self.env_config.salt_scripts_folder)
            )
        )
        self.salt.mkdir(
            self.active_nodes_compound,
            os.path.join(
                '/root',
                self.env_config.salt_scripts_folder
            ),
            tgt_type="compound"
        )
        logger.debug("... syncing file to nodes")
        self.salt.get_file(
            self.active_nodes_compound,
            _source_path,
            _target_path,
            tgt_type="compound"
        )
        # return path on nodes, just in case
        return _target_path

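    # Distribution flow (as implemented above): the script is cached on the
    # master under <salt_file_root>/<salt_scripts_folder>, and every active
    # minion then pulls 'salt://<salt_scripts_folder>/<name>' into
    # /root/<salt_scripts_folder>/<name>.
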
    def execute_script_on_node(self, node, script_filename, args=None):
        # Prepare path
        _target_path = os.path.join(
            '/root',
            self.env_config.salt_scripts_folder,
            script_filename
        )

        # execute script
        logger.debug("... running script on '{}'".format(node))
        # handle results for each node
        _script_arguments = " ".join(args) if args else ""
        self.not_responded = []
        _r = self.salt.cmd(
            node,
            'cmd.run',
            param='python {} {}'.format(_target_path, _script_arguments),
            expr_form="compound"
        )

        # all-false returns mean that there was no response
        self.not_responded = [_n for _n in _r.keys() if not _r[_n]]
        return _r

    def execute_script_on_active_nodes(self, script_filename, args=None):
        # Prepare path
        _target_path = os.path.join(
            '/root',
            self.env_config.salt_scripts_folder,
            script_filename
        )

        # execute script
        logger_cli.debug("... running script")
        # handle results for each node
        _script_arguments = args if args else ""
        self.not_responded = []
        _r = self.salt.cmd(
            self.active_nodes_compound,
            'cmd.run',
            param='python {} {}'.format(_target_path, _script_arguments),
            expr_form="compound"
        )

        # all-false returns mean that there was no response
        self.not_responded = [_n for _n in _r.keys() if not _r[_n]]
        return _r

    def execute_cmd_on_active_nodes(self, cmd, nodes=None):
        # execute cmd
        self.not_responded = []
        _r = self.salt.cmd(
            nodes if nodes else self.active_nodes_compound,
            'cmd.run',
            param=cmd,
            expr_form="compound"
        )

        # all-false returns mean that there was no response
        self.not_responded = [_n for _n in _r.keys() if not _r[_n]]
        return _r


class KubeNodes(Nodes):
    def __init__(self, config):
        super(KubeNodes, self).__init__(config)
        logger_cli.info("# Gathering environment information")
        # simple kube rest client
        self.kube = get_kube_remote(self.env_config)
        self.env_type = ENV_TYPE_KUBE
        self._namespace = "qa-space"
        self._configmap_name = self.env_config.kube_scripts_folder

        # prepare needed resources
        self.prepared_daemonsets = []
        # Check if we need resources prepared
        if not config.prepare_qa_resources:
            logger_cli.debug("... skipped preparing resources")
            self._scripts = None
            return
        else:
            self._check_namespace()
            self._scripts = self._check_config_map()

    def _check_namespace(self):
        # ensure namespace
        logger_cli.debug(
            "... checking namespace '{}'".format(self._namespace)
        )
        if not self.kube.ensure_namespace(self._namespace):
            raise KubeException(
                "Failed to manage namespace '{}'".format(self._namespace)
            )

    def _check_config_map(self):
        # ensure config map exists
        logger_cli.debug(
            "... checking config map '{}'".format(self._configmap_name)
        )
        _source = os.path.join(pkg_dir, 'scripts')
        return self.kube.create_config_map(
            self._namespace,
            self._configmap_name,
            _source
        )

    def get_k0rdent_release(self):
        logger_cli.debug("... getting k0rdent release")
        self.k0rdent_release = self.kube.get_k0rdent_release()
        return self.k0rdent_release

    def gather_node_info(self, skip_list, skip_list_file):
        # Gather nodes info and query pod lists for each node
        logger_cli.debug("... collecting node names existing in the cloud")

        # Gather node names and info
        _nodes = self.kube.get_node_info()
        _node_names = list(_nodes.keys())
        # Skip nodes if needed
        _skipped_nodes = \
            _prepare_skipped_nodes(_node_names, skip_list, skip_list_file)

        # Count how many nodes are active
        self._active = [n for n, v in _nodes.items()
                        if v['conditions']['ready']['status']]

        # iterate through all accepted nodes and create a dict for it
        self.nodes = {}
        self.skip_list = []
        for _name in _node_names:
            if _name in _skipped_nodes:
                _status = NODE_SKIP
                self.skip_list.append(_name)
            else:
                _status = NODE_UP if _name in self._active else NODE_DOWN
                if _status == NODE_DOWN:
                    self.skip_list.append(_name)
                    logger_cli.info(
                        "-> '{}' shows 'Ready' as 'False', "
                        "added to skip list".format(
                            _name
                        )
                    )
            _roles = {}
            _labels = {}
            for _label, _value in _nodes[_name]['labels'].items():
                if _label in all_kube_roles_map:
                    _roles[all_kube_roles_map[_label]] = _value
                else:
                    _labels[_label] = _value

            self.nodes[_name] = deepcopy(node_tmpl)
            self.nodes[_name].pop("grains")
            self.nodes[_name].pop("pillars")

            # hostname
            self.nodes[_name]['shortname'] = \
                _nodes[_name]['addresses']['hostname']['address']
            # internal
            self.nodes[_name]['internalip'] = \
                _nodes[_name]['addresses']['internalip']['address']
            # alternate
            if self.env_config.force_node_network is not None:
                iIP = self.nodes[_name]['internalip']
                # use last number
                aIP = self.env_config.force_node_network + iIP.split('.')[-1]
                self.nodes[_name]["altip"] = aIP
            self.nodes[_name]['node_group'] = None
            self.nodes[_name]['labels'] = _labels
            self.nodes[_name]['roles'] = _roles
            self.nodes[_name]['status'] = _status
            # Backward compatibility
            _info = _nodes[_name]['status']['node_info']
            self.nodes[_name]['linux_image'] = _info['os_image']
            self.nodes[_name]['linux_arch'] = _info['architecture']

            _codename = "unknown"
            _info_str = _info['os_image']
            if _info_str.lower().startswith('ubuntu'):
                _n, _v, _ = _info_str.split(maxsplit=2)
                _v, _, _ = _v.rpartition('.') if '.' in _v else (_v, "", "")
                if _v in ubuntu_versions:
                    _codename = ubuntu_versions[_v].split()[0].lower()
            elif _info_str.lower().startswith('debian'):
                parts = _info_str.split()
                for part in parts:
                    if part.startswith("(") and part.endswith(")"):
                        _codename = part.strip("()").lower()
            self.nodes[_name]['linux_codename'] = _codename

            # Consider per-data type transfer
            self.nodes[_name]["raw"] = _nodes[_name]
        # TODO: Investigate how to handle domains in Kube, probably - skip
        # _domains = list(_domains)
        # if len(_domains) > 1:
        #     logger_cli.warning(
        #         "Multiple domains detected: {}".format(",".join(_domains))
        #     )
        # else:
        self.domain = "no.domain.in.kube.yet"
        logger_cli.info(
            "-> {} nodes collected: {} - active, {} - not active".format(
                len(self.nodes),
                len(self._active),
                len(self.skip_list)
            )
        )

        _role = "control-plane"
        _filtered = [
            n for n, v in self.nodes.items()
            if 'control-plane' in str(v['labels'])
        ]
        if len(_filtered) < 1:
            raise KubeException(
                "No {} nodes detected! Check/Update node role map.".format(
                    _role
                )
            )
        else:
            _r = [n for n, v in self.nodes.items()
                  if v['status'] != NODE_UP and _role in v['roles']]
            if len(_r) > 0:
                logger_cli.warn(
                    "Master nodes are reporting 'NotReady':\n{}".format(
                        "\n".join(_r)
                    )
                )
            self.kube.master_node = _filtered[0]

        # get specific data upfront
        # OpenStack versions
        self.mcp_release = ""
        # Quick and Dirty way to detect OS release
        try:
            _nova_version = self.kube.exec_on_target_pod(
                "nova-manage --version",
                "nova-api-osapi",
                "openstack"
            )
            _nmajor = _nova_version.partition('.')[0]
            self.openstack_release = nova_openstack_versions[_nmajor]
        except KubeException as e:
            logger_cli.warn("Openstack not detected: {}".format(e.message))
            self.openstack_release = nova_openstack_versions["00"]

        return

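    # Example (illustrative values): a "nova-manage --version" output such as
    # "22.0.1" yields major part "22", so nova_openstack_versions["22"]
    # selects the matching OpenStack release; "00" is the fallback key used
    # when Nova is not present.
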
    @staticmethod
    def _get_ssh_shell(_h, _u, _k, _p, _q, _pipe, timeout=15):
        _ssh = SshShell(
            _h,
            user=_u,
            keypath=_k,
            port=_p,
            silent=_q,
            piped=_pipe,
            timeout=timeout
        )
        return _ssh.connect()

    @staticmethod
    def _do_ssh_cmd(_cmd, _h, _u, _k, _p, _q, _pipe, timeout=None):
        with SshShell(
            _h,
            user=_u,
            keypath=_k,
            port=_p,
            silent=_q,
            piped=_pipe
        ) as ssh:
            if timeout is None:
                _r = ssh.do(_cmd)
            else:
                _r = ssh.do(_cmd, timeout=timeout)
            logger_cli.debug("'{}'".format(_r))
            return _r

    def node_shell(
        self,
        node,
        silent=True,
        piped=True,
        use_sudo=True,
        fport=None
    ):
        _u = self.env_config.kube_node_user
        _k = self.env_config.kube_node_keypath

        _n = self.nodes[node]
        _h = _n['altip'] if "altip" in _n else _n['internalip']
        _p = 22
        if self.kube.is_local or self.kube.config.ssh_direct:
            logger.debug("Getting shell with no port forward")
            return [None, self._get_ssh_shell(
                _h, _u, _k, _p, silent, piped,
                timeout=self.kube.config.ssh_connect_timeout
            )]
        else:
            logger.debug("Getting shell with port forward")
            _fh = "localhost"
            _p = 10022 if not fport else fport
            _pfwd = PortForward(
                self.env_config.ssh_host,
                _h,
                user=_u,
                keypath=self.env_config.ssh_key,
                loc_port=_p,
                timeout=self.kube.config.ssh_connect_timeout
            )
            _pfwd.connect()
            _ssh = self._get_ssh_shell(
                _fh,
                _u,
                _k,
                _p,
                silent,
                piped,
                timeout=self.kube.config.ssh_connect_timeout
            )
            return [_pfwd, _ssh]

    def execute_script_on_node(self, node, script_filename, args=None):
        # Prepare path
        _target_path = os.path.join(
            self.env_config.kube_node_homepath,
            self.env_config.kube_scripts_folder,
            script_filename
        )

        # execute script
        logger_cli.debug("... running script on '{}'".format(node))
        # handle results for each node
        _script_arguments = " ".join(args) if args else ""
        self.not_responded = []
        # get a shell to the node and run the script there
        _fwd_sh, _sh = self.node_shell(node)
        _nr = None
        if _sh:
            _nr = _sh.do(
                "python {} {}".format(_target_path, _script_arguments)
            )
            _sh.kill()
        if _fwd_sh:
            _fwd_sh.kill()

        if not _nr:
            self.not_responded.append(node)
            return {}
        else:
            return {node: _nr}

    def execute_cmd_on_active_nodes(self, cmd, nodes=None):
        # execute cmd
        logger_cli.debug("... running '{}' on active nodes".format(cmd))
        # handle results for each node
        self.not_responded = []
        _r = {}
        # TODO: Use threading and pool
        for node in self._active:
            _fwd_sh, _sh = self.node_shell(node)
            _nr = _sh.do(cmd) if _sh else None
            if _sh:
                _sh.kill()
            if _fwd_sh:
                _fwd_sh.kill()
            if not _nr:
                self.not_responded.append(node)
            else:
                _r[node] = _nr

        return _r

    def _ssh_exec_script(self, params):
        """
        Threadsafe method to get shell to node,
        check/copy script and get results
        [
            node_name,
            src_path,
            tgt_path,
            conf,
            args
        ]
        """
        _timeout = self.kube.config.script_execution_timeout
        _name = params[0]
        _src = params[1]
        _tgt = params[2]
        _conf = params[3]
        _args = params[4]
        _port = params[5]
        _log_name = "["+_name+"]:"
        _check = "echo $(if [[ -s '{}' ]]; then echo True; " \
                 "else echo False; fi)"
        _fwd_sh, _sh = self.node_shell(
            _name,
            use_sudo=False,
            fport=_port
        )
        # check python3
        _python = _sh.do("which python3")
        _python = utils.to_bool(
            _sh.do(_check.format(_python))
        )
        if not _python:
            _sh.do("apt install -y python3", sudo=True, timeout=_timeout)
        # check if script already there
        _folder = os.path.join(
            self.env_config.kube_node_homepath,
            _conf.kube_scripts_folder
        )
        # check if folder exists
        _folder_exists = utils.to_bool(
            _sh.do(_check.format(_folder))
        )
        if not _folder_exists:
            _sh.do("mkdir " + _folder)
        logger.info("{} Syncing file".format(_log_name))
        _code, _r, _e = _sh.scp(
            _src,
            _sh.get_host_path(_tgt),
        )
        # handle error code
        if _code:
            logger_cli.warn(
                "{} Error in scp:\n"
                "\tstdout:'{}'\n"
                "\tstderr:'{}'".format(_log_name, _r, _e)
            )

        # execute script
        logger.debug("{} Running script".format(_log_name))
        _out = _sh.do(
            "python3 {}{}".format(
                _tgt,
                _args
            ),
            sudo=True,
            timeout=_timeout
        )

        if _fwd_sh:
            _fwd_sh.kill()
        _sh.kill()

        return [_name, _out]

    def execute_script_on_active_nodes(self, script_filename, args=None):
        # Prepare script
        _source_path = os.path.join(pkg_dir, 'scripts', script_filename)
        _target_path = os.path.join(
            self.env_config.kube_node_homepath,
            self.env_config.kube_scripts_folder,
            script_filename
        )
        # handle results for each node
        _script_arguments = " ".join(args) if args else ""
        if _script_arguments:
            _script_arguments = " " + _script_arguments
        self.not_responded = []
        _results = {}
        logger_cli.debug(
            "... running '{}' on active nodes, {} worker threads".format(
                script_filename,
                self.env_config.threads
            )
        )
        # Workers pool
        pool = Pool(self.env_config.threads)

        # init the parameters
        # node_name,
        # src_path,
        # tgt_path,
        # conf,
        # args
        _params = []
        _port = 10022
        for node in self._active:
            # build parameter blocks
            _p_list = [
                node,
                _source_path,
                _target_path,
                self.env_config,
                _script_arguments,
                _port
            ]
            _params.append(_p_list)
            _port += 1

        _progress = Progress(len(_params))
        results = pool.imap_unordered(self._ssh_exec_script, _params)

        for ii in enumerate(results, start=1):
            if not ii[1][1]:
                self.not_responded.append(ii[1][0])
            else:
                _results[ii[1][0]] = ii[1][1]
            _progress.write_progress(ii[0])

        _progress.end()
        pool.close()
        pool.join()

        # return collected per-node results
        return _results

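    # Usage sketch (hypothetical script name; any file shipped in the package
    # 'scripts' folder works the same way):
    #   results = kube_nodes.execute_script_on_active_nodes(
    #       "pkg_versions.py", args=["--raw"]
    #   )
    #   # -> {node_name: script_stdout, ...}; silent nodes end up
    #   #    in not_responded
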
    def prepare_json_on_node(self, node, _dict, filename):
        # this function assumes that all folders are created
        _dumps = json.dumps(_dict, indent=2).splitlines()

        _source_path = create_temp_file_with_content(_dumps)
        _target_path = os.path.join(
            self.env_config.kube_node_homepath,
            self.env_config.kube_scripts_folder,
            filename
        )
        _folder = os.path.join(
            self.env_config.kube_node_homepath,
            self.env_config.kube_scripts_folder
        )
        _check = "echo $(if [[ -s '{}' ]]; then echo True; " \
                 "else echo False; fi)"
        _fwd_sh, _sh = self.node_shell(
            node,
            use_sudo=False
        )

        # check if folder exists
        _folder_exists = utils.to_bool(
            _sh.do(_check.format(_folder))
        )
        if not _folder_exists:
            _sh.do("mkdir " + _folder)
        logger_cli.debug(
            "... create data on node '{}':'{}'".format(node, _target_path)
        )
        _code, _r, _e = _sh.scp(
            _source_path,
            _sh.get_host_path(_target_path),
        )
        # handle error code
        if _code:
            logger_cli.warn(
                "Error in scp:\n"
                "\tstdout:'{}'\n"
                "\tstderr:'{}'".format(_r, _e)
            )

        if _fwd_sh:
            _fwd_sh.kill()
        _sh.kill()
        return _target_path

    def prepare_daemonset(self, template_filename):
        # load template
        _yaml_file = os.path.join(pkg_dir, 'templates', template_filename)
        logger_cli.debug("... loading template '{}'".format(_yaml_file))
        _ds = {}
        with open(_yaml_file) as dsFile:
            _ds = yaml.load(dsFile, Loader=yaml.SafeLoader)

        # Add scripts to pod template as volumeMounts
        _tspec = _ds['spec']['template']['spec']
        _tspec['containers'][0]['volumeMounts'] = [
            {
                "name": "scripts",
                "mountPath": os.path.join(
                    "/",
                    self.env_config.kube_scripts_folder
                )
            }
        ]

        _tspec['volumes'] = [
            {
                "name": "scripts",
                "configMap": {
                    "name": self._configmap_name
                }
            }
        ]

        # create daemonset
        logger_cli.debug("... preparing daemonset")
        _ds = self.kube.prepare_daemonset_from_yaml(self._namespace, _ds)
        # Save prepared daemonset
        self.prepared_daemonsets.append(_ds)
        # return it
        return _ds

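    # Template expectation (an assumption based on the code above): the yaml
    # under templates/<template_filename> defines a single-container pod spec;
    # its volumes are replaced so the scripts configmap is mounted at
    # /<kube_scripts_folder> inside that container.
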
    def wait_for_daemonset(self, ds, timeout=120):
        # iteration timeout
        _sleep_time = 5
        _timeout = timeout

        # query daemonset and check that desired=scheduled=ready
        _ds = self.kube.get_daemon_set_by_name(
            ds.metadata.namespace,
            ds.metadata.name
        )

        _total = len(self.nodes) - len(self.skip_list)
        # _scheduled = _ds.status.scheduled
        # _ready = _ds.status.ready

        # Init Progress bar to show daemonset readiness
        _progress = Progress(_total)
        while _timeout > 0:
            # get new status
            _ds = self.kube.get_daemon_set_by_name(
                ds.metadata.namespace,
                ds.metadata.name
            )
            _desired = _ds.status.desired_number_scheduled
            _scheduled = _ds.status.current_number_scheduled
            _ready = _ds.status.number_ready
            _updated = _ds.status.updated_number_scheduled
            # print it
            _progress.write_progress(
                _ready,
                note="desired: {}, scheduled: {}, ready: {},"
                     " up-to-date: {}".format(
                         _desired,
                         _scheduled,
                         _ready,
                         _updated
                     )
            )

            # check values and return
            # In case of Update, also checking _updated value
            if _ready == _updated and _ready == _total:
                # close progress bar class
                _progress.end()
                logger_cli.debug("... daemonset is ready")
                return True
            # iterate
            _timeout -= _sleep_time
            # wait
            sleep(_sleep_time)

        # timed out
        _progress.end()
        # log it
        logger_cli.error("Timed out waiting for Daemonset to be ready")
        return False

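    # Flow sketch (template name is hypothetical): prepare, wait, fan out,
    # clean up:
    #   _ds = kube_nodes.prepare_daemonset("ds_template.yaml")
    #   if kube_nodes.wait_for_daemonset(_ds):
    #       out = kube_nodes.execute_cmd_on_daemon_set(_ds, "uname -r")
    #       kube_nodes.delete_daemonset(_ds)
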
    def exec_script_on_target_pod(self, pod_name, script_filename, args=None):
        """
        Run script from configmap on target pod assuming it is present
        """
        _arguments = args if args else []
        _cmd = [
            "python3",
            os.path.join(
                "/",
                self.env_config.kube_scripts_folder,
                script_filename
            )
        ] + _arguments
        _result = self.kube.exec_on_target_pod(
            _cmd,
            pod_name,
            self._namespace,
            strict=True
        )
        return _result

    def exec_cmd_on_target_pod(self, pod_name, ns, command_str):
        """
        Run cmd on target pod
        """
        _result = self.kube.exec_on_target_pod(
            command_str,
            pod_name,
            ns,
            strict=True
        )
        return _result

    def execute_cmd_on_daemon_set(
        self,
        ds,
        cmd,
        _args=None,
        is_script=False
    ):
        """
        Query daemonset for pods and execute script on all of them
        """
        _results = self.exec_cmd_on_pods(
            self.kube.get_pods_for_daemonset(ds),
            cmd,
            _args=_args,
            is_script=is_script
        )
        # Update results
        _ds_results = {}
        # only node name and result are needed;
        # pod name and cmd are ignored
        for _n, _, _v, _ in _results:
            _ds_results[_n] = _v
        return _ds_results

    def exec_on_labeled_pods_and_ns(
        self,
        label_str,
        cmd,
        _args=None,
        ns=None,
        silent=False
    ):
        if not ns:
            ns = self._namespace
        _results = self.exec_cmd_on_pods(
            self.kube.list_pods(ns, label_str=label_str),
            cmd,
            _args=_args,
            silent=silent
        )
        _pod_results = {}
        for _, _p, _v, _ in _results:
            _pod_results[_p] = _v
        return _pod_results

    def _pooled_exec_on_pod(self, plist, silent=False):
        def _kube_exec_on_pod(plist):
            return [
                plist[1],  # node
                plist[3],  # pod name
                plist[0].kube.exec_on_target_pod(  # pointer to function
                    plist[4],  # cmd
                    plist[3],  # pod name
                    plist[2],  # namespace
                    strict=True,
                    _request_timeout=120,
                    arguments=plist[5]
                ),
                # save cmd used
                plist[4]
            ]
        # map func and cmd
        pool = Pool(self.env_config.threads)
        _results = []
        self.not_responded = []
        # create result list
        if not silent:
            _progress = Progress(len(plist))
        ret = pool.imap_unordered(_kube_exec_on_pod, plist)

        for ii in enumerate(ret, start=1):
            if not ii[1][2]:
                self.not_responded.append(ii[1][0])
            else:
                _results.append(ii[1])
            if not silent:
                _progress.write_progress(ii[0])

        if not silent:
            _progress.end()
        pool.close()
        pool.join()
        logger_cli.debug(
            "... done, {} total outputs; {} not responded".format(
                len(_results),
                len(self.not_responded)
            )
        )
        return _results

    def exec_cmd_on_pods(
        self,
        pod_list,
        cmd,
        _args=None,
        is_script=False,
        silent=False
    ):
        # Create map for threads: [[node_name, ns, pod_name, cmd]...]
        logger_cli.debug(
            "... running command on {} pods using {} threads at a time".format(
                len(pod_list.items),
                self.env_config.threads
            )
        )
        _plist = []
        _arguments = _args if _args else ""
        if is_script:
            _cmd = [
                "python3",
                os.path.join(
                    "/",
                    self.env_config.kube_scripts_folder,
                    cmd
                ),
                _arguments
            ]
            _cmd = " ".join(_cmd)
        else:
            # decide if we are to wrap it to bash
            if '|' in cmd:
                _cmd = "bash -c"
                _arguments = cmd
            else:
                _cmd = cmd
        for item in pod_list.items:
            _plist.append(
                [
                    self,
                    item.spec.node_name,
                    item.metadata.namespace,
                    item.metadata.name,
                    _cmd,
                    _arguments
                ]
            )

        return self._pooled_exec_on_pod(_plist, silent=silent)

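    # Example (illustrative): a piped command like "ps ax | grep kubelet" is
    # dispatched as cmd "bash -c" with the whole pipeline as its argument, so
    # the pipe is resolved by a shell inside the pod; a plain "uname -r" is
    # exec'd directly.
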
    def exec_cmds_on_pod(self, pod, cmd_list):
        logger_cli.debug(
            "... running {} cmds using {} threads at a time".format(
                len(cmd_list),
                self.env_config.threads
            )
        )
        _plist = []
        # decide if we are to wrap it to bash
        for item in cmd_list:
            if '|' in item:
                _cmd = "bash -c"
                _arguments = item
            else:
                _cmd = item
                _arguments = ""
            _plist.append(
                [
                    self,
                    pod.spec.node_name,
                    pod.metadata.namespace,
                    pod.metadata.name,
                    _cmd,
                    _arguments
                ]
            )

        return self._pooled_exec_on_pod(_plist)

    def delete_daemonset(self, ds):
        # Try to delete daemonset
        try:
            _r = self.kube.delete_daemon_set_by_name(
                ds.metadata.namespace,
                ds.metadata.name
            )
        except Exception as e:
            logger_cli.warning("Failed to delete daemonset '{}': {}".format(
                ds.metadata.name,
                e.reason
            ))
            _r = None
        return _r

    def get_pod_name_in_daemonset_by_node(self, nodename, daemonset):
        _podname = None
        _pods = self.kube.get_pods_for_daemonset(daemonset)
        for item in _pods.items:
            if item.spec.node_name == nodename:
                _podname = item.metadata.name

        return _podname

    def prepare_json_in_pod(self, podname, namespace, targets, filename):
        # Iterate pods in daemonset and prepare json file on each one
        _target_path = os.path.join(
            "/",
            "tmp",
            filename
        )
        # checking the folder is probably not needed as the daemonset
        # links the configmap there on creation
        # _folder = os.path.join(
        #     self.env_config.kube_node_homepath,
        #     self.env_config.kube_scripts_folder
        # )
        # prepare data
        buffer = json.dumps(targets, indent=2).encode('utf-8')

        # write data to pod using fancy websocket function
        self.kube.put_string_buffer_to_pod_as_textfile(
            podname,
            namespace,
            buffer,
            _target_path
        )

        # TODO: Exception handling

        return _target_path

    def get_cmd_for_nodes(self, cmd, target_key, target_dict=None, nodes=None):
        """Function runs command on daemonset and parses result into place
        or into the dict structure provided

        :return: no return value, data published internally
        """
        logger_cli.debug(
            "... collecting results for '{}'".format(cmd)
        )
        if target_dict:
            _nodes = target_dict
        else:
            _nodes = self.nodes
        # Dirty way to get daemonset that was used in checker and not deleted
        _ds = self.prepared_daemonsets[0]
        _result = self.execute_cmd_on_daemon_set(_ds, cmd)
        for node, data in _nodes.items():
            if node in self.skip_list:
                logger_cli.debug(
                    "... '{}' skipped while collecting '{}'".format(
                        node,
                        cmd
                    )
                )
                continue
            # Prepare target key
            if target_key not in data:
                data[target_key] = None
            # Save data
            if data['status'] in [NODE_DOWN, NODE_SKIP]:
                data[target_key] = None
            elif node not in _result:
                continue
            elif not _result[node]:
                logger_cli.debug(
                    "... '{}' did not respond after '{}'".format(
                        node,
                        self.env_config.salt_timeout
                    )
                )
                data[target_key] = None
            else:
                data[target_key] = _result[node]

1380 def prepare_benchmark_agent(self, index, path, sc, size, template):
1381 # Load pod template
1382 _yaml_file = os.path.join(pkg_dir, 'templates', template)
1383 logger_cli.debug("... loading template '{}'".format(_yaml_file))
1384 _pod = {}
1385 with open(_yaml_file) as podFile:
1386 _pod = yaml.load(podFile, Loader=yaml.SafeLoader)
1387
1388 # set namings
1389 _n = "cfgagent-{:02}".format(index)
1390 _pvc_n = "cfgagent-pvc-{:02}".format(index)
Alex90ac1532021-12-09 11:13:14 -06001391 # _pv_n = "cfgagent-pv-{:02}".format(index)
Alex5cace3b2021-11-10 16:40:37 -06001392
1393 _pod["metadata"]["name"] = _n
1394 _pod["metadata"]["labels"]["name"] = _n
1395 # replace volumeMounts
1396 for _c in _pod["spec"]["containers"]:
1397 for _mnt in _c["volumeMounts"]:
1398 if "placeholder" in _mnt["name"]:
Alex90ac1532021-12-09 11:13:14 -06001399 # _mnt["name"] = _pv_n
Alex5cace3b2021-11-10 16:40:37 -06001400 _mnt["mountPath"] = path
1401 # replace claim
1402 for _v in _pod["spec"]["volumes"]:
Alex30380a42021-12-20 16:11:20 -06001403 if "cfgagent-pv" in _v["name"]:
Alex90ac1532021-12-09 11:13:14 -06001404 # _v["name"] = _pv_n
Alex5cace3b2021-11-10 16:40:37 -06001405 _v["persistentVolumeClaim"]["claimName"] = _pvc_n
1406
1407 # init volume resources
Alex90ac1532021-12-09 11:13:14 -06001408 # _pv_object = self.kube.init_pv_resource(_pv_n, sc, size, path)
1409 # _pv = self.kube.prepare_pv(_pv_object)
Alex30380a42021-12-20 16:11:20 -06001410 # update size of the volume to be 15% larger
Alex5cace3b2021-11-10 16:40:37 -06001411 _pvc_object = self.kube.init_pvc_resource(_pvc_n, sc, size)
1412 _pvc = self.kube.prepare_pvc(_pvc_object)
1413
1414 # start pod
1415 _pod = self.kube.prepare_pod_from_yaml(_pod)
1416
Alex90ac1532021-12-09 11:13:14 -06001417 # return _pod, _pv, _pvc
1418 return _pod, _pvc
Alex5cace3b2021-11-10 16:40:37 -06001419
1420 def expose_benchmark_agent(self, agent):
1421 return self.kube.expose_pod_port(agent, 8765)
Alex2a7657c2021-11-10 20:51:34 -06001422
    def cleanup_resource_by_name(self, res_type, name, ns=None, wait=False):
        """Cleans up a resource using string res_type and the ns/name

        Args:
            res_type (string): resource type name: pod, pv, pvc, svc
            name (string): resource name to cleanup
            ns (string, optional): Namespace to use. Default is 'qa-space'
            wait (bool, optional): wait until the resource is terminated

        return: (bool) Is Success?
        """
        # fill defaults
        if not ns:
            ns = self._namespace
        # Handle res_type errors and choose resource type
        if not res_type:
            logger_cli.debug(
                "... resource type invalid: '{}'".format(res_type)
            )
            return False
        elif not name:
            logger_cli.debug("... resource name invalid: '{}'".format(name))
            return False
        elif res_type == "svc":
            # Delete service
            logger_cli.info("-> deleting svc {}/{}".format(ns, name))
            self.kube.CoreV1.delete_namespaced_service(name, ns)
            # TODO: Check if successful
        elif res_type == "pod":
            # Delete a pod
            logger_cli.info("-> deleting pod {}/{}".format(ns, name))
            self.kube.CoreV1.delete_namespaced_pod(name, ns)
            if wait:
                self.kube.wait_for_phase(res_type, name, ns, ["Terminated"])
        elif res_type == "pvc":
            logger_cli.info("-> deleting pvc {}/{}".format(ns, name))
            self.kube.CoreV1.delete_namespaced_persistent_volume_claim(
                name,
                ns
            )
            if wait:
                self.kube.wait_for_phase(res_type, name, ns, ["Terminated"])
        elif res_type == "pv":
            logger_cli.info("-> deleting pv {}/{}".format(ns, name))
            self.kube.CoreV1.delete_persistent_volume(name)
            if wait:
                self.kube.wait_for_phase(res_type, name, None, ["Terminated"])

        return True

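    # Usage sketch (resource names are hypothetical):
    #   kube_nodes.cleanup_resource_by_name("pod", "cfgagent-01", wait=True)
    #   kube_nodes.cleanup_resource_by_name("pvc", "cfgagent-pvc-01")
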
    def get_resource_phase_by_name(self, typ, name, ns="qa-space"):
        if typ == "pod":
            _t = self.kube.get_pod_by_name_and_ns(name, ns)
        elif typ == "svc":
            _t = self.kube.get_svc_by_name_and_ns(name, ns)
        elif typ == "pvc":
            _t = self.kube.get_pvc_by_name_and_ns(name, ns)
        elif typ == "pv":
            _t = self.kube.get_pv_by_name(name)
        else:
            logger_cli.error("ERROR: '{}' is not supported yet".format(typ))
            return None

        if _t:
            return _t.status.phase
        else:
            return None

    def list_resource_names_by_type_and_ns(self, typ, ns="qa-space"):
        if typ == "pod":
            _items = self.kube.list_pods(ns)
        elif typ == "svc":
            _items = self.kube.list_svc(ns)
        elif typ == "pvc":
            _items = self.kube.list_pvc(ns)
        elif typ == "pv":
            _items = self.kube.list_pv()
        else:
            logger_cli.error("ERROR: '{}' is not supported yet".format(typ))
            return None
        return [[i.metadata.namespace, i.metadata.name] for i in _items.items]

    def list_pod_names_with_containers(self, ns="qa-space", running_only=True):
        _result = []
        _pods = self.kube.list_pods(ns)
        if not running_only:
            for i in _pods.items:
                _result.append([
                    i.metadata.namespace,
                    i.metadata.name,
                    [c.name for c in i.spec.containers]
                ])
        else:
            for i in _pods.items:
                if i.status.phase == "Running":
                    _result.append([
                        i.metadata.namespace,
                        i.metadata.name,
                        [c.name for c in i.status.container_statuses
                         if c.state.running is not None]
                    ])
        return _result

    def get_logs_for_pod(self, podname, container, namespace, tail_lines):
        try:
            return self.kube.get_pod_logs(
                podname,
                container,
                namespace,
                tail_lines=tail_lines
            )
        except KubeException as e:
            logger_cli.warning(
                "WARNING: Log retrieval failed: '{}'".format(e.message)
            )
            return ""

    def list_namespaces(self):
        return [i.metadata.name for i in self.kube.list_namespaces().items]